Improving chat functionality significantly throughout the application.

This commit is contained in:
Josako
2024-06-12 11:07:18 +02:00
parent 27b6de8734
commit be311c440b
22 changed files with 604 additions and 127 deletions

View File

@@ -1,6 +1,8 @@
import ast
import os
from datetime import datetime as dt, timezone as tz
import chardet
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
from flask_security import roles_accepted, current_user
from sqlalchemy import desc
@@ -89,7 +91,7 @@ def add_url():
url = form.url.data
html = fetch_html(url)
file = io.StringIO(html)
file = io.BytesIO(html)
parsed_url = urlparse(url)
path_parts = parsed_url.path.split('/')
@@ -148,6 +150,11 @@ def handle_document_selection():
return redirect(prefixed_url_for('document_bp.edit_document', document_id=doc_id))
case 'document_versions':
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
case 'refresh_document':
refresh_document(doc_id)
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
case 're_embed_latest_versions':
re_embed_latest_versions()
# Add more conditions for other actions
return redirect(prefixed_url_for('document_bp.documents'))
@@ -210,7 +217,6 @@ def edit_document_version(document_version_id):
@document_bp.route('/document_versions/<int:document_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def document_versions(document_id):
flash(f'Processing documents is a long running process. Please be careful retriggering processing!', 'danger')
doc_vers = DocumentVersion.query.get_or_404(document_id)
doc_desc = f'Document {doc_vers.document.name}, Language {doc_vers.language}'
@@ -227,7 +233,7 @@ def document_versions(document_id):
rows = prepare_table_for_macro(doc_langs, [('id', ''), ('url', ''), ('file_location', ''),
('file_name', ''), ('file_type', ''),
('processing', ''), ('processing_started_at', ''),
('processing_finished_at', '')])
('processing_finished_at', ''), ('processing_error', '')])
return render_template('document/document_versions.html', rows=rows, pagination=pagination, document=doc_desc)
@@ -248,7 +254,91 @@ def handle_document_version_selection():
# Add more conditions for other actions
doc_vers = DocumentVersion.query.get_or_404(doc_vers_id)
return redirect(prefixed_url_for('document_bp.document_versions', document_language_id=doc_vers.doc_lang_id))
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id))
@document_bp.route('/library_operations', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def library_operations():
    """Render the library operations page.

    Access is limited to the 'Super User' and 'Tenant Admin' roles by the
    ``roles_accepted`` decorator.
    """
    template_name = 'document/library_operations.html'
    return render_template(template_name)
@document_bp.route('/handle_library_selection', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_library_selection():
    """Run the library-wide operation named by the form's 'action' field.

    Unrecognised action names are ignored. In every case the user is
    redirected to the library operations page afterwards.
    """
    selected = request.form['action']

    # Map each supported action name to the function that carries it out.
    operations = {
        're_embed_latest_versions': re_embed_latest_versions,
        'refresh_all_documents': refresh_all_documents,
    }
    operation = operations.get(selected)
    if operation is not None:
        operation()

    return redirect(prefixed_url_for('document_bp.library_operations'))
def refresh_all_documents():
    """Call ``refresh_document`` for every row returned by ``Document.query.all()``."""
    every_document = Document.query.all()
    for document in every_document:
        refresh_document(document.id)
def refresh_document(doc_id):
    """Create a new version of a URL-based document and queue it for embedding.

    The document's most recent version (highest id) supplies the URL,
    language and user context for the new DocumentVersion. The new version
    is committed, the URL is downloaded again, the content is stored through
    ``upload_file_for_version`` and a 'create_embeddings' Celery task is sent.

    A document without any version, or whose latest version has no URL, is
    skipped with a flash message and nothing is created.

    :param doc_id: id of the Document to refresh (404 if it does not exist).
    :raises SQLAlchemyError: re-raised after rollback when the new version
        cannot be committed.
    """
    doc = Document.query.get_or_404(doc_id)
    doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()

    # .first() returns None when the document has no versions yet; without
    # this guard the .url access below would raise AttributeError.
    if doc_vers is None:
        current_app.logger.info(f'Document {doc_id} has no versions, skipping refresh')
        flash('This document has no versions. I can only refresh documents with a URL. skipping refresh', 'alert')
        return

    if not doc_vers.url:
        current_app.logger.info(f'Document {doc_id} has no URL, skipping refresh')
        flash('This document has no URL. I can only refresh documents with a URL. skipping refresh', 'alert')
        return

    tenant_id = session['tenant']['id']
    new_doc_vers = create_version_for_document(doc, doc_vers.url, doc_vers.language, doc_vers.user_context)

    # The version is committed first so that new_doc_vers.id is available to
    # the log messages and the Celery task below.
    try:
        db.session.add(new_doc_vers)
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(f'Error refreshing document {doc_id} for tenant {tenant_id}: {e}')
        flash('Error refreshing document.', 'alert')
        db.session.rollback()
        raise
    except Exception as e:
        # Leave the session usable and record what actually went wrong.
        db.session.rollback()
        current_app.logger.error(f'Unknown error refreshing document {doc_id} for tenant {tenant_id}: {e}')
        raise

    # Refreshed documents always come from a URL, so the payload is HTML.
    html = fetch_html(new_doc_vers.url)
    file = io.BytesIO(html)
    extension = 'html'

    current_app.logger.info(f'Document added successfully for tenant {tenant_id}, '
                            f'Document Version {new_doc_vers.id}')

    upload_file_for_version(new_doc_vers, file, extension)

    task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
        tenant_id,
        new_doc_vers.id,
    ])
    current_app.logger.info(f'Embedding creation started for tenant {tenant_id}, '
                            f'Document Version {new_doc_vers.id}. '
                            f'Embedding creation task: {task.id}')
    flash(f'Processing on document {doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
          'success')
def re_embed_latest_versions():
    """Pass the newest version (highest id) of every document to ``process_version``.

    Documents for which no version is found are skipped.
    """
    for document in Document.query.all():
        newest = (
            DocumentVersion.query
            .filter_by(doc_id=document.id)
            .order_by(desc(DocumentVersion.id))
            .first()
        )
        if not newest:
            continue
        process_version(newest.id)
def process_version(version_id):
@@ -283,7 +373,7 @@ def create_document_stack(form, file, filename, extension):
new_doc = create_document(form, filename)
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, form.language.data, form.user_context.data)
new_doc_vers = create_version_for_document(new_doc, form.url.data, form.language.data, form.user_context.data)
try:
db.session.add(new_doc)
@@ -329,8 +419,11 @@ def create_document(form, filename):
return new_doc
def create_version_for_document(document, language, user_context):
def create_version_for_document(document, url, language, user_context):
new_doc_vers = DocumentVersion()
if url != '':
new_doc_vers.url = url
if language == '':
new_doc_vers.language = session['default_language']
else:
@@ -356,12 +449,11 @@ def upload_file_for_version(doc_vers, file, extension):
os.makedirs(upload_path, exist_ok=True)
if isinstance(file, FileStorage):
file.save(os.path.join(upload_path, doc_vers.file_name))
elif isinstance(file, io.StringIO):
# It's a StringIO object, handle accordingly
elif isinstance(file, io.BytesIO):
# It's a BytesIO object, handle accordingly
# Example: write content to a file manually
content = file.getvalue()
with open(os.path.join(upload_path, doc_vers.file_name), 'w', encoding='utf-8') as file:
file.write(content)
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
f.write(file.getvalue())
else:
raise TypeError('Unsupported file type.')
@@ -392,7 +484,7 @@ def fetch_html(url):
response = None
response.raise_for_status() # Will raise an exception for bad requests
return response.text
return response.content
def prepare_document_data(docs):

View File

@@ -267,7 +267,7 @@ def handle_tenant_selection():
case 'edit_tenant':
return redirect(prefixed_url_for('user_bp.edit_tenant', tenant_id=tenant_id))
case 'select_tenant':
return redirect(prefixed_url_for('basic_bp.session_defaults'))
return redirect(prefixed_url_for('user_bp.tenant_overview'))
# Add more conditions for other actions
return redirect(prefixed_url_for('select_tenant'))