Improving chat functionality significantly throughout the application.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import ast
|
||||
import os
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
|
||||
import chardet
|
||||
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
|
||||
from flask_security import roles_accepted, current_user
|
||||
from sqlalchemy import desc
|
||||
@@ -89,7 +91,7 @@ def add_url():
|
||||
url = form.url.data
|
||||
|
||||
html = fetch_html(url)
|
||||
file = io.StringIO(html)
|
||||
file = io.BytesIO(html)
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
path_parts = parsed_url.path.split('/')
|
||||
@@ -148,6 +150,11 @@ def handle_document_selection():
|
||||
return redirect(prefixed_url_for('document_bp.edit_document', document_id=doc_id))
|
||||
case 'document_versions':
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
|
||||
case 'refresh_document':
|
||||
refresh_document(doc_id)
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
|
||||
case 're_embed_latest_versions':
|
||||
re_embed_latest_versions()
|
||||
|
||||
# Add more conditions for other actions
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
@@ -210,7 +217,6 @@ def edit_document_version(document_version_id):
|
||||
@document_bp.route('/document_versions/<int:document_id>', methods=['GET', 'POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def document_versions(document_id):
|
||||
flash(f'Processing documents is a long running process. Please be careful retriggering processing!', 'danger')
|
||||
doc_vers = DocumentVersion.query.get_or_404(document_id)
|
||||
doc_desc = f'Document {doc_vers.document.name}, Language {doc_vers.language}'
|
||||
|
||||
@@ -227,7 +233,7 @@ def document_versions(document_id):
|
||||
rows = prepare_table_for_macro(doc_langs, [('id', ''), ('url', ''), ('file_location', ''),
|
||||
('file_name', ''), ('file_type', ''),
|
||||
('processing', ''), ('processing_started_at', ''),
|
||||
('processing_finished_at', '')])
|
||||
('processing_finished_at', ''), ('processing_error', '')])
|
||||
|
||||
return render_template('document/document_versions.html', rows=rows, pagination=pagination, document=doc_desc)
|
||||
|
||||
@@ -248,7 +254,91 @@ def handle_document_version_selection():
|
||||
# Add more conditions for other actions
|
||||
|
||||
doc_vers = DocumentVersion.query.get_or_404(doc_vers_id)
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_language_id=doc_vers.doc_lang_id))
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id))
|
||||
|
||||
|
||||
@document_bp.route('/library_operations', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def library_operations():
    """Render the library-wide operations page.

    The ``roles_accepted`` decorator limits access to the 'Super User' and
    'Tenant Admin' roles.
    """
    template_name = 'document/library_operations.html'
    return render_template(template_name)
|
||||
|
||||
|
||||
@document_bp.route('/handle_library_selection', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_library_selection():
    """Run the library-wide action named in the submitted form.

    Reads the ``action`` form field, runs the matching operation (an action
    with no matching entry is ignored) and then redirects back to the
    library operations page.
    """
    requested = request.form['action']

    # Maps each supported action name to the function that performs it.
    operations = {
        're_embed_latest_versions': re_embed_latest_versions,
        'refresh_all_documents': refresh_all_documents,
    }
    operation = operations.get(requested)
    if operation is not None:
        operation()

    return redirect(prefixed_url_for('document_bp.library_operations'))
|
||||
|
||||
|
||||
def refresh_all_documents():
    """Refresh every document returned by ``Document.query.all()``.

    Each document is handed, by id, to ``refresh_document``.
    """
    all_documents = Document.query.all()
    for document in all_documents:
        refresh_document(document.id)
|
||||
|
||||
|
||||
def refresh_document(doc_id):
    """Create and process a new version of a document from its stored URL.

    Looks up the most recent ``DocumentVersion`` (highest id) of the document,
    creates a new version carrying the same URL, language and user context,
    downloads the page again, stores it as the new version's file and queues
    a ``create_embeddings`` Celery task for it.

    A document that has no version yet, or whose latest version has no URL,
    is skipped with a log entry and a flash message.

    Args:
        doc_id: Primary key of the ``Document`` to refresh. Lookup goes
            through ``get_or_404``.

    Raises:
        SQLAlchemyError: If the new version cannot be committed; the session
            is rolled back before re-raising.
        Exception: Any other error raised while committing (also rolled
            back), or by the fetch / upload / task-dispatch helpers,
            propagates to the caller.
    """
    doc = Document.query.get_or_404(doc_id)
    doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()

    # .first() yields None when the document has no versions at all; treat
    # that like a missing URL instead of failing on `None.url`.
    if doc_vers is None or not doc_vers.url:
        current_app.logger.info(f'Document {doc_id} has no URL, skipping refresh')
        flash('This document has no URL. I can only refresh documents with a URL. skipping refresh', 'alert')
        return

    tenant_id = session['tenant']['id']
    new_doc_vers = create_version_for_document(doc, doc_vers.url, doc_vers.language, doc_vers.user_context)

    try:
        db.session.add(new_doc_vers)
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(f'Error refreshing document {doc_id} for tenant {tenant_id}: {e}')
        flash('Error refreshing document.', 'alert')
        db.session.rollback()
        raise
    except Exception as e:
        # Keep the session usable and record what actually went wrong
        # before handing the error to the caller.
        current_app.logger.error(f'Unknown error refreshing document {doc_id} for tenant {tenant_id}: {e}')
        db.session.rollback()
        raise

    # NOTE(review): the new version is already committed here, so a failure
    # in the download or upload below leaves a version row without a file.
    # Confirm whether that is acceptable or needs cleanup.
    html = fetch_html(new_doc_vers.url)
    # Presumably fetch_html returns the raw response bytes, matching the
    # BytesIO wrapper - confirm against fetch_html.
    html_buffer = io.BytesIO(html)
    extension = 'html'

    upload_file_for_version(new_doc_vers, html_buffer, extension)

    # Logged only after the upload call has returned.
    current_app.logger.info(f'Document added successfully for tenant {tenant_id}, '
                            f'Document Version {new_doc_vers.id}')

    task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
        tenant_id,
        new_doc_vers.id,
    ])
    current_app.logger.info(f'Embedding creation started for tenant {tenant_id}, '
                            f'Document Version {new_doc_vers.id}. '
                            f'Embedding creation task: {task.id}')
    flash(f'Processing on document {doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
          'success')
|
||||
|
||||
|
||||
def re_embed_latest_versions():
    """Re-process the newest version of every document.

    For each document, the version with the highest id is looked up and its
    id is passed to ``process_version``. Documents without any version are
    skipped.
    """
    for document in Document.query.all():
        versions_newest_first = (
            DocumentVersion.query
            .filter_by(doc_id=document.id)
            .order_by(desc(DocumentVersion.id))
        )
        newest = versions_newest_first.first()
        if not newest:
            continue
        process_version(newest.id)
|
||||
|
||||
|
||||
def process_version(version_id):
|
||||
@@ -283,7 +373,7 @@ def create_document_stack(form, file, filename, extension):
|
||||
new_doc = create_document(form, filename)
|
||||
|
||||
# Create the DocumentVersion
|
||||
new_doc_vers = create_version_for_document(new_doc, form.language.data, form.user_context.data)
|
||||
new_doc_vers = create_version_for_document(new_doc, form.url.data, form.language.data, form.user_context.data)
|
||||
|
||||
try:
|
||||
db.session.add(new_doc)
|
||||
@@ -329,8 +419,11 @@ def create_document(form, filename):
|
||||
return new_doc
|
||||
|
||||
|
||||
def create_version_for_document(document, language, user_context):
|
||||
def create_version_for_document(document, url, language, user_context):
|
||||
new_doc_vers = DocumentVersion()
|
||||
if url != '':
|
||||
new_doc_vers.url = url
|
||||
|
||||
if language == '':
|
||||
new_doc_vers.language = session['default_language']
|
||||
else:
|
||||
@@ -356,12 +449,11 @@ def upload_file_for_version(doc_vers, file, extension):
|
||||
os.makedirs(upload_path, exist_ok=True)
|
||||
if isinstance(file, FileStorage):
|
||||
file.save(os.path.join(upload_path, doc_vers.file_name))
|
||||
elif isinstance(file, io.StringIO):
|
||||
# It's a StringIO object, handle accordingly
|
||||
elif isinstance(file, io.BytesIO):
|
||||
# It's a BytesIO object, handle accordingly
|
||||
# Example: write content to a file manually
|
||||
content = file.getvalue()
|
||||
with open(os.path.join(upload_path, doc_vers.file_name), 'w', encoding='utf-8') as file:
|
||||
file.write(content)
|
||||
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
|
||||
f.write(file.getvalue())
|
||||
else:
|
||||
raise TypeError('Unsupported file type.')
|
||||
|
||||
@@ -392,7 +484,7 @@ def fetch_html(url):
|
||||
response = None
|
||||
|
||||
response.raise_for_status() # Will raise an exception for bad requests
|
||||
return response.text
|
||||
return response.content
|
||||
|
||||
|
||||
def prepare_document_data(docs):
|
||||
|
||||
Reference in New Issue
Block a user