Improving chat functionality significantly throughout the application.

This commit is contained in:
Josako
2024-06-12 11:07:18 +02:00
parent 27b6de8734
commit be311c440b
22 changed files with 604 additions and 127 deletions

View File

@@ -10,10 +10,10 @@
{% block content %}
<div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_document_version_selection') }}">
{{ render_selectable_table(headers=["Document Version ID", "URL", "File Location", "File Name", "File Type", "Processing", "Processing Start", "Proceeing Finish"], rows=rows, selectable=True, id="versionsTable") }}
{{ render_selectable_table(headers=["ID", "URL", "File Loc.", "File Name", "File Type", "Process.", "Proces. Start", "Proces. Finish", "Proces. Error"], rows=rows, selectable=True, id="versionsTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="edit_document_version" class="btn btn-primary">Edit Document Version</button>
<button type="submit" name="action" value="process_document_version" class="btn btn-secondary">Process Document Version</button>
<button type="submit" name="action" value="process_document_version" class="btn btn-danger">Process Document Version</button>
</div>
</form>
</div>

View File

@@ -5,7 +5,7 @@
{% block content_title %}Documents{% endblock %}
{% block content_description %}View Documents for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto">{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
@@ -14,6 +14,7 @@
<div class="form-group mt-3">
<button type="submit" name="action" value="edit_document" class="btn btn-primary">Edit Document</button>
<button type="submit" name="action" value="document_versions" class="btn btn-secondary">Show Document Versions</button>
<button type="submit" name="action" value="refresh_document" class="btn btn-secondary">Refresh Document (new version)</button>
</div>
</form>
</div>

View File

@@ -0,0 +1,31 @@
{% extends 'base.html' %}
{# Library-wide maintenance page: each button posts its own "action" value
   to document_bp.handle_library_selection. #}
{% block title %}Library Operations{% endblock %}
{% block content_title %}Library Operations{% endblock %}
{% block content_description %}Perform operations on the entire library of documents.{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
    <form method="POST" action="{{ url_for('document_bp.handle_library_selection') }}">
        <div class="form-group mt-3">
            {# Operation 1: submits action=re_embed_latest_versions #}
            <h2>Re-Embed Latest Versions</h2>
            <p>This functionality will re-apply embeddings on the latest versions of all documents in the library.
            This is useful only while tuning the embedding parameters, or when changing embedding algorithms.
            As it is an expensive operation and highly impacts the performance of the system in future use,
            use it with caution!
            </p>
            <button type="submit" name="action" value="re_embed_latest_versions" class="btn btn-danger">Re-embed Latest Versions (expensive)</button>
            {# Operation 2: submits action=refresh_all_documents #}
            <h2>Refresh all documents</h2>
            <p>This operation will create new versions of all documents in the library with a URL. Documents that were uploaded directly,
            cannot be automatically refreshed. This is an expensive operation, and impacts the performance of the system in future use.
            Please use it with caution!
            </p>
            <button type="submit" name="action" value="refresh_all_documents" class="btn btn-danger">Refresh All Documents (expensive)</button>
        </div>
    </form>
</div>
{% endblock %}

View File

@@ -84,6 +84,7 @@
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Documents', 'url': '/document/documents', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Library Operations', 'url': '/document/library_operations', 'roles': ['Super User', 'Tenant Admin']},
]) }}
{% endif %}
{% if current_user.is_authenticated %}

View File

@@ -1,6 +1,8 @@
import ast
import os
from datetime import datetime as dt, timezone as tz
import chardet
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
from flask_security import roles_accepted, current_user
from sqlalchemy import desc
@@ -89,7 +91,7 @@ def add_url():
url = form.url.data
html = fetch_html(url)
file = io.StringIO(html)
file = io.BytesIO(html)
parsed_url = urlparse(url)
path_parts = parsed_url.path.split('/')
@@ -148,6 +150,11 @@ def handle_document_selection():
return redirect(prefixed_url_for('document_bp.edit_document', document_id=doc_id))
case 'document_versions':
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
case 'refresh_document':
refresh_document(doc_id)
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
case 're_embed_latest_versions':
re_embed_latest_versions()
# Add more conditions for other actions
return redirect(prefixed_url_for('document_bp.documents'))
@@ -210,7 +217,6 @@ def edit_document_version(document_version_id):
@document_bp.route('/document_versions/<int:document_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def document_versions(document_id):
flash(f'Processing documents is a long running process. Please be careful retriggering processing!', 'danger')
doc_vers = DocumentVersion.query.get_or_404(document_id)
doc_desc = f'Document {doc_vers.document.name}, Language {doc_vers.language}'
@@ -227,7 +233,7 @@ def document_versions(document_id):
rows = prepare_table_for_macro(doc_langs, [('id', ''), ('url', ''), ('file_location', ''),
('file_name', ''), ('file_type', ''),
('processing', ''), ('processing_started_at', ''),
('processing_finished_at', '')])
('processing_finished_at', ''), ('processing_error', '')])
return render_template('document/document_versions.html', rows=rows, pagination=pagination, document=doc_desc)
@@ -248,7 +254,91 @@ def handle_document_version_selection():
# Add more conditions for other actions
doc_vers = DocumentVersion.query.get_or_404(doc_vers_id)
return redirect(prefixed_url_for('document_bp.document_versions', document_language_id=doc_vers.doc_lang_id))
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id))
@document_bp.route('/library_operations', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def library_operations():
    """Render the library operations page.

    The view only renders the template; it passes no context variables.
    """
    template_name = 'document/library_operations.html'
    return render_template(template_name)
@document_bp.route('/handle_library_selection', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_library_selection():
action = request.form['action']
match action:
case 're_embed_latest_versions':
re_embed_latest_versions()
case 'refresh_all_documents':
refresh_all_documents()
return redirect(prefixed_url_for('document_bp.library_operations'))
def refresh_all_documents():
    """Call refresh_document for every Document returned by the query.

    Documents are processed one after another; an exception raised by
    refresh_document propagates and stops the loop.
    """
    all_documents = Document.query.all()
    for document in all_documents:
        refresh_document(document.id)
def refresh_document(doc_id):
    """Create a new version of a document from the URL of its latest version.

    Looks up the document's most recent DocumentVersion (highest id). If that
    version has a URL, a new version is created with the same URL, language
    and user context, the URL content is fetched and stored for the new
    version, and a ``create_embeddings`` task is sent to the ``embeddings``
    queue. Progress and failures are reported via ``flash`` and the app logger.

    Args:
        doc_id: Primary key of the Document to refresh.

    Returns:
        None. Returns early, without creating anything, when the document has
        no version or its latest version has no URL.

    Raises:
        SQLAlchemyError: Re-raised after rollback when persisting the new
            version fails.
        Exception: Any other error raised while persisting is logged and
            re-raised.
    """
    doc = Document.query.get_or_404(doc_id)
    doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()

    # A document without any version has nothing to refresh from; treat it
    # like a version without URL instead of failing on ``None.url``.
    if doc_vers is None or not doc_vers.url:
        current_app.logger.info(f'Document {doc_id} has no URL, skipping refresh')
        flash('This document has no URL. I can only refresh documents with a URL. skipping refresh', 'alert')
        return

    new_doc_vers = create_version_for_document(doc, doc_vers.url, doc_vers.language, doc_vers.user_context)

    try:
        db.session.add(new_doc_vers)
        db.session.commit()
    except SQLAlchemyError as e:
        # Roll back first so the session is cleaned up even if reporting fails.
        db.session.rollback()
        current_app.logger.error(f'Error refreshing document {doc_id} for tenant {session["tenant"]["id"]}: {e}')
        flash('Error refreshing document.', 'alert')
        raise
    except Exception:
        current_app.logger.error('Unknown error')
        raise

    # Fetch the current content behind the URL and wrap it as an in-memory
    # binary file for upload_file_for_version.
    html = fetch_html(new_doc_vers.url)
    html_file = io.BytesIO(html)
    extension = 'html'

    current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
                            f'Document Version {new_doc_vers.id}')

    upload_file_for_version(new_doc_vers, html_file, extension)

    task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
        session['tenant']['id'],
        new_doc_vers.id,
    ])
    current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
                            f'Document Version {new_doc_vers.id}. '
                            f'Embedding creation task: {task.id}')
    flash(f'Processing on document {doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
          'success')
def re_embed_latest_versions():
    """Run process_version on the newest version of every document.

    For each Document, the DocumentVersion with the highest id is looked up;
    documents without any version are skipped.
    """
    for document in Document.query.all():
        newest_version = (
            DocumentVersion.query
            .filter_by(doc_id=document.id)
            .order_by(desc(DocumentVersion.id))
            .first()
        )
        if not newest_version:
            continue
        process_version(newest_version.id)
def process_version(version_id):
@@ -283,7 +373,7 @@ def create_document_stack(form, file, filename, extension):
new_doc = create_document(form, filename)
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, form.language.data, form.user_context.data)
new_doc_vers = create_version_for_document(new_doc, form.url.data, form.language.data, form.user_context.data)
try:
db.session.add(new_doc)
@@ -329,8 +419,11 @@ def create_document(form, filename):
return new_doc
def create_version_for_document(document, language, user_context):
def create_version_for_document(document, url, language, user_context):
new_doc_vers = DocumentVersion()
if url != '':
new_doc_vers.url = url
if language == '':
new_doc_vers.language = session['default_language']
else:
@@ -356,12 +449,11 @@ def upload_file_for_version(doc_vers, file, extension):
os.makedirs(upload_path, exist_ok=True)
if isinstance(file, FileStorage):
file.save(os.path.join(upload_path, doc_vers.file_name))
elif isinstance(file, io.StringIO):
# It's a StringIO object, handle accordingly
elif isinstance(file, io.BytesIO):
# It's a BytesIO object, handle accordingly
# Example: write content to a file manually
content = file.getvalue()
with open(os.path.join(upload_path, doc_vers.file_name), 'w', encoding='utf-8') as file:
file.write(content)
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
f.write(file.getvalue())
else:
raise TypeError('Unsupported file type.')
@@ -392,7 +484,7 @@ def fetch_html(url):
response = None
response.raise_for_status() # Will raise an exception for bad requests
return response.text
return response.content
def prepare_document_data(docs):

View File

@@ -267,7 +267,7 @@ def handle_tenant_selection():
case 'edit_tenant':
return redirect(prefixed_url_for('user_bp.edit_tenant', tenant_id=tenant_id))
case 'select_tenant':
return redirect(prefixed_url_for('basic_bp.session_defaults'))
return redirect(prefixed_url_for('user_bp.tenant_overview'))
# Add more conditions for other actions
return redirect(prefixed_url_for('select_tenant'))