- View License Usages - Celery Beat container added - First schedule in Celery Beat for calculating usage (hourly) - repopack can now split for different components - Various fixes as consequence of changing file_location / file_name ==> bucket_name / object_name - Celery Routing / Queuing updated
444 lines
18 KiB
Python
444 lines
18 KiB
Python
import ast
|
|
from datetime import datetime as dt, timezone as tz
|
|
|
|
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
|
|
from flask_security import roles_accepted, current_user
|
|
from sqlalchemy import desc
|
|
from werkzeug.utils import secure_filename
|
|
from sqlalchemy.exc import SQLAlchemyError
|
|
import requests
|
|
from requests.exceptions import SSLError
|
|
from urllib.parse import urlparse, unquote
|
|
import io
|
|
import json
|
|
|
|
from common.models.document import Document, DocumentVersion
|
|
from common.extensions import db, minio_client
|
|
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
|
|
process_multiple_urls, get_documents_list, edit_document, \
|
|
edit_document_version, refresh_document
|
|
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
|
|
EveAIDoubleURLException
|
|
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
|
|
from common.utils.middleware import mw_before_request
|
|
from common.utils.celery_utils import current_celery
|
|
from common.utils.nginx_utils import prefixed_url_for
|
|
from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict
|
|
from .document_version_list_view import DocumentVersionListView
|
|
|
|
# Blueprint that groups the document-management views; routes registered on it
# are served under the '/document' URL prefix.
document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
|
|
|
|
|
|
@document_bp.before_request
def log_before_request():
    """Write a debug log line with the method and URL of the incoming request."""
    message = f"Before request (document_bp): {request.method} {request.url}"
    current_app.logger.debug(message)
|
|
|
|
|
|
@document_bp.after_request
def log_after_request(response):
    """Write a debug log line with the request method, URL and response status.

    The response object is handed back untouched.
    """
    message = f"After request (document_bp): {request.method} {request.url} - Status: {response.status}"
    current_app.logger.debug(message)
    return response
|
|
|
|
|
|
@document_bp.before_request
def before_request():
    """Run the shared ``mw_before_request`` middleware ahead of every blueprint request.

    If the middleware raises, the error is logged together with the roles of
    the current user (debug level), and the original exception is re-raised.
    """
    try:
        mw_before_request()
    except Exception as e:
        log = current_app.logger
        log.error(f'Error switching schema in Document Blueprint: {e}')
        # Dump the user's roles to help diagnose why the middleware failed.
        for assigned_role in current_user.roles:
            log.debug(f'User {current_user.email} has role {assigned_role.name}')
        raise
|
|
|
|
|
|
@document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_document():
    """Show the upload form and, on a valid submit, create a document from the uploaded file.

    On success the document stack is created, an embedding task is started and
    the user is redirected to the document list. On any failure a message is
    flashed and the form is rendered again.
    """
    form = AddDocumentForm()

    if form.validate_on_submit():
        try:
            tenant_id = session['tenant']['id']
            file = form.file.data
            filename = secure_filename(file.filename)

            # A sanitised name without a dot has no extension at all; report that
            # explicitly instead of failing with an IndexError that would only
            # surface as the generic "error occurred" message below.
            _, dot, suffix = filename.rpartition('.')
            if not dot:
                flash('The uploaded file has no file extension, so its type cannot be determined.', 'error')
                return render_template('document/add_document.html', form=form)
            extension = suffix.lower()

            validate_file_type(extension)

            current_app.logger.debug(f'Language on form: {form.language.data}')
            api_input = {
                'name': form.name.data,
                'language': form.language.data,
                'user_context': form.user_context.data,
                'valid_from': form.valid_from.data,
                # Metadata is entered as JSON text; an empty field means "no metadata".
                'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
            }
            current_app.logger.debug(f'Creating document stack with input {api_input}')

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                  'success')
            return redirect(prefixed_url_for('document_bp.documents'))

        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            # Validation problems carry a user-presentable message.
            flash(str(e), 'error')
        except Exception as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            flash('An error occurred while adding the document.', 'error')

    return render_template('document/add_document.html', form=form)
|
|
|
|
|
|
@document_bp.route('/add_url', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_url():
    """Show the single-URL form and, on a valid submit, create a document from that URL.

    The URL is handed to ``process_url``; its result feeds
    ``create_document_stack`` and an embedding task is started. Success
    redirects to the document list; every failure flashes a message and
    renders the form again.
    """
    form = AddURLForm()

    if not form.validate_on_submit():
        return render_template('document/add_url.html', form=form)

    try:
        tenant_id = session['tenant']['id']
        url = form.url.data

        file_content, filename, extension = process_url(url, tenant_id)

        # Fall back to the name derived from the URL when the user gave none.
        api_input = {
            'name': form.name.data or filename,
            'url': url,
            'language': form.language.data,
            'user_context': form.user_context.data,
            'valid_from': form.valid_from.data,
            'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
        }

        new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
        task_id = start_embedding_task(tenant_id, new_doc_vers.id)

        flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
              'success')
        return redirect(prefixed_url_for('document_bp.documents'))

    except EveAIDoubleURLException:
        flash(f'A document with url {url} already exists. No new document created.', 'info')
    except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
        flash(str(e), 'error')
    except Exception as e:
        current_app.logger.error(f'Error adding document: {str(e)}')
        flash('An error occurred while adding the document.', 'error')

    return render_template('document/add_url.html', form=form)
|
|
|
|
|
|
@document_bp.route('/add_urls', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_urls():
    """Show the multi-URL form and, on a valid submit, process every URL entered.

    The text area is split on newlines; blank lines are dropped. One flash
    message is emitted per URL result before redirecting to the document
    list. An unexpected exception is logged and reported with a generic message.
    """
    form = AddURLsForm()

    if not form.validate_on_submit():
        return render_template('document/add_urls.html', form=form)

    try:
        tenant_id = session['tenant']['id']
        # One URL per line; ignore empty / whitespace-only lines.
        urls = [line.strip() for line in form.urls.data.split('\n') if line.strip()]

        api_input = {
            'name': form.name.data,
            'language': form.language.data,
            'user_context': form.user_context.data,
            'valid_from': form.valid_from.data
        }

        for outcome in process_multiple_urls(urls, tenant_id, api_input):
            if outcome['status'] != 'success':
                flash(f"Error processing URL: {outcome['url']} - {outcome['message']}", 'error')
                continue
            flash(
                f"Processed URL: {outcome['url']} - Document ID: {outcome['document_id']}, Version ID: {outcome['document_version_id']}",
                'success')

        return redirect(prefixed_url_for('document_bp.documents'))

    except Exception as e:
        current_app.logger.error(f'Error adding multiple URLs: {str(e)}')
        flash('An error occurred while adding the URLs.', 'error')

    return render_template('document/add_urls.html', form=form)
|
|
|
|
|
|
@document_bp.route('/documents', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def documents():
    """Render one page of the document list.

    ``page`` (default 1) and ``per_page`` (default 10) are read from the
    query string and passed to ``get_documents_list``.
    """
    query_args = request.args
    page = query_args.get('page', 1, type=int)
    per_page = query_args.get('per_page', 10, type=int)

    pagination = get_documents_list(page, per_page)

    # Columns shown in the table, in display order.
    columns = [('id', ''), ('name', ''), ('valid_from', ''), ('valid_to', '')]
    rows = prepare_table_for_macro(pagination.items, columns)

    return render_template('document/documents.html', rows=rows, pagination=pagination)
|
|
|
|
|
|
@document_bp.route('/handle_document_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_document_selection():
    """Dispatch the action chosen for the selected row of the document list.

    The posted ``selected_row`` is a Python-literal dict whose ``value`` entry
    is used as the document id. Unknown actions fall through to a redirect
    back to the document list.
    """
    selected_row = request.form['selected_row']
    # literal_eval only parses literals, so the posted text is never executed.
    doc_id = ast.literal_eval(selected_row).get('value')

    action = request.form['action']

    if action == 'edit_document':
        return redirect(prefixed_url_for('document_bp.edit_document_view', document_id=doc_id))
    if action == 'document_versions':
        return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
    if action == 'refresh_document':
        refresh_document_view(doc_id)
        return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
    if action == 're_embed_latest_versions':
        re_embed_latest_versions()

    # Add more conditions for other actions
    return redirect(prefixed_url_for('document_bp.documents'))
|
|
|
|
|
|
@document_bp.route('/edit_document/<int:document_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_document_view(document_id):
    """Edit the name and validity period of one document.

    Responds with 404 when the document does not exist. A successful update
    redirects to the document list; otherwise the edit form is rendered.
    """
    doc = Document.query.get_or_404(document_id)
    form = EditDocumentForm(obj=doc)

    def show_form():
        return render_template('document/edit_document.html', form=form, document_id=document_id)

    if not form.validate_on_submit():
        form_validation_failed(request, form)
        return show_form()

    updated_doc, error = edit_document(
        document_id,
        form.name.data,
        form.valid_from.data,
        form.valid_to.data
    )
    if not updated_doc:
        flash(f'Error updating document: {error}', 'danger')
        return show_form()

    flash(f'Document {updated_doc.id} updated successfully', 'success')
    return redirect(prefixed_url_for('document_bp.documents'))
|
|
|
|
|
|
@document_bp.route('/edit_document_version/<int:document_version_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_document_version_view(document_version_id):
    """Edit the user context of one document version.

    Responds with 404 when the version does not exist. A successful update
    redirects to the version list of the owning document; otherwise the edit
    form is rendered.
    """
    doc_vers = DocumentVersion.query.get_or_404(document_version_id)
    form = EditDocumentVersionForm(obj=doc_vers)

    def show_form():
        return render_template('document/edit_document_version.html', form=form,
                               document_version_id=document_version_id,
                               doc_details=f'Document {doc_vers.document.name}')

    if not form.validate_on_submit():
        form_validation_failed(request, form)
        return show_form()

    updated_version, error = edit_document_version(
        document_version_id,
        form.user_context.data
    )
    if not updated_version:
        flash(f'Error updating document version: {error}', 'danger')
        return show_form()

    flash(f'Document Version {updated_version.id} updated successfully', 'success')
    return redirect(prefixed_url_for('document_bp.document_versions', document_id=updated_version.doc_id))
|
|
|
|
|
|
@document_bp.route('/document_versions/<int:document_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def document_versions(document_id):
    """Render one page of the versions belonging to a document.

    Responds with 404 when the document does not exist. Versions are ordered
    by language, then by descending id. ``page`` (default 1) and ``per_page``
    (default 10) come from the query string.
    """
    doc = Document.query.get_or_404(document_id)
    doc_desc = f'Document {doc.name}'

    query_args = request.args
    page = query_args.get('page', 1, type=int)
    per_page = query_args.get('per_page', 10, type=int)

    versions_query = DocumentVersion.query.filter_by(doc_id=document_id)
    versions_query = versions_query.order_by(DocumentVersion.language)
    versions_query = versions_query.order_by(desc(DocumentVersion.id))

    # error_out=False: an out-of-range page yields an empty page, not a 404.
    pagination = versions_query.paginate(page=page, per_page=per_page, error_out=False)

    # Columns shown in the table, in display order.
    columns = [
        ('id', ''), ('url', ''),
        ('object_name', ''), ('file_type', ''),
        ('processing', ''), ('processing_started_at', ''),
        ('processing_finished_at', ''), ('processing_error', ''),
    ]
    rows = prepare_table_for_macro(pagination.items, columns)

    return render_template('document/document_versions.html', rows=rows, pagination=pagination, document=doc_desc)
|
|
|
|
|
|
@document_bp.route('/handle_document_version_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_document_version_selection():
    """Dispatch the action chosen for the selected row of a version list.

    The posted ``selected_row`` is a Python-literal dict whose ``value`` entry
    is used as the document version id. Except for the edit action, the user
    ends up back on the version list of the owning document.
    """
    selected_row = request.form['selected_row']
    # literal_eval only parses literals, so the posted text is never executed.
    doc_vers_id = ast.literal_eval(selected_row).get('value')

    action = request.form['action']

    current_app.logger.debug(f'Triggered Document Version Action: {action}')

    if action == 'edit_document_version':
        return redirect(prefixed_url_for('document_bp.edit_document_version_view', document_version_id=doc_vers_id))
    if action == 'process_document_version':
        process_version(doc_vers_id)
    # Add more conditions for other actions

    doc_vers = DocumentVersion.query.get_or_404(doc_vers_id)
    return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id))
|
|
|
|
|
|
@document_bp.route('/library_operations', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def library_operations():
    """Render the library operations page."""
    template_name = 'document/library_operations.html'
    return render_template(template_name)
|
|
|
|
|
|
@document_bp.route('/handle_library_selection', methods=['GET', 'POST'])
|
|
@roles_accepted('Super User', 'Tenant Admin')
|
|
def handle_library_selection():
|
|
action = request.form['action']
|
|
|
|
match action:
|
|
case 're_embed_latest_versions':
|
|
re_embed_latest_versions()
|
|
case 'refresh_all_documents':
|
|
refresh_all_documents()
|
|
|
|
return redirect(prefixed_url_for('document_bp.library_operations'))
|
|
|
|
|
|
@document_bp.route('/document_versions_list', methods=['GET'])
@roles_accepted('Super User', 'Tenant Admin')
def document_versions_list():
    """Return the response of a ``DocumentVersionListView`` over all document versions (20 per page)."""
    log = current_app.logger
    log.debug('Getting document versions list')
    list_view = DocumentVersionListView(DocumentVersion, 'document/document_versions_list_view.html', per_page=20)
    log.debug('Got document versions list')
    return list_view.get()
|
|
|
|
|
|
def refresh_all_documents():
    """Call ``refresh_document`` for every document; the individual results are discarded."""
    all_documents = Document.query.all()
    for entry in all_documents:
        refresh_document(entry.id)
|
|
|
|
|
|
def refresh_document_view(document_id):
    """Refresh one document and flash the outcome.

    ``refresh_document`` returns a pair: the new version (falsy on failure)
    and either the task id or the error. Returns a redirect to the document list.
    """
    version, outcome = refresh_document(document_id)
    if not version:
        flash(f'Error refreshing document: {outcome}', 'danger')
    else:
        flash(f'Document refreshed. New version: {version.id}. Task ID: {outcome}', 'success')
    return redirect(prefixed_url_for('document_bp.documents'))
|
|
|
|
|
|
def re_embed_latest_versions():
    """Retrigger processing for the highest-id version of every document.

    Documents without any version are skipped.
    """
    for document in Document.query.all():
        versions_newest_first = (DocumentVersion.query
                                 .filter_by(doc_id=document.id)
                                 .order_by(desc(DocumentVersion.id)))
        newest = versions_newest_first.first()
        if not newest:
            continue
        process_version(newest.id)
|
|
|
|
|
|
def process_version(version_id):
    """Send a ``create_embeddings`` Celery task for one document version.

    The task is sent to the ``embeddings`` queue with the session's tenant id
    and the version id. The retrigger is logged, a success message is flashed,
    and a redirect to the document list is returned.
    """
    task_args = [session['tenant']['id'], version_id]
    task = current_celery.send_task('create_embeddings', args=task_args, queue='embeddings')

    current_app.logger.info(f'Embedding creation retriggered by user {current_user.id}, {current_user.email} '
                            f'for tenant {session["tenant"]["id"]}, '
                            f'Document Version {version_id}. '
                            f'Embedding creation task: {task.id}')

    flash(f'Processing for document version {version_id} retriggered successfully...', 'success')

    return redirect(prefixed_url_for('document_bp.documents'))
|
|
|
|
|
|
def set_logging_information(obj, timestamp):
    """Stamp ``obj`` as created and updated at ``timestamp`` by the current user."""
    obj.created_at = obj.updated_at = timestamp
    obj.created_by = obj.updated_by = current_user.id
|
|
|
|
|
|
def update_logging_information(obj, timestamp):
    """Stamp ``obj`` as updated at ``timestamp`` by the current user; creation fields are untouched."""
    obj.updated_at, obj.updated_by = timestamp, current_user.id
|
|
|
|
|
|
def log_session_state(session, msg=""):
    """Debug-log the ``dirty`` and ``new`` attributes of ``session``, each prefixed with ``msg``.

    NOTE(review): the parameter shadows the Flask ``session`` import; it is
    presumably a SQLAlchemy session (it must expose ``dirty`` and ``new``) -
    confirm against callers.
    """
    debug = current_app.logger.debug
    debug(f"{msg} - Session dirty: {session.dirty}")
    debug(f"{msg} - Session new: {session.new}")
|
|
|
|
|
|
def fetch_html(url):
    """Fetch ``url`` and return the raw response body.

    If the request fails with an SSL error, the failure is logged. When the
    app runs with ``DEBUG`` enabled, the request is retried once without
    certificate verification; otherwise the SSL error is re-raised.

    Raises:
        requests.exceptions.SSLError: on an SSL failure outside DEBUG mode.
        requests.exceptions.HTTPError: when the response has an error status.
    """
    try:
        response = requests.get(url)
    except SSLError as e:
        current_app.logger.error(f"Error fetching HTML from {url} for tenant {session['tenant']['id']}. "
                                 f"Error Encountered: {e}")
        if not current_app.config.get('DEBUG'):
            # There is no fallback outside development: propagate the SSL failure
            # instead of continuing without a response object (which would fail
            # with an unrelated AttributeError on the status check below).
            raise
        # only allow when in a development environment
        current_app.logger.info(f"Skipping SSL verification for {url} for tenant {session['tenant']['id']}. "
                                f"Only while in development environment.")
        response = requests.get(url, verify=False)  # Disable SSL verification

    response.raise_for_status()  # Will raise an exception for bad requests
    return response.content
|
|
|
|
|
|
def prepare_document_data(docs):
    """Convert documents into nested table rows.

    Each document yields a row with three text cells (name, creation
    timestamp, valid-from date or empty) followed by one group cell. The group
    cell holds one sub-row per entry in ``doc.languages``: the language, and,
    when a latest version exists, its creation timestamp, URL (as a link
    cell, or empty), file name and file type (empty when unset).
    """
    def text_cell(content):
        return {'value': content, 'class': '', 'type': 'text'}

    def language_cells(entry):
        cells = [text_cell(entry.language)]
        latest = entry.latest_version
        # Latest version details if available (should be available ;-) )
        if not latest:
            return cells
        cells.append(text_cell(latest.created_at.strftime("%Y-%m-%d %H:%M:%S")))
        if latest.url:
            cells.append({'value': latest.url, 'class': '', 'type': 'link', 'href': latest.url})
        else:
            cells.append(text_cell(''))
        cells.append(text_cell(latest.file_name or ''))
        cells.append(text_cell(latest.file_type or ''))
        # Include other details as necessary
        return cells

    table = []
    for document in docs:
        # Document basic details
        valid_from_text = document.valid_from.strftime("%Y-%m-%d") if document.valid_from else ''
        row = [
            text_cell(document.name),
            text_cell(document.created_at.strftime("%Y-%m-%d %H:%M:%S")),
            text_cell(valid_from_text),
        ]

        # Nested languages and versions
        row.append({
            'is_group': True,
            'colspan': '5',
            'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
            'sub_rows': [language_cells(entry) for entry in document.languages],
        })
        table.append(row)
    return table
|