import ast from datetime import datetime as dt, timezone as tz from flask import request, redirect, flash, render_template, Blueprint, session, current_app from flask_security import roles_accepted, current_user from sqlalchemy import desc from werkzeug.utils import secure_filename from sqlalchemy.exc import SQLAlchemyError import requests from requests.exceptions import SSLError from urllib.parse import urlparse, unquote import io import json from common.models.document import Document, DocumentVersion from common.extensions import db, minio_client from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \ process_multiple_urls, get_documents_list, edit_document, \ edit_document_version, refresh_document from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \ EveAIDoubleURLException from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm from common.utils.middleware import mw_before_request from common.utils.celery_utils import current_celery from common.utils.nginx_utils import prefixed_url_for from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict from .document_version_list_view import DocumentVersionListView document_bp = Blueprint('document_bp', __name__, url_prefix='/document') @document_bp.before_request def log_before_request(): current_app.logger.debug(f"Before request (document_bp): {request.method} {request.url}") @document_bp.after_request def log_after_request(response): current_app.logger.debug( f"After request (document_bp): {request.method} {request.url} - Status: {response.status}") return response @document_bp.before_request def before_request(): try: mw_before_request() except Exception as e: current_app.logger.error(f'Error switching schema in Document Blueprint: {e}') for role in current_user.roles: current_app.logger.debug(f'User {current_user.email} has role {role.name}') raise @document_bp.route('/add_document', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def add_document(): form = AddDocumentForm() if form.validate_on_submit(): try: tenant_id = session['tenant']['id'] file = form.file.data filename = secure_filename(file.filename) extension = filename.rsplit('.', 1)[1].lower() validate_file_type(extension) current_app.logger.debug(f'Language on form: {form.language.data}') api_input = { 'name': form.name.data, 'language': form.language.data, 'user_context': form.user_context.data, 'valid_from': form.valid_from.data, 'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None, } current_app.logger.debug(f'Creating document stack with input {api_input}') new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id) task_id = start_embedding_task(tenant_id, new_doc_vers.id) flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.', 'success') return redirect(prefixed_url_for('document_bp.documents')) except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e: flash(str(e), 'error') except Exception as e: current_app.logger.error(f'Error adding document: {str(e)}') flash('An error occurred while adding the document.', 'error') return render_template('document/add_document.html', form=form) @document_bp.route('/add_url', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def add_url(): form = AddURLForm() if form.validate_on_submit(): try: tenant_id = session['tenant']['id'] url = form.url.data file_content, filename, extension = process_url(url, tenant_id) api_input = { 'name': form.name.data or filename, 'url': url, 'language': form.language.data, 'user_context': form.user_context.data, 'valid_from': form.valid_from.data, 'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None, } new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id) task_id = start_embedding_task(tenant_id, new_doc_vers.id) flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.', 'success') return redirect(prefixed_url_for('document_bp.documents')) except EveAIDoubleURLException: flash(f'A document with url {url} already exists. No new document created.', 'info') except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e: flash(str(e), 'error') except Exception as e: current_app.logger.error(f'Error adding document: {str(e)}') flash('An error occurred while adding the document.', 'error') return render_template('document/add_url.html', form=form) @document_bp.route('/add_urls', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def add_urls(): form = AddURLsForm() if form.validate_on_submit(): try: tenant_id = session['tenant']['id'] urls = form.urls.data.split('\n') urls = [url.strip() for url in urls if url.strip()] api_input = { 'name': form.name.data, 'language': form.language.data, 'user_context': form.user_context.data, 'valid_from': form.valid_from.data } results = process_multiple_urls(urls, tenant_id, api_input) for result in results: if result['status'] == 'success': flash( f"Processed URL: {result['url']} - Document ID: {result['document_id']}, Version ID: {result['document_version_id']}", 'success') else: flash(f"Error processing URL: {result['url']} - {result['message']}", 'error') return redirect(prefixed_url_for('document_bp.documents')) except Exception as e: current_app.logger.error(f'Error adding multiple URLs: {str(e)}') flash('An error occurred while adding the URLs.', 'error') return render_template('document/add_urls.html', form=form) @document_bp.route('/documents', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def documents(): page = request.args.get('page', 1, type=int) per_page = request.args.get('per_page', 10, type=int) pagination = get_documents_list(page, per_page) docs = pagination.items rows = prepare_table_for_macro(docs, [('id', ''), ('name', ''), ('valid_from', ''), ('valid_to', '')]) return render_template('document/documents.html', rows=rows, pagination=pagination) @document_bp.route('/handle_document_selection', methods=['POST']) @roles_accepted('Super User', 'Tenant Admin') def handle_document_selection(): document_identification = request.form['selected_row'] doc_id = ast.literal_eval(document_identification).get('value') action = request.form['action'] match action: case 'edit_document': return redirect(prefixed_url_for('document_bp.edit_document_view', document_id=doc_id)) case 'document_versions': return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id)) case 'refresh_document': refresh_document_view(doc_id) return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id)) case 're_embed_latest_versions': re_embed_latest_versions() # Add more conditions for other actions return redirect(prefixed_url_for('document_bp.documents')) @document_bp.route('/edit_document/', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def edit_document_view(document_id): doc = Document.query.get_or_404(document_id) form = EditDocumentForm(obj=doc) if form.validate_on_submit(): updated_doc, error = edit_document( document_id, form.name.data, form.valid_from.data, form.valid_to.data ) if updated_doc: flash(f'Document {updated_doc.id} updated successfully', 'success') return redirect(prefixed_url_for('document_bp.documents')) else: flash(f'Error updating document: {error}', 'danger') else: form_validation_failed(request, form) return render_template('document/edit_document.html', form=form, document_id=document_id) @document_bp.route('/edit_document_version/', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def edit_document_version_view(document_version_id): doc_vers = DocumentVersion.query.get_or_404(document_version_id) form = EditDocumentVersionForm(obj=doc_vers) if form.validate_on_submit(): updated_version, error = edit_document_version( document_version_id, form.user_context.data ) if updated_version: flash(f'Document Version {updated_version.id} updated successfully', 'success') return redirect(prefixed_url_for('document_bp.document_versions', document_id=updated_version.doc_id)) else: flash(f'Error updating document version: {error}', 'danger') else: form_validation_failed(request, form) return render_template('document/edit_document_version.html', form=form, document_version_id=document_version_id, doc_details=f'Document {doc_vers.document.name}') @document_bp.route('/document_versions/', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def document_versions(document_id): doc = Document.query.get_or_404(document_id) doc_desc = f'Document {doc.name}' page = request.args.get('page', 1, type=int) per_page = request.args.get('per_page', 10, type=int) query = (DocumentVersion.query.filter_by(doc_id=document_id) .order_by(DocumentVersion.language) .order_by(desc(DocumentVersion.id))) pagination = query.paginate(page=page, per_page=per_page, error_out=False) doc_langs = pagination.items rows = prepare_table_for_macro(doc_langs, [('id', ''), ('url', ''), ('object_name', ''), ('file_type', ''), ('processing', ''), ('processing_started_at', ''), ('processing_finished_at', ''), ('processing_error', '')]) return render_template('document/document_versions.html', rows=rows, pagination=pagination, document=doc_desc) @document_bp.route('/handle_document_version_selection', methods=['POST']) @roles_accepted('Super User', 'Tenant Admin') def handle_document_version_selection(): document_version_identification = request.form['selected_row'] doc_vers_id = ast.literal_eval(document_version_identification).get('value') action = request.form['action'] current_app.logger.debug(f'Triggered Document Version Action: {action}') match action: case 'edit_document_version': return redirect(prefixed_url_for('document_bp.edit_document_version_view', document_version_id=doc_vers_id)) case 'process_document_version': process_version(doc_vers_id) # Add more conditions for other actions doc_vers = DocumentVersion.query.get_or_404(doc_vers_id) return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id)) @document_bp.route('/library_operations', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def library_operations(): return render_template('document/library_operations.html') @document_bp.route('/handle_library_selection', methods=['GET', 'POST']) @roles_accepted('Super User', 'Tenant Admin') def handle_library_selection(): action = request.form['action'] match action: case 're_embed_latest_versions': re_embed_latest_versions() case 'refresh_all_documents': refresh_all_documents() return redirect(prefixed_url_for('document_bp.library_operations')) @document_bp.route('/document_versions_list', methods=['GET']) @roles_accepted('Super User', 'Tenant Admin') def document_versions_list(): current_app.logger.debug('Getting document versions list') view = DocumentVersionListView(DocumentVersion, 'document/document_versions_list_view.html', per_page=20) current_app.logger.debug('Got document versions list') return view.get() def refresh_all_documents(): for doc in Document.query.all(): refresh_document(doc.id) def refresh_document_view(document_id): new_version, result = refresh_document(document_id) if new_version: flash(f'Document refreshed. New version: {new_version.id}. Task ID: {result}', 'success') else: flash(f'Error refreshing document: {result}', 'danger') return redirect(prefixed_url_for('document_bp.documents')) def re_embed_latest_versions(): docs = Document.query.all() for doc in docs: latest_doc_version = DocumentVersion.query.filter_by(doc_id=doc.id).order_by(desc(DocumentVersion.id)).first() if latest_doc_version: process_version(latest_doc_version.id) def process_version(version_id): task = current_celery.send_task('create_embeddings', args=[session['tenant']['id'], version_id,], queue='embeddings') current_app.logger.info(f'Embedding creation retriggered by user {current_user.id}, {current_user.email} ' f'for tenant {session["tenant"]["id"]}, ' f'Document Version {version_id}. ' f'Embedding creation task: {task.id}') flash(f'Processing for document version {version_id} retriggered successfully...', 'success') return redirect(prefixed_url_for('document_bp.documents')) def set_logging_information(obj, timestamp): obj.created_at = timestamp obj.updated_at = timestamp obj.created_by = current_user.id obj.updated_by = current_user.id def update_logging_information(obj, timestamp): obj.updated_at = timestamp obj.updated_by = current_user.id def log_session_state(session, msg=""): current_app.logger.debug(f"{msg} - Session dirty: {session.dirty}") current_app.logger.debug(f"{msg} - Session new: {session.new}") def fetch_html(url): # Fetches HTML content from a URL try: response = requests.get(url) except SSLError as e: current_app.logger.error(f"Error fetching HTML from {url} for tenant {session['tenant']['id']}. " f"Error Encountered: {e}") if current_app.config.get('DEBUG'): # only allow when in a development environment current_app.logger.info(f"Skipping SSL verification for {url} for tenant {session['tenant']['id']}. " f"Only while in development environment.") response = requests.get(url, verify=False) # Disable SSL verification else: response = None response.raise_for_status() # Will raise an exception for bad requests return response.content def prepare_document_data(docs): rows = [] for doc in docs: doc_row = [{'value': doc.name, 'class': '', 'type': 'text'}, {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}] # Document basic details if doc.valid_from: doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'}) else: doc_row.append({'value': '', 'class': '', 'type': 'text'}) # Nested languages and versions languages_rows = [] for lang in doc.languages: lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}] # Latest version details if available (should be available ;-) ) if lang.latest_version: lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}) if lang.latest_version.url: lang_row.append({'value': lang.latest_version.url, 'class': '', 'type': 'link', 'href': lang.latest_version.url}) else: lang_row.append({'value': '', 'class': '', 'type': 'text'}) if lang.latest_version.file_name: lang_row.append({'value': lang.latest_version.file_name, 'class': '', 'type': 'text'}) else: lang_row.append({'value': '', 'class': '', 'type': 'text'}) if lang.latest_version.file_type: lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'}) else: lang_row.append({'value': '', 'class': '', 'type': 'text'}) # Include other details as necessary languages_rows.append(lang_row) doc_row.append({'is_group': True, 'colspan': '5', 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'], 'sub_rows': languages_rows}) rows.append(doc_row) return rows