Improve HTML Processing + Introduction of Processed File viewer
This commit is contained in:
@@ -12,7 +12,7 @@ from requests.exceptions import SSLError
|
||||
import json
|
||||
|
||||
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
|
||||
from common.extensions import db, cache_manager
|
||||
from common.extensions import db, cache_manager, minio_client
|
||||
from common.models.interaction import Specialist, SpecialistRetriever
|
||||
from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
|
||||
edit_document, \
|
||||
@@ -215,7 +215,7 @@ def edit_processor(processor_id):
|
||||
try:
|
||||
db.session.add(processor)
|
||||
db.session.commit()
|
||||
flash('Retriever updated successfully!', 'success')
|
||||
flash('Processor updated successfully!', 'success')
|
||||
current_app.logger.info(f'Processor {processor.id} updated successfully')
|
||||
except SQLAlchemyError as e:
|
||||
db.session.rollback()
|
||||
@@ -649,6 +649,7 @@ def handle_document_version_selection():
|
||||
return redirect(prefixed_url_for('document_bp.document_versions_list'))
|
||||
|
||||
action = request.form['action']
|
||||
current_app.logger.debug(f'Action: {action}')
|
||||
|
||||
match action:
|
||||
case 'edit_document_version':
|
||||
@@ -656,6 +657,9 @@ def handle_document_version_selection():
|
||||
case 'process_document_version':
|
||||
process_version(doc_vers_id)
|
||||
# Add more conditions for other actions
|
||||
case 'view_document_version_markdown':
|
||||
return redirect(prefixed_url_for('document_bp.view_document_version_markdown',
|
||||
document_version_id=doc_vers_id))
|
||||
|
||||
doc_vers = DocumentVersion.query.get_or_404(doc_vers_id)
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id))
|
||||
@@ -772,6 +776,45 @@ def document_versions_list():
|
||||
return view.get()
|
||||
|
||||
|
||||
@document_bp.route('/view_document_version_markdown/<int:document_version_id>', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_document_version_markdown(document_version_id):
    """Show the processed markdown file that belongs to a document version.

    The markdown object is named ``<document_version.id>.md``; its object name
    is built with ``minio_client.generate_object_name`` and the file is fetched
    with ``minio_client.download_document_file`` for the tenant stored in the
    session, then decoded as UTF-8.

    Args:
        document_version_id: Primary key of the DocumentVersion to display
            (a 404 is raised by ``get_or_404`` when it does not exist).

    Returns:
        The rendered ``document/view_document_version_markdown.html`` template
        on success. On any error while locating, downloading or decoding the
        file, the error is logged and flashed and the user is redirected to
        the versions overview of the owning document.
    """
    current_app.logger.debug(f'Viewing document version markdown {document_version_id}')
    # Retrieve document version
    document_version = DocumentVersion.query.get_or_404(document_version_id)

    # Retrieve tenant information
    tenant_id = session.get('tenant').get('id')

    try:
        # Generate markdown filename
        markdown_filename = f"{document_version.id}.md"
        markdown_object_name = minio_client.generate_object_name(
            document_version.doc_id,
            document_version.language,
            document_version.id,
            markdown_filename,
        )
        current_app.logger.debug(f'Markdown object name: {markdown_object_name}')

        # Download actual markdown file
        file_data = minio_client.download_document_file(
            tenant_id,
            document_version.bucket_name,
            markdown_object_name,
        )

        # Decode the binary data to UTF-8 text
        markdown_content = file_data.decode('utf-8')
        # Log only the size: dumping the whole document would bloat the logs
        # and copy document content into them.
        current_app.logger.debug(f'Markdown content length: {len(markdown_content)}')

        # Render the template with the markdown content
        return render_template(
            'document/view_document_version_markdown.html',
            document_version=document_version,
            markdown_content=markdown_content
        )
    except Exception as e:
        current_app.logger.error(f"Error retrieving markdown for document version {document_version_id}: {str(e)}")
        flash(f"Error retrieving processed document: {str(e)}", "danger")
        # The versions overview is built with a document_id elsewhere in this
        # module; without it the URL cannot be built and the error handler
        # itself would fail.
        return redirect(prefixed_url_for('document_bp.document_versions',
                                         document_id=document_version.doc_id))
|
||||
|
||||
|
||||
def refresh_all_documents():
    """Call ``refresh_document`` with the id of every row from ``Document.query.all()``."""
    all_documents = Document.query.all()
    for document in all_documents:
        refresh_document(document.id)
|
||||
@@ -842,3 +885,16 @@ def fetch_html(url):
|
||||
|
||||
response.raise_for_status() # Will raise an exception for bad requests
|
||||
return response.content
|
||||
|
||||
|
||||
def clean_markdown(markdown):
    """Remove a wrapping triple-backtick fence from a markdown string.

    Surrounding whitespace is stripped first. If the text then starts with
    "```markdown" or a bare "```", that opening fence is removed, and a
    trailing "```" is removed along with it.

    The trailing fence is only removed when an opening fence was removed:
    text that merely ends with a fenced code block keeps its closing fence,
    so the final code block of a document is not left unterminated.

    Args:
        markdown: The markdown text to clean.

    Returns:
        The cleaned markdown text, stripped of leading/trailing whitespace.
    """
    fence = "```"
    markdown = markdown.strip()
    # Try the more specific opening first so "```markdown" is not treated as
    # a bare fence followed by the word "markdown".
    for opening in (fence + "markdown", fence):
        if markdown.startswith(opening):
            markdown = markdown[len(opening):].strip()
            if markdown.endswith(fence):
                markdown = markdown[:-len(fence)].strip()
            break
    return markdown
|
||||
|
||||
|
||||
Reference in New Issue
Block a user