diff --git a/common/services/entitlements/license_period_services.py b/common/services/entitlements/license_period_services.py
index b87596e..569fde1 100644
--- a/common/services/entitlements/license_period_services.py
+++ b/common/services/entitlements/license_period_services.py
@@ -68,7 +68,7 @@ class LicensePeriodServices:
# Status is PENDING, so no prepaid payment received. There is no license period we can use.
# We allow for a delay of 5 days before raising an exception.
current_date = dt.now(tz.utc).date()
- delta = abs(current_date - license_period.period_start_date)
+ delta = abs(current_date - license_period.period_start)
if delta > timedelta(days=current_app.config.get('ENTITLEMENTS_MAX_PENDING_DAYS', 5)):
raise EveAIPendingLicensePeriod()
case PeriodStatus.ACTIVE:
diff --git a/common/utils/template_filters.py b/common/utils/template_filters.py
index d42b47b..4527ef1 100644
--- a/common/utils/template_filters.py
+++ b/common/utils/template_filters.py
@@ -1,6 +1,8 @@
# common/utils/filters.py
import pytz
+import markdown
+from markupsafe import Markup
from datetime import datetime
from common.utils.nginx_utils import prefixed_url_for as puf
@@ -43,6 +45,44 @@ def status_color(status_name):
return colors.get(status_name, 'secondary')
+def render_markdown(text):
+ """
+ Render markdown text to HTML with Python's markdown library.
+
+ Falsy input yields an empty string. Otherwise the text is first passed
+ through clean_markdown() and then converted with the fenced_code,
+ codehilite, tables and toc extensions. The converted HTML is returned
+ wrapped in Markup.
+ """
+ if not text:
+ return ""
+
+ # Remove the triple backticks and the markdown label
+ text = clean_markdown(text)
+
+ # NOTE(review): the result is wrapped in Markup, i.e. handed on as
+ # already-safe markup. Presumably raw HTML contained in the markdown
+ # source then reaches the page unescaped - confirm the input is trusted
+ # or sanitize the converted HTML before wrapping it.
+ # Render the markdown with extensions
+ return Markup(markdown.markdown(text, extensions=[
+ 'markdown.extensions.fenced_code',
+ 'markdown.extensions.codehilite',
+ 'markdown.extensions.tables',
+ 'markdown.extensions.toc'
+ ]))
+
+
+def clean_markdown(text):
+    """
+    Strip a code-fence wrapper (``` or ```markdown) from around a text.
+
+    Surrounding whitespace is always trimmed. When the trimmed text starts
+    with "```markdown" or "```", that opening fence is removed together
+    with a matching trailing "```".
+
+    :param text: markdown text, possibly wrapped in a code fence; may be
+        None or empty.
+    :return: the unwrapped, trimmed text; "" for falsy input.
+    """
+    if not text:
+        return ""
+
+    text = text.strip()
+    # Longest prefix first, so "```markdown" is not mistaken for a bare "```".
+    for opening_fence in ("```markdown", "```"):
+        if text.startswith(opening_fence):
+            text = text[len(opening_fence):].strip()
+            # The trailing fence is only removed as the counterpart of the
+            # opening fence removed above. A text that merely *ends* with a
+            # fenced code block must keep its closing fence, otherwise that
+            # block no longer renders as code.
+            if text.endswith("```"):
+                text = text[:-3].strip()
+            break
+
+    return text
+
+
def prefixed_url_for(endpoint):
return puf(endpoint)
@@ -55,5 +95,7 @@ def register_filters(app):
app.jinja_env.filters['time_difference'] = time_difference
app.jinja_env.filters['status_color'] = status_color
app.jinja_env.filters['prefixed_url_for'] = prefixed_url_for
+ app.jinja_env.filters['markdown'] = render_markdown
+ app.jinja_env.filters['clean_markdown'] = clean_markdown
app.jinja_env.globals['prefixed_url_for'] = prefixed_url_for
diff --git a/eveai_app/templates/document/document_versions.html b/eveai_app/templates/document/document_versions.html
index b32c6f9..7c6e80a 100644
--- a/eveai_app/templates/document/document_versions.html
+++ b/eveai_app/templates/document/document_versions.html
@@ -13,6 +13,7 @@
{{ render_selectable_table(headers=["ID", "URL", "Object Name", "File Type", "Process.", "Proces. Start", "Proces. Finish", "Proces. Error"], rows=rows, selectable=True, id="versionsTable") }}
+
@@ -21,4 +22,4 @@
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.documents') }}
-{% endblock %}
\ No newline at end of file
+{% endblock %}
diff --git a/eveai_app/templates/document/document_versions_list_view.html b/eveai_app/templates/document/document_versions_list_view.html
index 7359736..2a759be 100644
--- a/eveai_app/templates/document/document_versions_list_view.html
+++ b/eveai_app/templates/document/document_versions_list_view.html
@@ -37,6 +37,7 @@
+
diff --git a/eveai_app/templates/document/view_document_version_markdown.html b/eveai_app/templates/document/view_document_version_markdown.html
new file mode 100644
index 0000000..2e1def3
--- /dev/null
+++ b/eveai_app/templates/document/view_document_version_markdown.html
@@ -0,0 +1,115 @@
+{% extends "base.html" %}
+{% block title %}Document Version Markdown{% endblock %}
+
+{% block content_title %}Document Version Markdown{% endblock %}
+{% block content_description %}Markdown inhoud van document versie {{ document_version.id }}.{% endblock %}
+
+{% block content %}
+
+
+
+
+
Document ID: {{ document_version.doc_id }}
+
Versie ID: {{ document_version.id }}
+
Object Naam: {{ document_version.object_name }}
+
Taal: {{ document_version.language }}
+
+
+
+
+
+
+
+
+
{{ markdown_content }}
+
+
+
+
+ {{ markdown_content | markdown }}
+
+
+
+
+{% endblock %}
+
+{% block styles %}
+{{ super() }}
+
+
+{% endblock %}
+
+{% block scripts %}
+{{ super() }}
+
+{% endblock %}
\ No newline at end of file
diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py
index 2f8fcf3..ebb21fb 100644
--- a/eveai_app/views/document_views.py
+++ b/eveai_app/views/document_views.py
@@ -12,7 +12,7 @@ from requests.exceptions import SSLError
import json
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
-from common.extensions import db, cache_manager
+from common.extensions import db, cache_manager, minio_client
from common.models.interaction import Specialist, SpecialistRetriever
from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
edit_document, \
@@ -215,7 +215,7 @@ def edit_processor(processor_id):
try:
db.session.add(processor)
db.session.commit()
- flash('Retriever updated successfully!', 'success')
+ flash('Processor updated successfully!', 'success')
current_app.logger.info(f'Processor {processor.id} updated successfully')
except SQLAlchemyError as e:
db.session.rollback()
@@ -649,6 +649,7 @@ def handle_document_version_selection():
return redirect(prefixed_url_for('document_bp.document_versions_list'))
action = request.form['action']
+ current_app.logger.debug(f'Action: {action}')
match action:
case 'edit_document_version':
@@ -656,6 +657,9 @@ def handle_document_version_selection():
case 'process_document_version':
process_version(doc_vers_id)
# Add more conditions for other actions
+ case 'view_document_version_markdown':
+ return redirect(prefixed_url_for('document_bp.view_document_version_markdown',
+ document_version_id=doc_vers_id))
doc_vers = DocumentVersion.query.get_or_404(doc_vers_id)
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_vers.doc_id))
@@ -772,6 +776,45 @@ def document_versions_list():
return view.get()
+@document_bp.route('/view_document_version_markdown/<int:document_version_id>', methods=['GET'])
+@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
+def view_document_version_markdown(document_version_id):
+    """
+    Show the stored markdown file of a document version.
+
+    Looks up the DocumentVersion (404 when it does not exist), downloads the
+    object "<version id>.md" through minio_client for the tenant found in the
+    session, decodes it as UTF-8 and renders
+    document/view_document_version_markdown.html with it.
+
+    Any failure while resolving the tenant, downloading or decoding is
+    logged, flashed to the user and answered with a redirect to the versions
+    overview of the owning document.
+
+    :param document_version_id: primary key of the DocumentVersion, taken
+        from the URL.
+    """
+    current_app.logger.debug(f'Viewing document version markdown {document_version_id}')
+    # Retrieve document version
+    document_version = DocumentVersion.query.get_or_404(document_version_id)
+
+    try:
+        # Retrieve tenant information. Done inside the try block so that a
+        # session without tenant ends in the flash + redirect below instead
+        # of an unhandled AttributeError.
+        tenant_id = session.get('tenant').get('id')
+
+        # Generate markdown filename
+        markdown_filename = f"{document_version.id}.md"
+        markdown_object_name = minio_client.generate_object_name(document_version.doc_id, document_version.language,
+                                                                 document_version.id, markdown_filename)
+        current_app.logger.debug(f'Markdown object name: {markdown_object_name}')
+        # Download actual markdown file
+        file_data = minio_client.download_document_file(
+            tenant_id,
+            document_version.bucket_name,
+            markdown_object_name,
+        )
+
+        # Decode the binary data to UTF-8 text
+        markdown_content = file_data.decode('utf-8')
+        # Log only the size: the full document body does not belong in the log.
+        current_app.logger.debug(f'Markdown content length: {len(markdown_content)}')
+
+        # Render the template with the markdown content
+        return render_template(
+            'document/view_document_version_markdown.html',
+            document_version=document_version,
+            markdown_content=markdown_content
+        )
+    except Exception as e:
+        current_app.logger.error(f"Error retrieving markdown for document version {document_version_id}: {str(e)}")
+        flash(f"Error retrieving processed document: {str(e)}", "danger")
+        # The document_versions endpoint is built with a document_id elsewhere
+        # in this module, so it is supplied here as well.
+        return redirect(prefixed_url_for('document_bp.document_versions',
+                                         document_id=document_version.doc_id))
+
+
def refresh_all_documents():
for doc in Document.query.all():
refresh_document(doc.id)
@@ -842,3 +885,16 @@ def fetch_html(url):
response.raise_for_status() # Will raise an exception for bad requests
return response.content
+
+
+def clean_markdown(markdown):
+    """
+    Strip a code-fence wrapper (``` or ```markdown) from around a text.
+
+    Surrounding whitespace is always trimmed. When the trimmed text starts
+    with "```markdown" or "```", that opening fence is removed together
+    with a matching trailing "```".
+
+    NOTE(review): common/utils/template_filters.py defines a function with
+    the same name and purpose, and no caller of this copy is visible in this
+    change - consider importing that one instead of keeping a duplicate.
+
+    :param markdown: markdown text, possibly wrapped in a code fence; may be
+        None or empty.
+    :return: the unwrapped, trimmed text; "" for falsy input.
+    """
+    if not markdown:
+        return ""
+
+    markdown = markdown.strip()
+    # Longest prefix first, so "```markdown" is not mistaken for a bare "```".
+    for opening_fence in ("```markdown", "```"):
+        if markdown.startswith(opening_fence):
+            markdown = markdown[len(opening_fence):].strip()
+            # Only remove the trailing fence as the counterpart of the opening
+            # fence removed above; a text that merely ends with a fenced code
+            # block must keep its closing fence.
+            if markdown.endswith("```"):
+                markdown = markdown[:-3].strip()
+            break
+    return markdown
+
diff --git a/eveai_workers/processors/html_processor.py b/eveai_workers/processors/html_processor.py
index 711cca6..6e14c3b 100644
--- a/eveai_workers/processors/html_processor.py
+++ b/eveai_workers/processors/html_processor.py
@@ -106,7 +106,7 @@ class HTMLProcessor(BaseProcessor):
current_chunk = []
current_size = 0
- for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
+ for element in soup.find_all(self.html_tags):
element_html = str(element)
element_size = len(element_html)
diff --git a/requirements.txt b/requirements.txt
index 0452266..6d3c506 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -91,4 +91,5 @@ contextvars~=2.4
pandas~=2.2.3
prometheus_client~=0.21.1
scaleway~=2.9.0
-html2text~=2025.4.15
\ No newline at end of file
+html2text~=2025.4.15
+markdown~=3.8