- Revisiting RAG_SPECIALIST

- Adapt Catalogs & Retrievers to use specific types, removing tagging_fields
- Adding CrewAI Implementation Guide
This commit is contained in:
Josako
2025-07-08 15:54:16 +02:00
parent 33b5742d2f
commit 509ee95d81
32 changed files with 997 additions and 825 deletions

View File

@@ -50,7 +50,7 @@ def before_request():
current_app.logger.error(f'Error switching schema in Document Blueprint: {e}')
raise
# Catalog Management ------------------------------------------------------------------------------
@document_bp.route('/catalog', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def catalog():
@@ -62,6 +62,11 @@ def catalog():
form.populate_obj(new_catalog)
set_logging_information(new_catalog, dt.now(tz.utc))
# Set Type information, including the configuration for backward compatibility
new_catalog.type_version = cache_manager.catalogs_version_tree_cache.get_latest_version(new_catalog.type)
new_catalog.configuration = (cache_manager.catalogs_config_cache
.get_config(new_catalog.type, new_catalog.type_version).get("configuration", {}))
try:
db.session.add(new_catalog)
db.session.commit()
@@ -110,6 +115,7 @@ def handle_catalog_selection():
current_app.logger.info(f'Setting session catalog to {catalog.name}')
session['catalog_id'] = catalog_id
session['catalog_name'] = catalog.name
session['catalog'] = catalog.to_dict()
elif action == 'edit_catalog':
return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))
@@ -124,15 +130,14 @@ def edit_catalog(catalog_id):
form = EditCatalogForm(request.form, obj=catalog)
full_config = cache_manager.catalogs_config_cache.get_config(catalog.type)
form.add_dynamic_fields("configuration", full_config, catalog.configuration)
if request.method == 'POST' and form.validate_on_submit():
form.populate_obj(catalog)
catalog.configuration = form.get_dynamic_data('configuration')
update_logging_information(catalog, dt.now(tz.utc))
try:
db.session.add(catalog)
db.session.commit()
if session.get('catalog_id') == catalog_id:
session['catalog'] = catalog.to_dict()
flash('Catalog successfully updated successfully!', 'success')
current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
except SQLAlchemyError as e:
@@ -147,6 +152,7 @@ def edit_catalog(catalog_id):
return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
# Processor Management ----------------------------------------------------------------------------
@document_bp.route('/processor', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def processor():
@@ -264,6 +270,7 @@ def handle_processor_selection():
return redirect(prefixed_url_for('document_bp.processors'))
# Retriever Management ----------------------------------------------------------------------------
@document_bp.route('/retriever', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def retriever():
@@ -306,8 +313,7 @@ def edit_retriever(retriever_id):
form = EditRetrieverForm(request.form, obj=retriever)
retriever_config = cache_manager.retrievers_config_cache.get_config(retriever.type, retriever.type_version)
configuration_config = retriever_config.get("configuration")
form.add_dynamic_fields("configuration", configuration_config, retriever.configuration)
form.add_dynamic_fields("configuration", retriever_config, retriever.configuration)
if form.validate_on_submit():
# Update basic fields
@@ -375,6 +381,7 @@ def handle_retriever_selection():
return redirect(prefixed_url_for('document_bp.retrievers'))
# Document Management -----------------------------------------------------------------------------
@document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def add_document():
@@ -730,6 +737,53 @@ def handle_full_document_selection():
return redirect(prefixed_url_for('document_bp.full_documents'))
@document_bp.route('/document_versions_list', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def document_versions_list():
    """Serve the document-version list view, configured with 20 entries per page.

    :return: whatever ``DocumentVersionListView.get()`` produces for the current request.
    """
    return DocumentVersionListView(
        DocumentVersion,
        'document/document_versions_list_view.html',
        per_page=20,
    ).get()
@document_bp.route('/view_document_version_markdown/<int:document_version_id>', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_document_version_markdown(document_version_id):
    """Display the markdown file stored for a single document version.

    The object name is built with ``minio_client.generate_object_name``, the file is fetched with
    ``minio_client.download_document_file`` and decoded as UTF-8 before being passed to the
    ``document/view_document_version_markdown.html`` template.

    :param document_version_id: id of the DocumentVersion to show; an unknown id ends in a 404
        through ``get_or_404``.
    :return: the rendered template, or a redirect to ``document_bp.document_versions`` (after
        logging and flashing the error) when anything inside the ``try`` block raises.
    """
    log = current_app.logger
    log.debug(f'Viewing document version markdown {document_version_id}')

    # Look up the requested version first, then the tenant id kept in the session
    version = DocumentVersion.query.get_or_404(document_version_id)
    tenant_id = session.get('tenant').get('id')

    try:
        # The markdown object is named after the version id
        md_filename = f"{version.id}.md"
        md_object_name = minio_client.generate_object_name(
            version.doc_id,
            version.language,
            version.id,
            md_filename,
        )
        log.debug(f'Markdown object name: {md_object_name}')

        # Download the actual markdown file
        raw_markdown = minio_client.download_document_file(tenant_id, version.bucket_name, md_object_name)

        # Decode the binary data into UTF-8 text
        markdown_content = raw_markdown.decode('utf-8')
        log.debug(f'Markdown content: {markdown_content}')

        # Render the template with the markdown content
        return render_template(
            'document/view_document_version_markdown.html',
            document_version=version,
            markdown_content=markdown_content,
        )
    except Exception as e:
        log.error(f"Error retrieving markdown for document version {document_version_id}: {e}")
        flash(f"Error retrieving processed document: {e}", "danger")
        return redirect(prefixed_url_for('document_bp.document_versions'))
# Utilities ---------------------------------------------------------------------------------------
@document_bp.route('/library_operations', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def library_operations():
@@ -834,52 +888,6 @@ def create_default_rag_library():
return redirect(prefixed_url_for('document_bp.library_operations'))
@document_bp.route('/document_versions_list', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def document_versions_list():
    """Serve the document-version list view, configured with 20 entries per page.

    :return: whatever ``DocumentVersionListView.get()`` produces for the current request.
    """
    return DocumentVersionListView(
        DocumentVersion,
        'document/document_versions_list_view.html',
        per_page=20,
    ).get()
@document_bp.route('/view_document_version_markdown/<int:document_version_id>', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_document_version_markdown(document_version_id):
    """Display the markdown file stored for a single document version.

    The object name is built with ``minio_client.generate_object_name``, the file is fetched with
    ``minio_client.download_document_file`` and decoded as UTF-8 before being passed to the
    ``document/view_document_version_markdown.html`` template.

    :param document_version_id: id of the DocumentVersion to show; an unknown id ends in a 404
        through ``get_or_404``.
    :return: the rendered template, or a redirect to ``document_bp.document_versions`` (after
        logging and flashing the error) when anything inside the ``try`` block raises.
    """
    log = current_app.logger
    log.debug(f'Viewing document version markdown {document_version_id}')

    # Look up the requested version first, then the tenant id kept in the session
    version = DocumentVersion.query.get_or_404(document_version_id)
    tenant_id = session.get('tenant').get('id')

    try:
        # The markdown object is named after the version id
        md_filename = f"{version.id}.md"
        md_object_name = minio_client.generate_object_name(
            version.doc_id,
            version.language,
            version.id,
            md_filename,
        )
        log.debug(f'Markdown object name: {md_object_name}')

        # Download the actual markdown file
        raw_markdown = minio_client.download_document_file(tenant_id, version.bucket_name, md_object_name)

        # Decode the binary data into UTF-8 text
        markdown_content = raw_markdown.decode('utf-8')
        log.debug(f'Markdown content: {markdown_content}')

        # Render the template with the markdown content
        return render_template(
            'document/view_document_version_markdown.html',
            document_version=version,
            markdown_content=markdown_content,
        )
    except Exception as e:
        log.error(f"Error retrieving markdown for document version {document_version_id}: {e}")
        flash(f"Error retrieving processed document: {e}", "danger")
        return redirect(prefixed_url_for('document_bp.document_versions'))
def refresh_all_documents():
    """Call ``refresh_document`` with the id of every ``Document`` returned by ``Document.query.all()``."""
    all_documents = Document.query.all()
    for doc in all_documents:
        refresh_document(doc.id)