- Revisiting RAG_SPECIALIST

- Adapt Catalogs & Retrievers to use specific types, removing tagging_fields
- Adding CrewAI Implementation Guide
This commit is contained in:
Josako
2025-07-08 15:54:16 +02:00
parent 33b5742d2f
commit 509ee95d81
32 changed files with 997 additions and 825 deletions

View File

@@ -11,9 +11,9 @@ from wtforms_sqlalchemy.fields import QuerySelectField
from common.extensions import cache_manager
from common.models.document import Catalog
from common.services.user import TenantServices
from common.utils.form_assistants import validate_json
from config.type_defs.catalog_types import CATALOG_TYPES
from config.type_defs.processor_types import PROCESSOR_TYPES
from .dynamic_form_base import DynamicFormBase
@@ -45,7 +45,11 @@ class CatalogForm(FlaskForm):
def __init__(self, *args, **kwargs):
    """Build the form and fill the 'type' choices for the current tenant.

    The tenant id is read from ``session['tenant']``. The choices are the
    entries returned by ``TenantServices.get_available_types_for_tenant``
    for the "catalogs" category, as ``(type key, type name)`` pairs.
    """
    super().__init__(*args, **kwargs)
    # Dynamically populate the 'type' field using the constructor.
    # Only the tenant-specific list is assigned: the earlier assignment from
    # CATALOG_TYPES was overwritten immediately, and the catalogs types cache
    # lookup was never used.
    tenant_id = session.get('tenant').get('id')
    available_types = TenantServices.get_available_types_for_tenant(tenant_id, "catalogs")
    self.type.choices = [(key, value['name']) for key, value in available_types.items()]
class EditCatalogForm(DynamicFormBase):
@@ -55,6 +59,7 @@ class EditCatalogForm(DynamicFormBase):
# Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config)
type = StringField('Catalog Type', validators=[DataRequired()], render_kw={'readonly': True})
type_version = StringField('Catalog Type Version', validators=[DataRequired()], render_kw={'readonly': True})
# Selection fields for processing & creating embeddings
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
@@ -130,9 +135,21 @@ class RetrieverForm(FlaskForm):
def __init__(self, *args, **kwargs):
    """Build the form and fill the 'type' choices for the current context.

    Candidate retriever types come from
    ``TenantServices.get_available_types_for_tenant`` for the tenant stored
    in the session. A candidate that declares ``valid_catalog_types`` is only
    offered when the type of the catalog stored in ``session['catalog']`` is
    in that list; a candidate without that key (or with an empty value) is
    always offered. When no catalog is stored in the session, only the
    unrestricted candidates are offered.
    """
    super().__init__(*args, **kwargs)
    tenant_id = session.get('tenant').get('id')
    choices = TenantServices.get_available_types_for_tenant(tenant_id, "retrievers")
    current_app.logger.debug(f"Potential choices: {choices}")

    # The selected catalog is optional session state: read it once, and fall
    # back to None instead of raising AttributeError when nothing is selected.
    catalog_type = (session.get('catalog') or {}).get('type')

    # Dynamically populate the 'type' field using the constructor
    type_choices = []
    for key, value in choices.items():
        valid_catalog_types = value.get('valid_catalog_types', None)
        if valid_catalog_types:
            current_app.logger.debug(f"Check {catalog_type} in {valid_catalog_types}")
            if catalog_type in valid_catalog_types:
                type_choices.append((key, value['name']))
        else:  # Retriever type is valid for all catalog types
            type_choices.append((key, value['name']))
    self.type.choices = type_choices
class EditRetrieverForm(DynamicFormBase):
@@ -141,6 +158,7 @@ class EditRetrieverForm(DynamicFormBase):
# Select Field for Retriever Type (Uses the RETRIEVER_TYPES defined in config)
type = StringField('Processor Type', validators=[DataRequired()], render_kw={'readonly': True})
type_version = StringField('Retriever Type Version', validators=[DataRequired()], render_kw={'readonly': True})
tuning = BooleanField('Enable Tuning', default=False)
# Metadata fields

View File

@@ -50,7 +50,7 @@ def before_request():
current_app.logger.error(f'Error switching schema in Document Blueprint: {e}')
raise
# Catalog Management ------------------------------------------------------------------------------
@document_bp.route('/catalog', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def catalog():
@@ -62,6 +62,11 @@ def catalog():
form.populate_obj(new_catalog)
set_logging_information(new_catalog, dt.now(tz.utc))
# Set Type information, including the configuration for backward compatibility
new_catalog.type_version = cache_manager.catalogs_version_tree_cache.get_latest_version(new_catalog.type)
new_catalog.configuration = (cache_manager.catalogs_config_cache
.get_config(new_catalog.type, new_catalog.type_version).get("configuration", {}))
try:
db.session.add(new_catalog)
db.session.commit()
@@ -110,6 +115,7 @@ def handle_catalog_selection():
current_app.logger.info(f'Setting session catalog to {catalog.name}')
session['catalog_id'] = catalog_id
session['catalog_name'] = catalog.name
session['catalog'] = catalog.to_dict()
elif action == 'edit_catalog':
return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))
@@ -124,15 +130,14 @@ def edit_catalog(catalog_id):
form = EditCatalogForm(request.form, obj=catalog)
full_config = cache_manager.catalogs_config_cache.get_config(catalog.type)
form.add_dynamic_fields("configuration", full_config, catalog.configuration)
if request.method == 'POST' and form.validate_on_submit():
form.populate_obj(catalog)
catalog.configuration = form.get_dynamic_data('configuration')
update_logging_information(catalog, dt.now(tz.utc))
try:
db.session.add(catalog)
db.session.commit()
if session.get('catalog_id') == catalog_id:
session['catalog'] = catalog.to_dict()
flash('Catalog successfully updated successfully!', 'success')
current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
except SQLAlchemyError as e:
@@ -147,6 +152,7 @@ def edit_catalog(catalog_id):
return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
# Processor Management ----------------------------------------------------------------------------
@document_bp.route('/processor', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def processor():
@@ -264,6 +270,7 @@ def handle_processor_selection():
return redirect(prefixed_url_for('document_bp.processors'))
# Retriever Management ----------------------------------------------------------------------------
@document_bp.route('/retriever', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def retriever():
@@ -306,8 +313,7 @@ def edit_retriever(retriever_id):
form = EditRetrieverForm(request.form, obj=retriever)
retriever_config = cache_manager.retrievers_config_cache.get_config(retriever.type, retriever.type_version)
configuration_config = retriever_config.get("configuration")
form.add_dynamic_fields("configuration", configuration_config, retriever.configuration)
form.add_dynamic_fields("configuration", retriever_config, retriever.configuration)
if form.validate_on_submit():
# Update basic fields
@@ -375,6 +381,7 @@ def handle_retriever_selection():
return redirect(prefixed_url_for('document_bp.retrievers'))
# Document Management -----------------------------------------------------------------------------
@document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def add_document():
@@ -730,6 +737,53 @@ def handle_full_document_selection():
return redirect(prefixed_url_for('document_bp.full_documents'))
@document_bp.route('/document_versions_list', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def document_versions_list():
    """Return the response of the document versions list view.

    The view is a ``DocumentVersionListView`` over ``DocumentVersion``,
    created with ``per_page=20``.
    """
    return DocumentVersionListView(
        DocumentVersion,
        'document/document_versions_list_view.html',
        per_page=20,
    ).get()
@document_bp.route('/view_document_version_markdown/<int:document_version_id>', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_document_version_markdown(document_version_id):
    """Show the stored markdown file of one document version.

    The document version is loaded with ``get_or_404``. The object name of
    the ``<id>.md`` file is built with ``minio_client.generate_object_name``,
    the file is downloaded with ``minio_client.download_document_file`` for
    the tenant stored in the session, decoded as UTF-8 and passed to the
    ``document/view_document_version_markdown.html`` template.

    Any exception raised while building the name, downloading, decoding or
    rendering is logged, flashed to the user, and answered with a redirect.
    """
    current_app.logger.debug(f'Viewing document version markdown {document_version_id}')
    # Retrieve document version
    document_version = DocumentVersion.query.get_or_404(document_version_id)

    # Retrieve tenant information
    tenant_id = session.get('tenant').get('id')

    try:
        # Generate markdown filename
        markdown_filename = f"{document_version.id}.md"
        markdown_object_name = minio_client.generate_object_name(document_version.doc_id, document_version.language,
                                                                 document_version.id, markdown_filename)
        current_app.logger.debug(f'Markdown object name: {markdown_object_name}')

        # Download actual markdown file
        file_data = minio_client.download_document_file(
            tenant_id,
            document_version.bucket_name,
            markdown_object_name,
        )

        # Decode the binary data to UTF-8 text
        markdown_content = file_data.decode('utf-8')
        # Log the size only: writing the complete document into the debug log
        # bloats the log and copies tenant content into it.
        current_app.logger.debug(f'Markdown content length: {len(markdown_content)} characters')

        # Render the template with the markdown content
        return render_template(
            'document/view_document_version_markdown.html',
            document_version=document_version,
            markdown_content=markdown_content
        )
    except Exception as e:
        current_app.logger.error(f"Error retrieving markdown for document version {document_version_id}: {str(e)}")
        flash(f"Error retrieving processed document: {str(e)}", "danger")
        # NOTE(review): the 'document_bp.document_versions' endpoint is not
        # visible in this excerpt - confirm it exists.
        return redirect(prefixed_url_for('document_bp.document_versions'))
# Utilities ---------------------------------------------------------------------------------------
@document_bp.route('/library_operations', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def library_operations():
@@ -834,52 +888,6 @@ def create_default_rag_library():
return redirect(prefixed_url_for('document_bp.library_operations'))
@document_bp.route('/document_versions_list', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def document_versions_list():
    """Return the response of the document versions list view.

    The view is a ``DocumentVersionListView`` over ``DocumentVersion``,
    created with ``per_page=20``.
    """
    return DocumentVersionListView(
        DocumentVersion,
        'document/document_versions_list_view.html',
        per_page=20,
    ).get()
@document_bp.route('/view_document_version_markdown/<int:document_version_id>', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_document_version_markdown(document_version_id):
    """Show the stored markdown file of one document version.

    The document version is loaded with ``get_or_404``. The object name of
    the ``<id>.md`` file is built with ``minio_client.generate_object_name``,
    the file is downloaded with ``minio_client.download_document_file`` for
    the tenant stored in the session, decoded as UTF-8 and passed to the
    ``document/view_document_version_markdown.html`` template.

    Any exception raised while building the name, downloading, decoding or
    rendering is logged, flashed to the user, and answered with a redirect.
    """
    current_app.logger.debug(f'Viewing document version markdown {document_version_id}')
    # Retrieve document version
    document_version = DocumentVersion.query.get_or_404(document_version_id)

    # Retrieve tenant information
    tenant_id = session.get('tenant').get('id')

    try:
        # Generate markdown filename
        markdown_filename = f"{document_version.id}.md"
        markdown_object_name = minio_client.generate_object_name(document_version.doc_id, document_version.language,
                                                                 document_version.id, markdown_filename)
        current_app.logger.debug(f'Markdown object name: {markdown_object_name}')

        # Download actual markdown file
        file_data = minio_client.download_document_file(
            tenant_id,
            document_version.bucket_name,
            markdown_object_name,
        )

        # Decode the binary data to UTF-8 text
        markdown_content = file_data.decode('utf-8')
        # Log the size only: writing the complete document into the debug log
        # bloats the log and copies tenant content into it.
        current_app.logger.debug(f'Markdown content length: {len(markdown_content)} characters')

        # Render the template with the markdown content
        return render_template(
            'document/view_document_version_markdown.html',
            document_version=document_version,
            markdown_content=markdown_content
        )
    except Exception as e:
        current_app.logger.error(f"Error retrieving markdown for document version {document_version_id}: {str(e)}")
        flash(f"Error retrieving processed document: {str(e)}", "danger")
        # NOTE(review): the 'document_bp.document_versions' endpoint is not
        # visible in this excerpt - confirm it exists.
        return redirect(prefixed_url_for('document_bp.document_versions'))
def refresh_all_documents():
for doc in Document.query.all():
refresh_document(doc.id)

View File

@@ -40,6 +40,7 @@ class EditPartnerServiceForm(DynamicFormBase):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
type = StringField('Partner Service Type', validators=[DataRequired()], render_kw={'readonly': True})
type_version = StringField('Partner Service Type Version', validators=[DataRequired()], render_kw={'readonly': True})
active = BooleanField('Active', validators=[Optional()], default=True)
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])

View File

@@ -45,6 +45,7 @@ def handle_trigger_action():
return redirect(prefixed_url_for('partner_bp.trigger_actions'))
# Partner Management ------------------------------------------------------------------------------
@partner_bp.route('/partner/<int:partner_id>', methods=['GET', 'POST'])
@roles_accepted('Super User')
def edit_partner(partner_id):
@@ -124,6 +125,7 @@ def handle_partner_selection():
return redirect(prefixed_url_for('partner_bp.partners'))
# Partner Service Management ----------------------------------------------------------------------
@partner_bp.route('/partner_service', methods=['GET', 'POST'])
@roles_accepted('Super User')
def partner_service():
@@ -160,20 +162,12 @@ def partner_service():
@roles_accepted('Super User')
def edit_partner_service(partner_service_id):
partner_service = PartnerService.query.get_or_404(partner_service_id)
partner = session.get('partner', None)
partner_id = session['partner']['id']
current_app.logger.debug(f"Request Type: {request.method}")
form = EditPartnerServiceForm(obj=partner_service)
form = EditPartnerServiceForm(request.form, obj=partner_service)
partner_service_config = cache_manager.partner_services_config_cache.get_config(partner_service.type,
partner_service.type_version)
configuration_config = partner_service_config.get('configuration')
current_app.logger.debug(f"Configuration config for {partner_service.type} {partner_service.type_version}: "
f"{configuration_config}")
form.add_dynamic_fields("configuration", partner_service_config, partner_service.configuration)
permissions_config = partner_service_config.get('permissions')
current_app.logger.debug(f"Permissions config for {partner_service.type} {partner_service.type_version}: "
f"{permissions_config}")
form.add_dynamic_fields("permissions", partner_service_config, partner_service.permissions)
if request.method == 'POST':
@@ -188,9 +182,6 @@ def edit_partner_service(partner_service_id):
current_app.logger.debug(f"Partner Service configuration: {partner_service.configuration}")
current_app.logger.debug(f"Partner Service permissions: {partner_service.permissions}")
# update partner relationship
partner_service.partner_id = partner_id
update_logging_information(partner_service, dt.now(tz.utc))
try:
@@ -258,6 +249,7 @@ def handle_partner_service_selection():
return redirect(prefixed_url_for('partner_bp.partner_services'))
# Utility Functions
def register_partner_from_tenant(tenant_id):
# check if there is already a partner defined for the tenant
partner = Partner.query.filter_by(tenant_id=tenant_id).first()

View File

@@ -36,6 +36,7 @@ def log_after_request(response):
return response
# Tenant Management -------------------------------------------------------------------------------
@user_bp.route('/tenant', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin')
def tenant():
@@ -136,6 +137,114 @@ def edit_tenant(tenant_id):
return render_template('user/tenant.html', form=form, tenant_id=tenant_id)
@user_bp.route('/select_tenant', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin')  # Allow both roles
def select_tenant():
    """Render the paginated, filterable tenant selection list.

    Query parameters ``page`` (default 1) and ``per_page`` (default 10)
    control pagination. When the current user has the 'Partner Admin' role
    and a partner is stored in the session, the list is restricted to the
    partner's own tenant plus the tenants linked through ``PartnerTenant``
    rows of the partner's MANAGEMENT_SERVICE, if it has one. On a valid form
    submission the tenant type and name filters from ``TenantSelectionForm``
    are applied on top. Results are ordered by tenant name.
    """
    filter_form = TenantSelectionForm(request.form)
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 10, type=int)

    # Start with a base query
    query = Tenant.query

    current_app.logger.debug("We proberen het scherm op te bouwen")
    current_app.logger.debug(f"Session: {session}")

    # Apply different filters based on user role
    # NOTE(review): a 'Partner Admin' without a partner in the session still
    # gets the unfiltered list - confirm that this state cannot occur.
    if current_user.has_roles('Partner Admin') and 'partner' in session:
        current_app.logger.debug("We zitten in partner mode")
        partner = session['partner']

        # The partner's own tenant is always allowed. Starting from this list
        # (instead of filtering only when a management service exists) keeps a
        # partner without a management service from seeing every tenant.
        allowed_tenant_ids = [partner['tenant_id']]

        # Get the partner's management service
        management_service = next((service for service in partner['services']
                                   if service.get('type') == 'MANAGEMENT_SERVICE'), None)
        if management_service:
            # Get tenants managed by this partner through PartnerTenant relationships
            managed_rows = db.session.query(PartnerTenant.tenant_id).filter_by(
                partner_service_id=management_service['id']
            ).all()
            # Each row is a one-element tuple; flatten while extending
            allowed_tenant_ids.extend(tenant_id for (tenant_id,) in managed_rows)

        # Filter query to only show allowed tenants
        query = query.filter(Tenant.id.in_(allowed_tenant_ids))

    current_app.logger.debug("We zitten na partner service selectie")

    # Apply form filters (for both Super User and Partner Admin)
    if filter_form.validate_on_submit():
        if filter_form.types.data:
            query = query.filter(Tenant.type.in_(filter_form.types.data))
        if filter_form.search.data:
            search = f"%{filter_form.search.data}%"
            query = query.filter(Tenant.name.ilike(search))

    # Finalize query
    query = query.order_by(Tenant.name)
    pagination = query.paginate(page=page, per_page=per_page, error_out=False)
    tenants = pagination.items

    rows = prepare_table_for_macro(tenants, [('id', ''), ('name', ''), ('website', ''), ('type', '')])

    return render_template('user/select_tenant.html', rows=rows, pagination=pagination, filter_form=filter_form)
@user_bp.route('/handle_tenant_selection', methods=['POST'])
@roles_accepted('Super User', 'Partner Admin')
def handle_tenant_selection():
action = request.form['action']
if action == 'create_tenant':
return redirect(prefixed_url_for('user_bp.tenant'))
tenant_identification = request.form['selected_row']
tenant_id = ast.literal_eval(tenant_identification).get('value')
if not UserServices.can_user_edit_tenant(tenant_id):
current_app.logger.info(f"User not authenticated to edit tenant {tenant_id}.")
flash(f"You are not authenticated to manage tenant {tenant_id}", 'danger')
return redirect(prefixed_url_for('user_bp.select_tenant'))
the_tenant = Tenant.query.get(tenant_id)
# set tenant information in the session
session['tenant'] = the_tenant.to_dict()
# remove catalog-related items from the session
session.pop('catalog_id', None)
session.pop('catalog_name', None)
match action:
case 'edit_tenant':
return redirect(prefixed_url_for('user_bp.edit_tenant', tenant_id=tenant_id))
case 'select_tenant':
return redirect(prefixed_url_for('user_bp.tenant_overview'))
# Add more conditions for other actions
return redirect(prefixed_url_for('select_tenant'))
@user_bp.route('/tenant_overview', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def tenant_overview():
    """Render the overview page of the tenant stored in the session.

    The tenant is loaded with ``get_or_404`` and wrapped in an
    ``EditTenantForm``. The template also receives the name of the tenant's
    default make, or ``None`` when the tenant has no default make.
    """
    tenant = Tenant.query.get_or_404(session['tenant']['id'])
    form = EditTenantForm(obj=tenant)

    # Set the value of default_tenant_make_id (the form field gets a string)
    make_id = tenant.default_tenant_make_id
    if make_id:
        form.default_tenant_make_id.data = str(make_id)

    # Fetch the name of the default make if it exists
    default_make = tenant.default_tenant_make
    default_make_name = default_make.name if default_make else None

    return render_template('user/tenant_overview.html', form=form, default_make_name=default_make_name)
# User Management ---------------------------------------------------------------------------------
@user_bp.route('/user', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin', 'Partner Admin')
def user():
@@ -235,94 +344,6 @@ def edit_user(user_id):
return render_template('user/edit_user.html', form=form, user_id=user_id)
@user_bp.route('/select_tenant', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin')  # Allow both roles
def select_tenant():
    """Render the paginated, filterable tenant selection list.

    Query parameters ``page`` (default 1) and ``per_page`` (default 10)
    control pagination. When the current user has the 'Partner Admin' role
    and a partner is stored in the session, the list is restricted to the
    partner's own tenant plus the tenants linked through ``PartnerTenant``
    rows of the partner's MANAGEMENT_SERVICE, if it has one. On a valid form
    submission the tenant type and name filters from ``TenantSelectionForm``
    are applied on top. Results are ordered by tenant name.
    """
    filter_form = TenantSelectionForm(request.form)
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 10, type=int)

    # Start with a base query
    query = Tenant.query

    current_app.logger.debug("We proberen het scherm op te bouwen")
    current_app.logger.debug(f"Session: {session}")

    # Apply different filters based on user role
    # NOTE(review): a 'Partner Admin' without a partner in the session still
    # gets the unfiltered list - confirm that this state cannot occur.
    if current_user.has_roles('Partner Admin') and 'partner' in session:
        current_app.logger.debug("We zitten in partner mode")
        partner = session['partner']

        # The partner's own tenant is always allowed. Starting from this list
        # (instead of filtering only when a management service exists) keeps a
        # partner without a management service from seeing every tenant.
        allowed_tenant_ids = [partner['tenant_id']]

        # Get the partner's management service
        management_service = next((service for service in partner['services']
                                   if service.get('type') == 'MANAGEMENT_SERVICE'), None)
        if management_service:
            # Get tenants managed by this partner through PartnerTenant relationships
            managed_rows = db.session.query(PartnerTenant.tenant_id).filter_by(
                partner_service_id=management_service['id']
            ).all()
            # Each row is a one-element tuple; flatten while extending
            allowed_tenant_ids.extend(tenant_id for (tenant_id,) in managed_rows)

        # Filter query to only show allowed tenants
        query = query.filter(Tenant.id.in_(allowed_tenant_ids))

    current_app.logger.debug("We zitten na partner service selectie")

    # Apply form filters (for both Super User and Partner Admin)
    if filter_form.validate_on_submit():
        if filter_form.types.data:
            query = query.filter(Tenant.type.in_(filter_form.types.data))
        if filter_form.search.data:
            search = f"%{filter_form.search.data}%"
            query = query.filter(Tenant.name.ilike(search))

    # Finalize query
    query = query.order_by(Tenant.name)
    pagination = query.paginate(page=page, per_page=per_page, error_out=False)
    tenants = pagination.items

    rows = prepare_table_for_macro(tenants, [('id', ''), ('name', ''), ('website', ''), ('type', '')])

    return render_template('user/select_tenant.html', rows=rows, pagination=pagination, filter_form=filter_form)
@user_bp.route('/handle_tenant_selection', methods=['POST'])
@roles_accepted('Super User', 'Partner Admin')
def handle_tenant_selection():
action = request.form['action']
if action == 'create_tenant':
return redirect(prefixed_url_for('user_bp.tenant'))
tenant_identification = request.form['selected_row']
tenant_id = ast.literal_eval(tenant_identification).get('value')
if not UserServices.can_user_edit_tenant(tenant_id):
current_app.logger.info(f"User not authenticated to edit tenant {tenant_id}.")
flash(f"You are not authenticated to manage tenant {tenant_id}", 'danger')
return redirect(prefixed_url_for('user_bp.select_tenant'))
the_tenant = Tenant.query.get(tenant_id)
# set tenant information in the session
session['tenant'] = the_tenant.to_dict()
# remove catalog-related items from the session
session.pop('catalog_id', None)
session.pop('catalog_name', None)
match action:
case 'edit_tenant':
return redirect(prefixed_url_for('user_bp.edit_tenant', tenant_id=tenant_id))
case 'select_tenant':
return redirect(prefixed_url_for('user_bp.tenant_overview'))
# Add more conditions for other actions
return redirect(prefixed_url_for('select_tenant'))
@user_bp.route('/view_users')
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_users():
@@ -369,6 +390,7 @@ def handle_user_action():
return redirect(prefixed_url_for('user_bp.view_users'))
# Tenant Domain Management ------------------------------------------------------------------------
@user_bp.route('/view_tenant_domains')
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def view_tenant_domains():
@@ -461,25 +483,7 @@ def edit_tenant_domain(tenant_domain_id):
return render_template('user/edit_tenant_domain.html', form=form, tenant_domain_id=tenant_domain_id)
@user_bp.route('/tenant_overview', methods=['GET'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def tenant_overview():
    """Render the overview page of the tenant stored in the session.

    The tenant is loaded with ``get_or_404`` and wrapped in an
    ``EditTenantForm``. The template also receives the name of the tenant's
    default make, or ``None`` when the tenant has no default make.
    """
    tenant = Tenant.query.get_or_404(session['tenant']['id'])
    form = EditTenantForm(obj=tenant)

    # Set the value of default_tenant_make_id (the form field gets a string)
    make_id = tenant.default_tenant_make_id
    if make_id:
        form.default_tenant_make_id.data = str(make_id)

    # Fetch the name of the default make if it exists
    default_make = tenant.default_tenant_make
    default_make_name = default_make.name if default_make else None

    return render_template('user/tenant_overview.html', form=form, default_make_name=default_make_name)
# Tenant Project Management -----------------------------------------------------------------------
@user_bp.route('/tenant_project', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def tenant_project():
@@ -639,6 +643,7 @@ def delete_tenant_project(tenant_project_id):
return redirect(prefixed_url_for('user_bp.tenant_projects'))
# Tenant Make Management --------------------------------------------------------------------------
@user_bp.route('/tenant_make', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Partner Admin', 'Tenant Admin')
def tenant_make():