From 43547287b180532c8607bcabacb962a87da5978f Mon Sep 17 00:00:00 2001 From: Josako Date: Tue, 29 Oct 2024 09:17:44 +0100 Subject: [PATCH] - Refining & Enhancing dynamic fields - Creating a specialized Form class for handling dynamic fields - Refinement of HTML-macros to handle dynamic fields - Introduction of dynamic fields for Catalogs --- common/langchain/eveai_dossier_retriever.py | 154 ++++++++++++++++ common/models/document.py | 2 +- config/catalog_types.py | 46 ++++- docker/build_and_push_eveai.sh | 3 +- docker/release_and_tag_eveai.sh | 1 + .../templates/document/edit_catalog.html | 18 +- .../templates/document/edit_retriever.html | 10 +- eveai_app/templates/macros.html | 137 +++++++++++--- eveai_app/templates/scripts.html | 54 ++++++ eveai_app/views/document_forms.py | 45 +++-- eveai_app/views/document_views.py | 61 +------ eveai_app/views/dynamic_form_base.py | 170 +++++++++++++++--- ...emoving_parent_catalog_id_from_catalog_.py | 33 ++++ 13 files changed, 605 insertions(+), 129 deletions(-) create mode 100644 common/langchain/eveai_dossier_retriever.py create mode 100644 migrations/tenant/versions/b64d5cf32c7a_removing_parent_catalog_id_from_catalog_.py diff --git a/common/langchain/eveai_dossier_retriever.py b/common/langchain/eveai_dossier_retriever.py new file mode 100644 index 0000000..e54b8a6 --- /dev/null +++ b/common/langchain/eveai_dossier_retriever.py @@ -0,0 +1,154 @@ +from langchain_core.retrievers import BaseRetriever +from sqlalchemy import func, and_, or_, desc, cast, JSON +from sqlalchemy.exc import SQLAlchemyError +from pydantic import BaseModel, Field, PrivateAttr +from typing import Any, Dict, List, Optional +from flask import current_app +from contextlib import contextmanager + +from common.extensions import db +from common.models.document import Document, DocumentVersion, Catalog +from common.utils.datetime_utils import get_date_in_timezone +from common.utils.model_utils import ModelVariables + + +class EveAIDossierRetriever(BaseRetriever, BaseModel): + _catalog_id: int = PrivateAttr() + _model_variables: ModelVariables = PrivateAttr() + _tenant_info: Dict[str, Any] = PrivateAttr() + _active_filters: Optional[Dict[str, Any]] = PrivateAttr() + + def __init__(self, catalog_id: int, model_variables: ModelVariables, tenant_info: Dict[str, Any]): + super().__init__() + self._catalog_id = catalog_id + self._model_variables = model_variables + self._tenant_info = tenant_info + self._active_filters = None + + @contextmanager + def filtering(self, metadata_filters: Dict[str, Any]): + """Context manager for temporarily setting metadata filters""" + previous_filters = self._active_filters + self._active_filters = metadata_filters + try: + yield self + finally: + self._active_filters = previous_filters + + def _build_metadata_filter_conditions(self, query): + """Build SQL conditions for metadata filtering""" + if not self._active_filters: + return query + + conditions = [] + for field, value in self._active_filters.items(): + if value is None: + continue + + # Handle both single values and lists of values + if isinstance(value, (list, tuple)): + # Multiple values - create OR condition + or_conditions = [] + for val in value: + or_conditions.append( + cast(DocumentVersion.user_metadata[field].astext, JSON) == str(val) + ) + if or_conditions: + conditions.append(or_(*or_conditions)) + else: + # Single value - direct comparison + conditions.append( + cast(DocumentVersion.user_metadata[field].astext, JSON) == str(value) + ) + + if conditions: + query = query.filter(and_(*conditions)) + + return query + + def _get_relevant_documents(self, query: str): + current_app.logger.debug(f'Retrieving relevant documents for dossier query: {query}') + if self._active_filters: + current_app.logger.debug(f'Using metadata filters: {self._active_filters}') + + query_embedding = self._get_query_embedding(query) + db_class = self.model_variables['embedding_db_model'] + similarity_threshold = self.model_variables['similarity_threshold'] + k = self.model_variables['k'] + + try: + current_date = get_date_in_timezone(self.tenant_info['timezone']) + + # Subquery to find the latest version of each document + subquery = ( + db.session.query( + DocumentVersion.doc_id, + func.max(DocumentVersion.id).label('latest_version_id') + ) + .group_by(DocumentVersion.doc_id) + .subquery() + ) + + # Build base query + # Build base query + query_obj = ( + db.session.query(db_class, + (1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity')) + .join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id) + .join(Document, DocumentVersion.doc_id == Document.id) + .join(subquery, DocumentVersion.id == subquery.c.latest_version_id) + .filter( + or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date), + or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date), + (1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold, + Document.catalog_id == self._catalog_id + ) + ) + + # Apply metadata filters + query_obj = self._build_metadata_filter_conditions(query_obj) + + # Order and limit results + query_obj = query_obj.order_by(desc('similarity')).limit(k) + + # Debug logging for RAG tuning if enabled + if self.model_variables['rag_tuning']: + self._log_rag_tuning(query_obj, query_embedding) + + res = query_obj.all() + + result = [] + for doc in res: + if self.model_variables['rag_tuning']: + current_app.logger.debug(f'Document ID: {doc[0].id} - Distance: {doc[1]}\n') + current_app.logger.debug(f'Chunk: \n {doc[0].chunk}\n\n') + result.append(f'SOURCE: {doc[0].id}\n\n{doc[0].chunk}\n\n') + + except SQLAlchemyError as e: + current_app.logger.error(f'Error retrieving relevant documents: {e}') + db.session.rollback() + return [] + + return result + + def _log_rag_tuning(self, query_obj, query_embedding): + """Log debug information for RAG tuning""" + current_app.rag_tuning_logger.debug("Debug: Query execution plan:") + current_app.rag_tuning_logger.debug(f"{query_obj.statement}") + if self._active_filters: + current_app.rag_tuning_logger.debug("Debug: Active metadata filters:") + current_app.rag_tuning_logger.debug(f"{self._active_filters}") + + def _get_query_embedding(self, query: str): + """Get embedding for the query text""" + embedding_model = self.model_variables['embedding_model'] + query_embedding = embedding_model.embed_query(query) + return query_embedding + + @property + def model_variables(self) -> ModelVariables: + return self._model_variables + + @property + def tenant_info(self) -> Dict[str, Any]: + return self._tenant_info \ No newline at end of file diff --git a/common/models/document.py b/common/models/document.py index 3d9d4c9..990e8d2 100644 --- a/common/models/document.py +++ b/common/models/document.py @@ -8,7 +8,6 @@ import sqlalchemy as sa class Catalog(db.Model): id = db.Column(db.Integer, primary_key=True) - parent_id = db.Column(db.Integer, db.ForeignKey('catalog.id'), nullable=True) name = db.Column(db.String(50), nullable=False) description = db.Column(db.Text, nullable=True) type = db.Column(db.String(50), nullable=False, default="DEFAULT_CATALOG") @@ -95,6 +94,7 @@ class DocumentVersion(db.Model): system_context = db.Column(db.Text, nullable=True) user_metadata = db.Column(JSONB, nullable=True) system_metadata = db.Column(JSONB, nullable=True) + catalog_configuration = db.Column(JSONB, nullable=True) # Versioning Information created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now()) diff --git a/config/catalog_types.py b/config/catalog_types.py index 1346bdf..8e67fcc 100644 --- a/config/catalog_types.py +++ b/config/catalog_types.py @@ -2,10 +2,52 @@ CATALOG_TYPES = { "DEFAULT": { "name": "Default Catalog", - "Description": "Default Catalog" + "Description": "A Catalog with information in Evie's Library, to be considered as a whole", + "configuration": {} }, "DOSSIER": { "name": "Dossier Catalog", - "Description": "A Catalog in which several Dossiers can be stored" + "Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored", + "configuration": { + "tagging_fields": { + "name": "Tagging Fields", + "type": "tagging_fields", + "description": """Define the metadata fields that will be used for tagging documents. + Each field must have: + - type: one of 'string', 'integer', 'float', 'date', 'enum' + - required: boolean indicating if the field is mandatory + - description: field description + - allowed_values: list of values (for enum type only) + - min_value/max_value: range limits (for numeric types only)""", + "required": True, + "default": {}, + "field_properties": { + "type": { + "allowed_values": ["string", "integer", "float", "date", "enum"], + "required": True + }, + "required": { + "type": "boolean", + "default": False + }, + "description": { + "type": "string" + }, + "allowed_values": { + "type": "list", + "description": "For enum type fields only" + }, + "min_value": { + "type": "number", + "description": "For numeric fields only" + }, + "max_value": { + "type": "number", + "description": "For numeric fields only" + } + } + } + }, + "document_version_user_metadata": ["tagging_fields"] }, } diff --git a/docker/build_and_push_eveai.sh b/docker/build_and_push_eveai.sh index b0880c9..4913172 100755 --- a/docker/build_and_push_eveai.sh +++ b/docker/build_and_push_eveai.sh @@ -169,4 +169,5 @@ for SERVICE in "${SERVICES[@]}"; do fi done -echo "All specified services processed." \ No newline at end of file +echo "All specified services processed." +echo "Finished at $(date +"%d/%m/%Y %H:%M:%S")" \ No newline at end of file diff --git a/docker/release_and_tag_eveai.sh b/docker/release_and_tag_eveai.sh index 55139dc..dc83bab 100755 --- a/docker/release_and_tag_eveai.sh +++ b/docker/release_and_tag_eveai.sh @@ -59,3 +59,4 @@ git tag -a v$RELEASE_VERSION -m "Release $RELEASE_VERSION: $RELEASE_MESSAGE" git push origin v$RELEASE_VERSION echo "Release process completed for version: $RELEASE_VERSION" +echo "Finished at $(date +"%d/%m/%Y %H:%M:%S")" \ No newline at end of file diff --git a/eveai_app/templates/document/edit_catalog.html b/eveai_app/templates/document/edit_catalog.html index 51a548b..86d1c05 100644 --- a/eveai_app/templates/document/edit_catalog.html +++ b/eveai_app/templates/document/edit_catalog.html @@ -11,12 +11,22 @@ When you change chunking of embedding information, you'll need to manually refre {% block content %}
{{ form.hidden_tag() }} - {% set disabled_fields = [] %} - {% set exclude_fields = [] %} - {% for field in form %} + {% set disabled_fields = ['type'] %} + {% set exclude_fields = [] %} + + {% for field in form.get_static_fields() %} {{ render_field(field, disabled_fields, exclude_fields) }} {% endfor %} - + + {% for collection_name, fields in form.get_dynamic_fields().items() %} + {% if fields|length > 0 %} +

{{ collection_name }}

+ {% endif %} + {% for field in fields %} + {{ render_field(field, disabled_fields, exclude_fields) }} + {% endfor %} + {% endfor %} +
{% endblock %} diff --git a/eveai_app/templates/document/edit_retriever.html b/eveai_app/templates/document/edit_retriever.html index f8af75c..3731c98 100644 --- a/eveai_app/templates/document/edit_retriever.html +++ b/eveai_app/templates/document/edit_retriever.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% from "macros.html" import render_field2, render_dynamic_fields %} +{% from "macros.html" import render_field %} {% block title %}Edit Retriever{% endblock %} @@ -13,13 +13,15 @@ {% set exclude_fields = [] %} {% for field in form.get_static_fields() %} - {{ render_field2(field, disabled_fields, exclude_fields) }} + {{ render_field(field, disabled_fields, exclude_fields) }} {% endfor %} {% for collection_name, fields in form.get_dynamic_fields().items() %} -

{{ collection_name }}

+ {% if fields|length > 0 %} +

{{ collection_name }}

+ {% endif %} {% for field in fields %} - {{ render_field2(field, disabled_fields, exclude_fields) }} + {{ render_field(field, disabled_fields, exclude_fields) }} {% endfor %} {% endfor %} diff --git a/eveai_app/templates/macros.html b/eveai_app/templates/macros.html index e75ff28..49ba40d 100644 --- a/eveai_app/templates/macros.html +++ b/eveai_app/templates/macros.html @@ -1,29 +1,29 @@ -{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %} - {% set disabled = field.name in disabled_fields %} - {% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %} - {% if field.name not in exclude_fields %} - {% if field.type == 'BooleanField' %} -
- {{ field(class="form-check-input " + class, type="checkbox", id="flexSwitchCheckDefault") }} - {{ field.label(class="form-check-label", for="flexSwitchCheckDefault", disabled=disabled) }} -
- {% else %} -
- {{ field.label(class="form-label") }} - {{ field(class="form-control " + class, disabled=disabled) }} - {% if field.errors %} -
- {% for error in field.errors %} - {{ error }} - {% endfor %} -
- {% endif %} -
- {% endif %} - {% endif %} -{% endmacro %} + + + + + + + + + + + + + + + + + + + + + + + + -{% macro render_field2(field, disabled_fields=[], exclude_fields=[], class='') %} +{% macro render_field_old(field, disabled_fields=[], exclude_fields=[], class='') %} @@ -34,7 +34,14 @@
{{ field(class="form-check-input " + class, disabled=disabled) }} - {{ field.label(class="form-check-label") }} + {% if field.description %} + {{ field.label(class="form-check-label", + **{'data-bs-toggle': 'tooltip', + 'data-bs-placement': 'right', + 'title': field.description}) }} + {% else %} + {{ field.label(class="form-check-label") }} + {% endif %}
{% if field.errors %}
@@ -46,8 +53,82 @@
{% else %}
- {{ field.label(class="form-label") }} - {{ field(class="form-control " + class, disabled=disabled) }} + {% if field.description %} + {{ field.label(class="form-label", + **{'data-bs-toggle': 'tooltip', + 'data-bs-placement': 'right', + 'title': field.description}) }} + {% else %} + {{ field.label(class="form-label") }} + {% endif %} + + {% if field.type == 'TextAreaField' and 'json-editor' in class %} +
+ {{ field(class="form-control d-none " + class, disabled=disabled) }} + {% else %} + {{ field(class="form-control " + class, disabled=disabled) }} + {% endif %} + + {% if field.errors %} +
+ {% for error in field.errors %} + {{ error }} + {% endfor %} +
+ {% endif %} +
+ {% endif %} + {% endif %} +{% endmacro %} + +{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %} + + + + {% set disabled = field.name in disabled_fields %} + {% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %} + {% if field.name not in exclude_fields %} + {% if field.type == 'BooleanField' %} +
+
+ {{ field(class="form-check-input " + class, disabled=disabled) }} + {% if field.description %} + {{ field.label(class="form-check-label", + **{'data-bs-toggle': 'tooltip', + 'data-bs-placement': 'right', + 'title': field.description}) }} + {% else %} + {{ field.label(class="form-check-label") }} + {% endif %} +
+ {% if field.errors %} +
+ {% for error in field.errors %} + {{ error }} + {% endfor %} +
+ {% endif %} +
+ {% else %} +
+ {% if field.description %} + {{ field.label(class="form-label", + **{'data-bs-toggle': 'tooltip', + 'data-bs-placement': 'right', + 'title': field.description}) }} + {% else %} + {{ field.label(class="form-label") }} + {% endif %} + + {% if field.type == 'TextAreaField' and 'json-editor' in class %} +
+ {{ field(class="form-control d-none " + class, disabled=disabled) }} + {% elif field.type == 'SelectField' %} + {{ field(class="form-control form-select " + class, disabled=disabled) }} + {% else %} + {{ field(class="form-control " + class, disabled=disabled) }} + {% endif %} + {% if field.errors %}
{% for error in field.errors %} diff --git a/eveai_app/templates/scripts.html b/eveai_app/templates/scripts.html index e30d72c..5be7180 100644 --- a/eveai_app/templates/scripts.html +++ b/eveai_app/templates/scripts.html @@ -14,4 +14,58 @@ + + + + + diff --git a/eveai_app/views/document_forms.py b/eveai_app/views/document_forms.py index b035110..8ad387a 100644 --- a/eveai_app/views/document_forms.py +++ b/eveai_app/views/document_forms.py @@ -8,6 +8,7 @@ import json from wtforms_sqlalchemy.fields import QuerySelectField +from common.extensions import db from common.models.document import Catalog from config.catalog_types import CATALOG_TYPES @@ -34,14 +35,6 @@ def validate_json(form, field): class CatalogForm(FlaskForm): name = StringField('Name', validators=[DataRequired(), Length(max=50)]) description = TextAreaField('Description', validators=[Optional()]) - # Parent ID (Optional for root-level catalogs) - parent = QuerySelectField( - 'Parent Catalog', - query_factory=lambda: Catalog.query.all(), - allow_blank=True, - get_label='name', - validators=[Optional()], - ) # Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config) type = SelectField('Catalog Type', validators=[DataRequired()]) @@ -56,8 +49,9 @@ class CatalogForm(FlaskForm): default='p, h1, h2, h3, h4, h5, h6, li, , tbody, tr, td') html_end_tags = StringField('HTML End Tags', validators=[DataRequired()], default='p, li') - html_included_elements = StringField('HTML Included Elements', validators=[Optional()]) - html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()]) + html_included_elements = StringField('HTML Included Elements', validators=[Optional()], default='article, main') + html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()], + default='header, footer, nav, script') html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()]) min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], default=2000) @@ -75,6 +69,37 @@ class CatalogForm(FlaskForm): self.type.choices = [(key, value['name']) for key, value in CATALOG_TYPES.items()] +class EditCatalogForm(DynamicFormBase): + name = StringField('Name', validators=[DataRequired(), Length(max=50)]) + description = TextAreaField('Description', validators=[Optional()]) + + # Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config) + type = StringField('Catalog Type', validators=[DataRequired()], render_kw={'readonly': True}) + + # Metadata fields + user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json]) + system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json],) + + # HTML Embedding Variables + html_tags = StringField('HTML Tags', validators=[DataRequired()], + default='p, h1, h2, h3, h4, h5, h6, li, , tbody, tr, td') + html_end_tags = StringField('HTML End Tags', validators=[DataRequired()], + default='p, li') + html_included_elements = StringField('HTML Included Elements', validators=[Optional()], default='article, main') + html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()], + default='header, footer, nav, script') + html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()]) + min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], + default=2000) + max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()], + default=3000) + # Chat Variables + chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)]) + chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)]) + # Tuning variables + embed_tuning = BooleanField('Enable Embedding Tuning', default=False) + + class RetrieverForm(FlaskForm): name = StringField('Name', validators=[DataRequired(), Length(max=50)]) description = TextAreaField('Description', validators=[Optional()]) diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py index 260163f..2996002 100644 --- a/eveai_app/views/document_views.py +++ b/eveai_app/views/document_views.py @@ -22,14 +22,14 @@ from common.utils.document_utils import validate_file_type, create_document_stac from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \ EveAIDoubleURLException from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \ - CatalogForm, RetrieverForm, EditRetrieverForm + CatalogForm, EditCatalogForm, RetrieverForm, EditRetrieverForm from common.utils.middleware import mw_before_request from common.utils.celery_utils import current_celery from common.utils.nginx_utils import prefixed_url_for from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict from .document_list_view import DocumentListView from .document_version_list_view import DocumentVersionListView - +from config.catalog_types import CATALOG_TYPES from config.retriever_types import RETRIEVER_TYPES document_bp = Blueprint('document_bp', __name__, url_prefix='/document') @@ -67,7 +67,6 @@ def catalog(): tenant_id = session.get('tenant').get('id') new_catalog = Catalog() form.populate_obj(new_catalog) - new_catalog.parent_id = form.parent.data.get('id') # Handle Embedding Variables new_catalog.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else [] new_catalog.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \ @@ -135,9 +134,12 @@ def handle_catalog_selection(): @roles_accepted('Super User', 'Tenant Admin') def edit_catalog(catalog_id): catalog = Catalog.query.get_or_404(catalog_id) - form = CatalogForm(obj=catalog) tenant_id = session.get('tenant').get('id') + form = EditCatalogForm(request.form, obj=catalog) + configuration_config = CATALOG_TYPES[catalog.type]["configuration"] + form.add_dynamic_fields("configuration", configuration_config, catalog.configuration) + # Convert arrays to comma-separated strings for display if request.method == 'GET': form.html_tags.data = ', '.join(catalog.html_tags or '') @@ -158,6 +160,8 @@ def edit_catalog(catalog_id): if form.html_excluded_elements.data else [] catalog.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \ if form.html_excluded_classes.data else [] + + catalog.configuration = form.get_dynamic_data('configuration') update_logging_information(catalog, dt.now(tz.utc)) try: db.session.add(catalog) @@ -210,8 +214,6 @@ def retriever(): @roles_accepted('Super User', 'Tenant Admin') def edit_retriever(retriever_id): """Edit an existing retriever configuration.""" - current_app.logger.debug(f"Editing Retriever {retriever_id}") - # Get the retriever or return 404 retriever = Retriever.query.get_or_404(retriever_id) @@ -225,7 +227,6 @@ def edit_retriever(retriever_id): form = EditRetrieverForm(request.form, obj=retriever) configuration_config = RETRIEVER_TYPES[retriever.type]["configuration"] - current_app.logger.debug(f"Configuration {configuration_config}") form.add_dynamic_fields("configuration", configuration_config, retriever.configuration) if form.validate_on_submit(): @@ -663,49 +664,3 @@ def fetch_html(url): response.raise_for_status() # Will raise an exception for bad requests return response.content - - -# def prepare_document_data(docs): -# rows = [] -# for doc in docs: -# doc_row = [{'value': doc.name, 'class': '', 'type': 'text'}, -# {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}] -# # Document basic details -# if doc.valid_from: -# doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'}) -# else: -# doc_row.append({'value': '', 'class': '', 'type': 'text'}) -# -# # Nested languages and versions -# languages_rows = [] -# for lang in doc.languages: -# lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}] -# -# # Latest version details if available (should be available ;-) ) -# if lang.latest_version: -# lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"), -# 'class': '', 'type': 'text'}) -# if lang.latest_version.url: -# lang_row.append({'value': lang.latest_version.url, -# 'class': '', 'type': 'link', 'href': lang.latest_version.url}) -# else: -# lang_row.append({'value': '', 'class': '', 'type': 'text'}) -# -# if lang.latest_version.object_name: -# lang_row.append({'value': lang.latest_version.object_name, 'class': '', 'type': 'text'}) -# else: -# lang_row.append({'value': '', 'class': '', 'type': 'text'}) -# -# if lang.latest_version.file_type: -# lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'}) -# else: -# lang_row.append({'value': '', 'class': '', 'type': 'text'}) -# # Include other details as necessary -# -# languages_rows.append(lang_row) -# -# doc_row.append({'is_group': True, 'colspan': '5', -# 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'], -# 'sub_rows': languages_rows}) -# rows.append(doc_row) -# return rows diff --git a/eveai_app/views/dynamic_form_base.py b/eveai_app/views/dynamic_form_base.py index 46ba100..28094a1 100644 --- a/eveai_app/views/dynamic_form_base.py +++ b/eveai_app/views/dynamic_form_base.py @@ -1,6 +1,11 @@ from flask_wtf import FlaskForm -from wtforms import IntegerField, FloatField, BooleanField, StringField, validators +from wtforms import IntegerField, FloatField, BooleanField, StringField, TextAreaField, validators, ValidationError from flask import current_app +import json + +from wtforms.fields.choices import SelectField +from wtforms.fields.datetime import DateField + class DynamicFormBase(FlaskForm): def __init__(self, formdata=None, *args, **kwargs): @@ -10,6 +15,32 @@ class DynamicFormBase(FlaskForm): # Store formdata for later use self.formdata = formdata + def _create_field_validators(self, field_def): + """Create validators based on field definition""" + validators_list = [] + + # Required validator + if field_def.get('required', False): + validators_list.append(validators.InputRequired()) + else: + validators_list.append(validators.Optional()) + + # Type-specific validators + field_type = field_def.get('type') + if field_type in ['integer', 'float']: + min_value = field_def.get('min_value') + max_value = field_def.get('max_value') + if min_value is not None or max_value is not None: + validators_list.append( + validators.NumberRange( + min=min_value if min_value is not None else -float('inf'), + max=max_value if max_value is not None else float('inf'), + message=f"Value must be between {min_value or '-∞'} and {max_value or '∞'}" + ) + ) + + return validators_list + def add_dynamic_fields(self, collection_name, config, initial_data=None): """Add dynamic fields to the form based on the configuration.""" self.dynamic_fields[collection_name] = [] @@ -17,45 +48,77 @@ class DynamicFormBase(FlaskForm): current_app.logger.debug(f"{field_name}: {field_def}") # Prefix the field name with the collection name full_field_name = f"{collection_name}_{field_name}" + label = field_def.get('name') field_type = field_def.get('type') description = field_def.get('description', '') required = field_def.get('required', False) default = field_def.get('default') - # Determine validators - field_validators = [validators.InputRequired()] if required else [validators.Optional()] + # Determine standard validators + field_validators = self._create_field_validators(field_def) - # Map the field type to WTForms field classes - field_class = { - 'int': IntegerField, - 'float': FloatField, - 'boolean': BooleanField, - 'string': StringField, - }.get(field_type, StringField) + # Handle special case for tagging_fields + if field_type == 'tagging_fields': + field_class = TextAreaField + field_validators.append(validate_tagging_fields) + extra_classes = 'json-editor' + field_kwargs = {} + elif field_type == 'enum': + field_class = SelectField + allowed_values = field_def.get('allowed_values', []) + choices = [(str(val), str(val)) for val in allowed_values] + extra_classes = '' + field_kwargs = {'choices': choices} + else: + extra_classes = '' + field_class = { + 'integer': IntegerField, + 'float': FloatField, + 'boolean': BooleanField, + 'string': StringField, + 'date': DateField, + }.get(field_type, StringField) + field_kwargs = {} - # Create the field instance - unbound_field = field_class( - label=description, - validators=field_validators, - default=default - ) + # Prepare field data + field_data = None + if initial_data and field_name in initial_data: + field_data = initial_data[field_name] + if field_type == 'tagging_fields' and isinstance(field_data, dict): + try: + field_data = json.dumps(field_data, indent=2) + except (TypeError, ValueError) as e: + current_app.logger.error(f"Error converting initial data to JSON: {e}") + field_data = "{}" + elif field_def.get('default') is not None: + field_data = field_def.get('default') + + # Create render_kw with classes and any other HTML attributes + render_kw = {'class': extra_classes} if extra_classes else {} + if description: + render_kw['title'] = description # For tooltip + render_kw['data-bs-toggle'] = 'tooltip' + render_kw['data-bs-placement'] = 'right' + + # Create the field + field_kwargs.update({ + 'label': label, + 'description': description, + 'validators': field_validators, + 'default': field_data, + 'render_kw': render_kw + }) + + unbound_field = field_class(**field_kwargs) # Bind the field to the form bound_field = unbound_field.bind(form=self, name=full_field_name) # Process the field with formdata if self.formdata and full_field_name in self.formdata: - # If formdata is available and contains the field bound_field.process(self.formdata) - elif initial_data and field_name in initial_data: - # Use initial data if provided - bound_field.process(formdata=None, data=initial_data[field_name]) else: - # Use default value - bound_field.process(formdata=None, data=default) - - # Set collection name attribute for identification - # bound_field.collection_name = collection_name + bound_field.process(formdata=None, data=field_data) # Use prepared field_data # Add the field to the form setattr(self, full_field_name, bound_field) @@ -88,5 +151,60 @@ class DynamicFormBase(FlaskForm): for full_field_name in self.dynamic_fields[collection_name]: original_field_name = full_field_name[prefix_length:] field = getattr(self, full_field_name) - data[original_field_name] = field.data + # Parse JSON for tagging_fields type + if isinstance(field, TextAreaField) and field.data: + try: + data[original_field_name] = json.loads(field.data) + except json.JSONDecodeError: + # Validation should catch this, but just in case + data[original_field_name] = field.data + else: + data[original_field_name] = field.data return data + + +def validate_tagging_fields(form, field): + """Validate the tagging fields structure""" + if not field.data: + return + + try: + # Parse JSON data + fields_data = json.loads(field.data) + + # Validate it's a dictionary + if not isinstance(fields_data, dict): + raise ValidationError("Tagging fields must be a dictionary") + + # Validate each field definition + for field_name, field_def in fields_data.items(): + if not isinstance(field_def, dict): + raise ValidationError(f"Field definition for {field_name} must be a dictionary") + + # Check required properties + if 'type' not in field_def: + raise ValidationError(f"Field {field_name} missing required 'type' property") + + # Validate type + if field_def['type'] not in ['string', 'integer', 'float', 'date', 'enum']: + raise ValidationError(f"Field {field_name} has invalid type: {field_def['type']}") + + # Validate enum fields have allowed_values + if field_def['type'] == 'enum': + if 'allowed_values' not in field_def: + raise ValidationError(f"Enum field {field_name} missing required 'allowed_values' list") + if not isinstance(field_def['allowed_values'], list): + raise ValidationError(f"Field {field_name} allowed_values must be a list") + + # Validate numeric fields + if field_def['type'] in ['integer', 'float']: + if 'min_value' in field_def and 'max_value' in field_def: + min_val = float(field_def['min_value']) + max_val = float(field_def['max_value']) + if min_val >= max_val: + raise ValidationError(f"Field {field_name} min_value must be less than max_value") + + except json.JSONDecodeError: + raise ValidationError("Invalid JSON format") + except (TypeError, ValueError) as e: + raise ValidationError(f"Invalid field definition: {str(e)}") \ No newline at end of file diff --git a/migrations/tenant/versions/b64d5cf32c7a_removing_parent_catalog_id_from_catalog_.py b/migrations/tenant/versions/b64d5cf32c7a_removing_parent_catalog_id_from_catalog_.py new file mode 100644 index 0000000..39ce7dd --- /dev/null +++ b/migrations/tenant/versions/b64d5cf32c7a_removing_parent_catalog_id_from_catalog_.py @@ -0,0 +1,33 @@ +"""Removing Parent Catalog ID from Catalog ==> use tags in user_metadata instead + +Revision ID: b64d5cf32c7a +Revises: 3717364e6429 +Create Date: 2024-10-28 07:48:04.624298 + +""" +from alembic import op +import sqlalchemy as sa +import pgvector + + +# revision identifiers, used by Alembic. +revision = 'b64d5cf32c7a' +down_revision = '3717364e6429' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint('catalog_parent_id_fkey', 'catalog', type_='foreignkey') + op.drop_column('catalog', 'parent_id') + op.drop_constraint('chat_session_user_id_fkey', 'chat_session', type_='foreignkey') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('catalog', sa.Column('parent_id', sa.INTEGER(), autoincrement=False, nullable=True)) + op.create_foreign_key('catalog_parent_id_fkey', 'catalog', 'catalog', ['parent_id'], ['id']) + # ### end Alembic commands ###