diff --git a/common/models/interaction.py b/common/models/interaction.py index 7084ba5..f37c04f 100644 --- a/common/models/interaction.py +++ b/common/models/interaction.py @@ -45,6 +45,59 @@ class Specialist(db.Model): updated_by = db.Column(db.Integer, db.ForeignKey(User.id)) +class EveAIAsset(db.Model): + id = db.Column(db.Integer, primary_key=True) + name = db.Column(db.String(50), nullable=False) + description = db.Column(db.Text, nullable=True) + type = db.Column(db.String(50), nullable=False, default="DOCUMENT_TEMPLATE") + type_version = db.Column(db.String(20), nullable=True, default="1.0.0") + valid_from = db.Column(db.DateTime, nullable=True) + valid_to = db.Column(db.DateTime, nullable=True) + + # Versioning Information + created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now()) + created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) + updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now()) + updated_by = db.Column(db.Integer, db.ForeignKey(User.id)) + + # Relations + versions = db.relationship('EveAIAssetVersion', backref='asset', lazy=True) + + +class EveAIAssetVersion(db.Model): + id = db.Column(db.Integer, primary_key=True) + asset_id = db.Column(db.Integer, db.ForeignKey(EveAIAsset.id), nullable=False) + bucket_name = db.Column(db.String(255), nullable=True) + configuration = db.Column(JSONB, nullable=True) + arguments = db.Column(JSONB, nullable=True) + + # Versioning Information + created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now()) + created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True) + updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now()) + updated_by = db.Column(db.Integer, db.ForeignKey(User.id)) + + # Relations + instructions = db.relationship('EveAIAssetInstruction', backref='asset_version', lazy=True) + + +class EveAIAssetInstruction(db.Model): + 
id = db.Column(db.Integer, primary_key=True) + asset_version_id = db.Column(db.Integer, db.ForeignKey(EveAIAssetVersion.id), nullable=False) + name = db.Column(db.String(255), nullable=False) + content = db.Column(db.Text, nullable=True) + + +class EveAIProcessedAsset(db.Model): + id = db.Column(db.Integer, primary_key=True) + asset_version_id = db.Column(db.Integer, db.ForeignKey(EveAIAssetVersion.id), nullable=False) + specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=True) + chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=True) + bucket_name = db.Column(db.String(255), nullable=True) + object_name = db.Column(db.String(255), nullable=True) + created_at = db.Column(db.DateTime, nullable=True, server_default=db.func.now()) + + class EveAIAgent(db.Model): id = db.Column(db.Integer, primary_key=True) specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False) @@ -123,7 +176,6 @@ class Dispatcher(db.Model): updated_by = db.Column(db.Integer, db.ForeignKey(User.id)) - class Interaction(db.Model): id = db.Column(db.Integer, primary_key=True) chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=False) @@ -162,7 +214,3 @@ class SpecialistDispatcher(db.Model): dispatcher_id = db.Column(db.Integer, db.ForeignKey(Dispatcher.id, ondelete='CASCADE'), primary_key=True) dispatcher = db.relationship("Dispatcher", backref="specialist_dispatchers") - - - - diff --git a/common/utils/cache/config_cache.py b/common/utils/cache/config_cache.py index 402a6a0..6db4c08 100644 --- a/common/utils/cache/config_cache.py +++ b/common/utils/cache/config_cache.py @@ -6,7 +6,8 @@ import os from flask import current_app from common.utils.cache.base import CacheHandler, CacheKey -from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types +from config.type_defs import agent_types, task_types, tool_types, specialist_types, 
retriever_types, prompt_types, \ + catalog_types def is_major_minor(version: str) -> bool: @@ -424,6 +425,14 @@ PromptConfigCacheHandler, PromptConfigVersionTreeCacheHandler, PromptConfigTypes )) +CatalogConfigCacheHandler, CatalogConfigVersionTreeCacheHandler, CatalogConfigTypesCacheHandler = ( + create_config_cache_handlers( + config_type='catalogs', + config_dir='config/catalogs', + types_module=catalog_types.CATALOG_TYPES + + )) + def register_config_cache_handlers(cache_manager) -> None: cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config') @@ -444,6 +453,9 @@ def register_config_cache_handlers(cache_manager) -> None: cache_manager.register_handler(PromptConfigCacheHandler, 'eveai_config') cache_manager.register_handler(PromptConfigVersionTreeCacheHandler, 'eveai_config') cache_manager.register_handler(PromptConfigTypesCacheHandler, 'eveai_config') + cache_manager.register_handler(CatalogConfigCacheHandler, 'eveai_config') + cache_manager.register_handler(CatalogConfigTypesCacheHandler, 'eveai_config') + cache_manager.register_handler(CatalogConfigVersionTreeCacheHandler, 'eveai_config') cache_manager.agents_config_cache.set_version_tree_cache(cache_manager.agents_version_tree_cache) cache_manager.tasks_config_cache.set_version_tree_cache(cache_manager.tasks_version_tree_cache) diff --git a/config/retrievers/DOSSIER_RETRIEVER/1.0.0.yaml b/config/retrievers/DOSSIER_RETRIEVER/1.0.0.yaml new file mode 100644 index 0000000..50760c6 --- /dev/null +++ b/config/retrievers/DOSSIER_RETRIEVER/1.0.0.yaml @@ -0,0 +1,36 @@ +version: "1.0.0" +name: "DOSSIER Retriever" +configuration: + es_k: + name: "es_k" + type: "int" + description: "K-value to retrieve embeddings (max embeddings retrieved)" + required: true + default: 
8 + es_similarity_threshold: + name: "es_similarity_threshold" + type: "float" + description: "Similarity threshold for retrieving embeddings" + required: true + default: 0.3 + tagging_fields_filter: + name: "Tagging Fields Filter" + type: "tagging_fields_filter" + description: "Filter JSON to retrieve a subset of documents" + required: true + dynamic_arguments: + name: "Dynamic Arguments" + type: "dynamic_arguments" + description: "dynamic arguments used in the filter" + required: false +arguments: + query: + name: "query" + type: "str" + description: "Query to retrieve embeddings" + required: True +metadata: + author: "Josako" + date_added: "2025-03-11" + changes: "Initial version" + description: "Retrieving all embeddings conform the query and the tagging fields filter" diff --git a/config/type_defs/catalog_types.py b/config/type_defs/catalog_types.py index e717e8a..15c4b5d 100644 --- a/config/type_defs/catalog_types.py +++ b/config/type_defs/catalog_types.py @@ -6,7 +6,7 @@ CATALOG_TYPES = { "configuration": {}, "document_version_configurations": [] }, - "DOSSIER": { + "DOSSIER_CATALOG": { "name": "Dossier Catalog", "Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored", "configuration": { diff --git a/config/type_defs/retriever_types.py b/config/type_defs/retriever_types.py index 120be25..68f6be0 100644 --- a/config/type_defs/retriever_types.py +++ b/config/type_defs/retriever_types.py @@ -3,5 +3,9 @@ RETRIEVER_TYPES = { "STANDARD_RAG": { "name": "Standard RAG Retriever", "description": "Retrieving all embeddings from the catalog conform the query", + }, + "DOSSIER_RETRIEVER": { + "name": "Retriever for managing DOSSIER catalogs", + "description": "Retrieving filtered embeddings from the catalog conform the query", } } diff --git a/eveai_app/views/document_forms.py b/eveai_app/views/document_forms.py index f54b833..0392c0f 100644 --- a/eveai_app/views/document_forms.py +++ b/eveai_app/views/document_forms.py @@ 
-8,6 +8,7 @@ import json from wtforms_sqlalchemy.fields import QuerySelectField +from common.extensions import cache_manager from common.models.document import Catalog from config.type_defs.catalog_types import CATALOG_TYPES @@ -150,8 +151,9 @@ class RetrieverForm(FlaskForm): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + types_dict = cache_manager.retrievers_types_cache.get_types() # Dynamically populate the 'type' field using the constructor - self.type.choices = [(key, value['name']) for key, value in RETRIEVER_TYPES.items()] + self.type.choices = [(key, value['name']) for key, value in types_dict.items()] class EditRetrieverForm(DynamicFormBase): diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py index f1a23b8..5ec38ef 100644 --- a/eveai_app/views/document_views.py +++ b/eveai_app/views/document_views.py @@ -12,7 +12,7 @@ from requests.exceptions import SSLError import json from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor -from common.extensions import db +from common.extensions import db, cache_manager from common.models.interaction import Specialist, SpecialistRetriever from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \ edit_document, \ @@ -303,7 +303,8 @@ def edit_retriever(retriever_id): # Create form instance with the retriever form = EditRetrieverForm(request.form, obj=retriever) - configuration_config = RETRIEVER_TYPES[retriever.type]["configuration"] + retriever_config = cache_manager.retrievers_config_cache.get_config(retriever.type, retriever.type_version) + configuration_config = retriever_config.get("configuration") form.add_dynamic_fields("configuration", configuration_config, retriever.configuration) if form.validate_on_submit(): diff --git a/eveai_app/views/dynamic_form_base.py b/eveai_app/views/dynamic_form_base.py index bee14ab..656bd2a 100644 --- a/eveai_app/views/dynamic_form_base.py +++ 
b/eveai_app/views/dynamic_form_base.py @@ -16,17 +16,27 @@ class TaggingFieldsField(TextAreaField): } super().__init__(*args, **kwargs) - # def _value(self): - # if self.data: - # return json.dumps(self.data) - # return '' - # - # def process_formdata(self, valuelist): - # if valuelist and valuelist[0]: - # try: - # self.data = json.loads(valuelist[0]) - # except json.JSONDecodeError as e: - # raise ValueError('Not valid JSON content') + +class TaggingFieldsFilterField(TextAreaField): + """Field for tagging fields filter conditions""" + + def __init__(self, *args, **kwargs): + kwargs['render_kw'] = { + 'class': 'json-editor', + 'data-handle-enter': 'true' + } + super().__init__(*args, **kwargs) + + +class DynamicArgumentsField(TextAreaField): + """Field for dynamic arguments configuration""" + + def __init__(self, *args, **kwargs): + kwargs['render_kw'] = { + 'class': 'json-editor', + 'data-handle-enter': 'true' + } + super().__init__(*args, **kwargs) class ChunkingPatternsField(TextAreaField): @@ -37,15 +47,6 @@ class ChunkingPatternsField(TextAreaField): } super().__init__(*args, **kwargs) - # def _value(self): - # if self.data: - # return '\n'.join(self.data) - # return '' - # - # def process_formdata(self, valuelist): - # if valuelist and valuelist[0]: - # self.data = [line.strip() for line in valuelist[0].split('\n') if line.strip()] - class DynamicFormBase(FlaskForm): def __init__(self, formdata=None, *args, **kwargs): @@ -80,6 +81,10 @@ class DynamicFormBase(FlaskForm): ) elif field_type == 'tagging_fields': validators_list.append(self._validate_tagging_fields) + elif field_type == 'tagging_fields_filter': + validators_list.append(self._validate_tagging_fields_filter) + elif field_type == 'dynamic_arguments': + validators_list.append(self._validate_dynamic_arguments) return validators_list @@ -103,6 +108,116 @@ class DynamicFormBase(FlaskForm): except Exception as e: raise ValidationError(f"Invalid field definition: {str(e)}") + def 
_validate_tagging_fields_filter(self, form, field): + """Validate the tagging fields filter structure""" + if not field.data: + return + + try: + # Parse JSON data + filter_data = json.loads(field.data) + + # Basic validation of filter structure + self._validate_filter_condition(filter_data) + + except json.JSONDecodeError: + raise ValidationError("Invalid JSON format") + except ValidationError as e: + # Re-raise ValidationError from _validate_filter_condition + raise e + except Exception as e: + raise ValidationError(f"Invalid filter definition: {str(e)}") + + def _validate_filter_condition(self, condition): + """Recursively validate a filter condition structure""" + # Check if this is a logical condition (AND/OR/NOT) + if 'logical' in condition: + if condition['logical'] not in ['and', 'or', 'not']: + raise ValidationError(f"Invalid logical operator: {condition['logical']}") + + if 'conditions' not in condition: + raise ValidationError("Missing 'conditions' array for logical operator") + + if not isinstance(condition['conditions'], list): + raise ValidationError("'conditions' must be an array") + + # Special case for NOT which should have exactly one condition + if condition['logical'] == 'not' and len(condition['conditions']) != 1: + raise ValidationError("'not' operator must have exactly one condition") + + # Validate each sub-condition + for sub_condition in condition['conditions']: + self._validate_filter_condition(sub_condition) + + # Check if this is a field condition + elif 'field' in condition: + if 'operator' not in condition: + raise ValidationError(f"Missing 'operator' for field condition on {condition['field']}") + + if 'value' not in condition: + raise ValidationError(f"Missing 'value' for field condition on {condition['field']}") + + # Validate operator types + # This is a simplified check - in a real implementation, you would validate + # against the actual field type from the catalog definition + valid_operators = { + 'string': ['eq', 'neq', 
'contains', 'not_contains', 'starts_with', + 'ends_with', 'in', 'not_in', 'regex', 'not_regex'], + 'numeric': ['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'between', + 'not_between', 'in', 'not_in'], + 'enum': ['eq', 'neq', 'in', 'not_in'] + } + + # For now, we just check that the operator is one of the known types + all_operators = set().union(*valid_operators.values()) + if condition['operator'] not in all_operators: + raise ValidationError(f"Unknown operator '{condition['operator']}' for field {condition['field']}") + + # Validate variable references if present + if isinstance(condition['value'], str) and condition['value'].startswith('$'): + # This is a variable reference - no further validation needed at form time + pass + + # Additional validation based on operator type could be added here + + else: + raise ValidationError("Filter condition must have either 'logical' or 'field' property") + + def _validate_dynamic_arguments(self, form, field): + """Validate the dynamic arguments structure""" + if not field.data: + return + + try: + # Parse JSON data + args_data = json.loads(field.data) + + # Validate basic structure (should be an object with argument definitions) + if not isinstance(args_data, dict): + raise ValidationError("Dynamic arguments must be an object with argument definitions") + + # Validate each argument definition + for arg_name, arg_def in args_data.items(): + if not isinstance(arg_def, dict): + raise ValidationError(f"Argument definition for '{arg_name}' must be an object") + + # Check required properties + if 'type' not in arg_def: + raise ValidationError(f"Argument '{arg_name}' missing required 'type' property") + + # Validate type + if arg_def['type'] not in ['string', 'integer', 'float', 'boolean', 'date', 'enum', 'object', 'array']: + raise ValidationError(f"Argument '{arg_name}' has invalid type: {arg_def['type']}") + # Validate enum fields have allowed_values + if arg_def['type'] == 'enum' and 'allowed_values' not in arg_def: + raise 
ValidationError(f"Enum argument '{arg_name}' missing required 'allowed_values' list") + except json.JSONDecodeError: + raise ValidationError("Invalid JSON format") + except ValidationError: + raise  # already a precise, user-facing message; do not re-wrap it below + except Exception as e: + raise ValidationError(f"Invalid argument definition: {str(e)}") + def add_dynamic_fields(self, collection_name, config, initial_data=None): """Add dynamic fields to the form based on the configuration.""" self.dynamic_fields[collection_name] = [] @@ -117,11 +232,19 @@ class DynamicFormBase(FlaskForm): # Determine standard validators field_validators = self._create_field_validators(field_def) - # Handle special case for tagging_fields + # Handle special case for field types if field_type == 'tagging_fields': field_class = TaggingFieldsField extra_classes = 'json-editor' field_kwargs = {} + elif field_type == 'tagging_fields_filter': + field_class = TaggingFieldsFilterField + extra_classes = 'json-editor' + field_kwargs = {} + elif field_type == 'dynamic_arguments': + field_class = DynamicArgumentsField + extra_classes = 'json-editor' + field_kwargs = {} elif field_type == 'enum': field_class = SelectField allowed_values = field_def.get('allowed_values', []) @@ -148,7 +271,8 @@ class DynamicFormBase(FlaskForm): field_data = None if initial_data and field_name in initial_data: field_data = initial_data[field_name] - if field_type == 'tagging_fields' and isinstance(field_data, dict): + if field_type in ['tagging_fields', 'tagging_fields_filter', 'dynamic_arguments'] and isinstance( + field_data, dict): try: field_data = json.dumps(field_data, indent=2) except (TypeError, ValueError) as e: @@ -221,8 +345,8 @@ class DynamicFormBase(FlaskForm): for full_field_name in self.dynamic_fields[collection_name]: original_field_name = full_field_name[prefix_length:] field = getattr(self, full_field_name) - # Parse JSON for tagging_fields type - if isinstance(field, TaggingFieldsField) and field.data: + # Parse JSON for special field types + if isinstance(field, (TaggingFieldsField, 
TaggingFieldsFilterField, DynamicArgumentsField)) and field.data: try: data[original_field_name] = json.loads(field.data) except json.JSONDecodeError: @@ -282,5 +406,4 @@ def validate_tagging_fields(form, field): except json.JSONDecodeError: raise ValidationError("Invalid JSON format") except (TypeError, ValueError) as e: - raise ValidationError(f"Invalid field definition: {str(e)}") - + raise ValidationError(f"Invalid field definition: {str(e)}") \ No newline at end of file