- Add DOSSIER Catalog management possibilities to eveai_app.

This commit is contained in:
Josako
2025-03-12 11:25:48 +01:00
parent 6465e4f358
commit 56a00c2894
8 changed files with 265 additions and 36 deletions

View File

@@ -45,6 +45,59 @@ class Specialist(db.Model):
updated_by = db.Column(db.Integer, db.ForeignKey(User.id)) updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class EveAIAsset(db.Model):
    """Database model for an EveAI asset.

    Holds the asset's name, an optional description, a type identifier with
    its type version, and an optional validity window. The concrete payloads
    live in ``EveAIAssetVersion`` rows, reachable through ``versions``.
    """
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)
    # Type identifier; falls back to "DOCUMENT_TEMPLATE" when none is supplied.
    type = db.Column(db.String(50), nullable=False, default="DOCUMENT_TEMPLATE")
    # Version of the type definition (presumably a semantic version - TODO confirm).
    type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
    # Optional validity window; either bound may be NULL.
    valid_from = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.DateTime, nullable=True)

    # Versioning Information
    # created_at / updated_at are filled by the database (server default now());
    # updated_at is additionally refreshed on every UPDATE via onupdate.
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    # One asset -> many versions; each version gets an ``asset`` back-reference.
    versions = db.relationship('EveAIAssetVersion', backref='asset', lazy=True)
class EveAIAssetVersion(db.Model):
    """Database model for one version of an ``EveAIAsset``.

    Every row belongs to exactly one asset (``asset_id`` is mandatory) and
    carries an optional bucket name plus two optional JSONB documents,
    ``configuration`` and ``arguments``. Related instructions are reachable
    through ``instructions``.
    """
    id = db.Column(db.Integer, primary_key=True)
    asset_id = db.Column(db.Integer, db.ForeignKey(EveAIAsset.id), nullable=False)
    # NOTE(review): presumably the object-storage bucket holding this version - confirm.
    bucket_name = db.Column(db.String(255), nullable=True)
    configuration = db.Column(JSONB, nullable=True)
    arguments = db.Column(JSONB, nullable=True)

    # Versioning Information
    # created_at / updated_at are filled by the database (server default now());
    # updated_at is additionally refreshed on every UPDATE via onupdate.
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    # One version -> many instructions; each instruction gets an
    # ``asset_version`` back-reference.
    instructions = db.relationship('EveAIAssetInstruction', backref='asset_version', lazy=True)
class EveAIAssetInstruction(db.Model):
    """Database model for a named instruction attached to an asset version.

    ``asset_version_id`` and ``name`` are mandatory; ``content`` is optional
    free text.
    """
    id = db.Column(db.Integer, primary_key=True)
    asset_version_id = db.Column(db.Integer, db.ForeignKey(EveAIAssetVersion.id), nullable=False)
    name = db.Column(db.String(255), nullable=False)
    content = db.Column(db.Text, nullable=True)
class EveAIProcessedAsset(db.Model):
    """Database model for a processed result derived from an asset version.

    The originating asset version is mandatory; the links to a specialist
    and to a chat session are both optional.
    """
    id = db.Column(db.Integer, primary_key=True)
    asset_version_id = db.Column(db.Integer, db.ForeignKey(EveAIAssetVersion.id), nullable=False)
    specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=True)
    chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=True)
    # NOTE(review): bucket_name/object_name presumably locate the processed
    # file in object storage - confirm against the code that writes them.
    bucket_name = db.Column(db.String(255), nullable=True)
    object_name = db.Column(db.String(255), nullable=True)
    # Unlike the other asset models, this timestamp is nullable; the database
    # still fills it with now() when no value is given.
    created_at = db.Column(db.DateTime, nullable=True, server_default=db.func.now())
class EveAIAgent(db.Model): class EveAIAgent(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False) specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False)
@@ -123,7 +176,6 @@ class Dispatcher(db.Model):
updated_by = db.Column(db.Integer, db.ForeignKey(User.id)) updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Interaction(db.Model): class Interaction(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=False) chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=False)
@@ -162,7 +214,3 @@ class SpecialistDispatcher(db.Model):
dispatcher_id = db.Column(db.Integer, db.ForeignKey(Dispatcher.id, ondelete='CASCADE'), primary_key=True) dispatcher_id = db.Column(db.Integer, db.ForeignKey(Dispatcher.id, ondelete='CASCADE'), primary_key=True)
dispatcher = db.relationship("Dispatcher", backref="specialist_dispatchers") dispatcher = db.relationship("Dispatcher", backref="specialist_dispatchers")

View File

@@ -6,7 +6,8 @@ import os
from flask import current_app from flask import current_app
from common.utils.cache.base import CacheHandler, CacheKey from common.utils.cache.base import CacheHandler, CacheKey
from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types, \
catalog_types
def is_major_minor(version: str) -> bool: def is_major_minor(version: str) -> bool:
@@ -424,6 +425,14 @@ PromptConfigCacheHandler, PromptConfigVersionTreeCacheHandler, PromptConfigTypes
)) ))
# Unpack the three handlers that create_config_cache_handlers returns for the
# 'catalogs' config type: the config handler, the version-tree handler and the
# types handler (in that order). Configs are looked up under config/catalogs
# and the type definitions come from catalog_types.CATALOG_TYPES.
CatalogConfigCacheHandler, CatalogConfigVersionTreeCacheHandler, CatalogConfigTypesCacheHandler = (
    create_config_cache_handlers(
        config_type='catalogs',
        config_dir='config/catalogs',
        types_module=catalog_types.CATALOG_TYPES
    ))
def register_config_cache_handlers(cache_manager) -> None: def register_config_cache_handlers(cache_manager) -> None:
cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config') cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config')
@@ -444,6 +453,12 @@ def register_config_cache_handlers(cache_manager) -> None:
cache_manager.register_handler(PromptConfigCacheHandler, 'eveai_config') cache_manager.register_handler(PromptConfigCacheHandler, 'eveai_config')
cache_manager.register_handler(PromptConfigVersionTreeCacheHandler, 'eveai_config') cache_manager.register_handler(PromptConfigVersionTreeCacheHandler, 'eveai_config')
cache_manager.register_handler(PromptConfigTypesCacheHandler, 'eveai_config') cache_manager.register_handler(PromptConfigTypesCacheHandler, 'eveai_config')
cache_manager.register_handler(CatalogConfigCacheHandler, 'eveai_config')
cache_manager.register_handler(CatalogConfigTypesCacheHandler, 'eveai_config')
cache_manager.register_handler(CatalogConfigVersionTreeCacheHandler, 'eveai_config')
cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config')
cache_manager.register_handler(AgentConfigTypesCacheHandler, 'eveai_config')
cache_manager.register_handler(AgentConfigVersionTreeCacheHandler, 'eveai_config')
cache_manager.agents_config_cache.set_version_tree_cache(cache_manager.agents_version_tree_cache) cache_manager.agents_config_cache.set_version_tree_cache(cache_manager.agents_version_tree_cache)
cache_manager.tasks_config_cache.set_version_tree_cache(cache_manager.tasks_version_tree_cache) cache_manager.tasks_config_cache.set_version_tree_cache(cache_manager.tasks_version_tree_cache)

View File

@@ -0,0 +1,36 @@
version: "1.0.0"
name: "DOSSIER Retriever"
configuration:
es_k:
name: "es_k"
type: "int"
description: "K-value to retrieve embeddings (max embeddings retrieved)"
required: true
default: 8
es_similarity_threshold:
name: "es_similarity_threshold"
type: "float"
description: "Similarity threshold for retrieving embeddings"
required: true
default: 0.3
tagging_fields_filter:
name: "Tagging Fields Filter"
type: "tagging_fields_filter"
description: "Filter JSON to retrieve a subset of documents"
required: true
dynamic_arguments:
name: "Dynamic Arguments"
type: "dynamic_arguments"
description: "dynamic arguments used in the filter"
required: false
arguments:
query:
name: "query"
type: "str"
description: "Query to retrieve embeddings"
required: True
metadata:
author: "Josako"
date_added: "2025-03-11"
changes: "Initial version"
description: "Retrieving all embeddings conform the query and the tagging fields filter"

View File

@@ -6,7 +6,7 @@ CATALOG_TYPES = {
"configuration": {}, "configuration": {},
"document_version_configurations": [] "document_version_configurations": []
}, },
"DOSSIER": { "DOSSIER_CATALOG": {
"name": "Dossier Catalog", "name": "Dossier Catalog",
"Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored", "Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
"configuration": { "configuration": {

View File

@@ -3,5 +3,9 @@ RETRIEVER_TYPES = {
"STANDARD_RAG": { "STANDARD_RAG": {
"name": "Standard RAG Retriever", "name": "Standard RAG Retriever",
"description": "Retrieving all embeddings from the catalog conform the query", "description": "Retrieving all embeddings from the catalog conform the query",
},
"DOSSIER_RETRIEVER": {
"name": "Retriever for managing DOSSIER catalogs",
"description": "Retrieving filtered embeddings from the catalog conform the query",
} }
} }

View File

@@ -8,6 +8,7 @@ import json
from wtforms_sqlalchemy.fields import QuerySelectField from wtforms_sqlalchemy.fields import QuerySelectField
from common.extensions import cache_manager
from common.models.document import Catalog from common.models.document import Catalog
from config.type_defs.catalog_types import CATALOG_TYPES from config.type_defs.catalog_types import CATALOG_TYPES
@@ -150,8 +151,9 @@ class RetrieverForm(FlaskForm):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
types_dict = cache_manager.retrievers_types_cache.get_types()
# Dynamically populate the 'type' field using the constructor # Dynamically populate the 'type' field using the constructor
self.type.choices = [(key, value['name']) for key, value in RETRIEVER_TYPES.items()] self.type.choices = [(key, value['name']) for key, value in types_dict.items()]
class EditRetrieverForm(DynamicFormBase): class EditRetrieverForm(DynamicFormBase):

View File

@@ -12,7 +12,7 @@ from requests.exceptions import SSLError
import json import json
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
from common.extensions import db from common.extensions import db, cache_manager
from common.models.interaction import Specialist, SpecialistRetriever from common.models.interaction import Specialist, SpecialistRetriever
from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \ from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
edit_document, \ edit_document, \
@@ -303,7 +303,8 @@ def edit_retriever(retriever_id):
# Create form instance with the retriever # Create form instance with the retriever
form = EditRetrieverForm(request.form, obj=retriever) form = EditRetrieverForm(request.form, obj=retriever)
configuration_config = RETRIEVER_TYPES[retriever.type]["configuration"] retriever_config = cache_manager.retrievers_config_cache.get_config(retriever.type, retriever.type_version)
configuration_config = retriever_config.get("configuration")
form.add_dynamic_fields("configuration", configuration_config, retriever.configuration) form.add_dynamic_fields("configuration", configuration_config, retriever.configuration)
if form.validate_on_submit(): if form.validate_on_submit():

View File

@@ -16,17 +16,27 @@ class TaggingFieldsField(TextAreaField):
} }
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# def _value(self):
# if self.data: class TaggingFieldsFilterField(TextAreaField):
# return json.dumps(self.data) """Field for tagging fields filter conditions"""
# return ''
# def __init__(self, *args, **kwargs):
# def process_formdata(self, valuelist): kwargs['render_kw'] = {
# if valuelist and valuelist[0]: 'class': 'json-editor',
# try: 'data-handle-enter': 'true'
# self.data = json.loads(valuelist[0]) }
# except json.JSONDecodeError as e: super().__init__(*args, **kwargs)
# raise ValueError('Not valid JSON content')
class DynamicArgumentsField(TextAreaField):
    """Text area for dynamic arguments configuration.

    The widget always carries the ``json-editor`` CSS class and the
    ``data-handle-enter`` attribute; a ``render_kw`` passed by the caller is
    replaced.
    """

    def __init__(self, *args, **kwargs):
        json_editor_attrs = {
            'class': 'json-editor',
            'data-handle-enter': 'true',
        }
        # Overwrite (not merge) whatever render_kw the caller supplied.
        kwargs.update(render_kw=json_editor_attrs)
        super().__init__(*args, **kwargs)
class ChunkingPatternsField(TextAreaField): class ChunkingPatternsField(TextAreaField):
@@ -37,15 +47,6 @@ class ChunkingPatternsField(TextAreaField):
} }
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# def _value(self):
# if self.data:
# return '\n'.join(self.data)
# return ''
#
# def process_formdata(self, valuelist):
# if valuelist and valuelist[0]:
# self.data = [line.strip() for line in valuelist[0].split('\n') if line.strip()]
class DynamicFormBase(FlaskForm): class DynamicFormBase(FlaskForm):
def __init__(self, formdata=None, *args, **kwargs): def __init__(self, formdata=None, *args, **kwargs):
@@ -80,6 +81,10 @@ class DynamicFormBase(FlaskForm):
) )
elif field_type == 'tagging_fields': elif field_type == 'tagging_fields':
validators_list.append(self._validate_tagging_fields) validators_list.append(self._validate_tagging_fields)
elif field_type == 'tagging_fields_filter':
validators_list.append(self._validate_tagging_fields_filter)
elif field_type == 'dynamic_arguments':
validators_list.append(self._validate_dynamic_arguments)
return validators_list return validators_list
@@ -103,6 +108,116 @@ class DynamicFormBase(FlaskForm):
except Exception as e: except Exception as e:
raise ValidationError(f"Invalid field definition: {str(e)}") raise ValidationError(f"Invalid field definition: {str(e)}")
def _validate_tagging_fields_filter(self, form, field):
    """WTForms validator for a tagging-fields filter supplied as JSON text.

    An empty field is accepted without further checks. Otherwise the text is
    parsed as JSON and the resulting structure is handed to
    ``_validate_filter_condition``.

    Raises:
        ValidationError: if the text is not valid JSON, if the filter
            structure is rejected, or if any other error occurs while
            checking it.
    """
    raw_filter = field.data
    if not raw_filter:
        return

    try:
        self._validate_filter_condition(json.loads(raw_filter))
    except json.JSONDecodeError:
        raise ValidationError("Invalid JSON format")
    except ValidationError:
        # Structural errors already carry a precise message; pass them through.
        raise
    except Exception as e:
        raise ValidationError(f"Invalid filter definition: {str(e)}")
def _validate_filter_condition(self, condition):
    """Recursively validate one node of a tagging-fields filter.

    A node is either a logical node
    (``{"logical": "and"|"or"|"not", "conditions": [...]}``) or a field node
    (``{"field": ..., "operator": ..., "value": ...}``).

    Args:
        condition: The parsed JSON value for this node.

    Raises:
        ValidationError: if the node is not an object, is neither a logical
            nor a field node, or is missing / has invalid properties.
    """
    # Parsed JSON can be any value (list, string, number, null). Reject
    # non-objects up front so the key lookups below cannot fail with a raw
    # TypeError or match a substring of a string node.
    if not isinstance(condition, dict):
        raise ValidationError("Filter condition must be an object")

    # Logical node (AND/OR/NOT)
    if 'logical' in condition:
        logical = condition['logical']
        if logical not in ('and', 'or', 'not'):
            raise ValidationError(f"Invalid logical operator: {logical}")

        if 'conditions' not in condition:
            raise ValidationError("Missing 'conditions' array for logical operator")

        sub_conditions = condition['conditions']
        if not isinstance(sub_conditions, list):
            raise ValidationError("'conditions' must be an array")

        # NOT negates exactly one sub-condition
        if logical == 'not' and len(sub_conditions) != 1:
            raise ValidationError("'not' operator must have exactly one condition")

        for sub_condition in sub_conditions:
            self._validate_filter_condition(sub_condition)
        return

    # Field node
    if 'field' in condition:
        field_name = condition['field']
        if 'operator' not in condition:
            raise ValidationError(f"Missing 'operator' for field condition on {field_name}")

        if 'value' not in condition:
            raise ValidationError(f"Missing 'value' for field condition on {field_name}")

        # Simplified check: the operator is only compared against the union
        # of all known operators, not against the actual field type from the
        # catalog definition.
        valid_operators = {
            'string': ['eq', 'neq', 'contains', 'not_contains', 'starts_with',
                       'ends_with', 'in', 'not_in', 'regex', 'not_regex'],
            'numeric': ['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'between',
                        'not_between', 'in', 'not_in'],
            'enum': ['eq', 'neq', 'in', 'not_in']
        }
        all_operators = {op for ops in valid_operators.values() for op in ops}

        operator = condition['operator']
        # The isinstance check keeps unhashable values (e.g. a JSON array)
        # from raising TypeError in the set membership test.
        if not isinstance(operator, str) or operator not in all_operators:
            raise ValidationError(f"Unknown operator '{operator}' for field {field_name}")

        # Values starting with '$' are variable references; they cannot be
        # resolved at form time, so no value validation is performed here.
        return

    raise ValidationError("Filter condition must have either 'logical' or 'field' property")
def _validate_dynamic_arguments(self, form, field):
    """WTForms validator for dynamic argument definitions supplied as JSON text.

    An empty field is accepted without further checks. Otherwise the text
    must parse to an object that maps each argument name to a definition
    object with a known ``type``; ``enum`` arguments must also provide
    ``allowed_values``.

    Raises:
        ValidationError: if the text is not valid JSON or any argument
            definition is malformed.
    """
    if not field.data:
        return

    allowed_types = ('string', 'integer', 'float', 'boolean', 'date', 'enum', 'object', 'array')

    try:
        args_data = json.loads(field.data)

        # The top level must be an object with argument definitions
        if not isinstance(args_data, dict):
            raise ValidationError("Dynamic arguments must be an object with argument definitions")

        for arg_name, arg_def in args_data.items():
            if not isinstance(arg_def, dict):
                raise ValidationError(f"Argument definition for '{arg_name}' must be an object")

            if 'type' not in arg_def:
                raise ValidationError(f"Argument '{arg_name}' missing required 'type' property")

            arg_type = arg_def['type']
            if arg_type not in allowed_types:
                raise ValidationError(f"Argument '{arg_name}' has invalid type: {arg_type}")

            if arg_type == 'enum' and 'allowed_values' not in arg_def:
                raise ValidationError(f"Enum argument '{arg_name}' missing required 'allowed_values' list")

    except json.JSONDecodeError:
        raise ValidationError("Invalid JSON format")
    except ValidationError:
        # Re-raise our own messages unchanged. Without this clause the
        # generic handler below would wrap them a second time as
        # "Invalid argument definition: ...".
        raise
    except Exception as e:
        raise ValidationError(f"Invalid argument definition: {str(e)}")
def add_dynamic_fields(self, collection_name, config, initial_data=None): def add_dynamic_fields(self, collection_name, config, initial_data=None):
"""Add dynamic fields to the form based on the configuration.""" """Add dynamic fields to the form based on the configuration."""
self.dynamic_fields[collection_name] = [] self.dynamic_fields[collection_name] = []
@@ -117,11 +232,19 @@ class DynamicFormBase(FlaskForm):
# Determine standard validators # Determine standard validators
field_validators = self._create_field_validators(field_def) field_validators = self._create_field_validators(field_def)
# Handle special case for tagging_fields # Handle special case for field types
if field_type == 'tagging_fields': if field_type == 'tagging_fields':
field_class = TaggingFieldsField field_class = TaggingFieldsField
extra_classes = 'json-editor' extra_classes = 'json-editor'
field_kwargs = {} field_kwargs = {}
elif field_type == 'tagging_fields_filter':
field_class = TaggingFieldsFilterField
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'dynamic_arguments':
field_class = DynamicArgumentsField
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'enum': elif field_type == 'enum':
field_class = SelectField field_class = SelectField
allowed_values = field_def.get('allowed_values', []) allowed_values = field_def.get('allowed_values', [])
@@ -148,7 +271,8 @@ class DynamicFormBase(FlaskForm):
field_data = None field_data = None
if initial_data and field_name in initial_data: if initial_data and field_name in initial_data:
field_data = initial_data[field_name] field_data = initial_data[field_name]
if field_type == 'tagging_fields' and isinstance(field_data, dict): if field_type in ['tagging_fields', 'tagging_fields_filter', 'dynamic_arguments'] and isinstance(
field_data, dict):
try: try:
field_data = json.dumps(field_data, indent=2) field_data = json.dumps(field_data, indent=2)
except (TypeError, ValueError) as e: except (TypeError, ValueError) as e:
@@ -221,8 +345,8 @@ class DynamicFormBase(FlaskForm):
for full_field_name in self.dynamic_fields[collection_name]: for full_field_name in self.dynamic_fields[collection_name]:
original_field_name = full_field_name[prefix_length:] original_field_name = full_field_name[prefix_length:]
field = getattr(self, full_field_name) field = getattr(self, full_field_name)
# Parse JSON for tagging_fields type # Parse JSON for special field types
if isinstance(field, TaggingFieldsField) and field.data: if isinstance(field, (TaggingFieldsField, TaggingFieldsFilterField, DynamicArgumentsField)) and field.data:
try: try:
data[original_field_name] = json.loads(field.data) data[original_field_name] = json.loads(field.data)
except json.JSONDecodeError: except json.JSONDecodeError:
@@ -283,4 +407,3 @@ def validate_tagging_fields(form, field):
raise ValidationError("Invalid JSON format") raise ValidationError("Invalid JSON format")
except (TypeError, ValueError) as e: except (TypeError, ValueError) as e:
raise ValidationError(f"Invalid field definition: {str(e)}") raise ValidationError(f"Invalid field definition: {str(e)}")