- Add DOSSIER Catalog management possibilities to eveai_app.

This commit is contained in:
Josako
2025-03-12 11:25:48 +01:00
parent 6465e4f358
commit 56a00c2894
8 changed files with 265 additions and 36 deletions

View File

@@ -45,6 +45,59 @@ class Specialist(db.Model):
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class EveAIAsset(db.Model):
    """Database model for an asset, described by a name, a type and a type version.

    An asset can carry an optional validity window (``valid_from`` /
    ``valid_to``) and exposes its ``EveAIAssetVersion`` rows through
    ``versions``.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)

    # Type information; both columns fall back to a default when not supplied
    type = db.Column(
        db.String(50),
        nullable=False,
        default="DOCUMENT_TEMPLATE",
    )
    type_version = db.Column(
        db.String(20),
        nullable=True,
        default="1.0.0",
    )

    # Optional validity window
    valid_from = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.DateTime, nullable=True)

    # Audit columns: timestamps are filled in by the database server
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # One asset -> many versions; each version gets an ``asset`` back-reference
    versions = db.relationship(
        'EveAIAssetVersion',
        backref='asset',
        lazy=True,
    )
class EveAIAssetVersion(db.Model):
    """Database model for one version of an ``EveAIAsset``.

    Stores a bucket name plus two JSONB documents (``configuration`` and
    ``arguments``) and exposes its ``EveAIAssetInstruction`` rows through
    ``instructions``.
    """

    id = db.Column(db.Integer, primary_key=True)
    asset_id = db.Column(
        db.Integer,
        db.ForeignKey(EveAIAsset.id),
        nullable=False,
    )
    bucket_name = db.Column(db.String(255), nullable=True)

    # Free-form JSON payloads attached to this version
    configuration = db.Column(JSONB, nullable=True)
    arguments = db.Column(JSONB, nullable=True)

    # Audit columns: timestamps are filled in by the database server
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # One version -> many instructions; each gets an ``asset_version`` back-reference
    instructions = db.relationship(
        'EveAIAssetInstruction',
        backref='asset_version',
        lazy=True,
    )
class EveAIAssetInstruction(db.Model):
    """Database model for a named instruction attached to an asset version."""

    id = db.Column(db.Integer, primary_key=True)
    asset_version_id = db.Column(
        db.Integer,
        db.ForeignKey(EveAIAssetVersion.id),
        nullable=False,
    )
    name = db.Column(db.String(255), nullable=False)
    # Instruction body; may be left empty
    content = db.Column(db.Text, nullable=True)
class EveAIProcessedAsset(db.Model):
    """Database model for a processed asset derived from an asset version.

    The row always references an ``EveAIAssetVersion`` and may additionally
    reference a ``Specialist`` and a ``ChatSession``.
    """

    id = db.Column(db.Integer, primary_key=True)

    # Mandatory link to the asset version; the other two links are optional
    asset_version_id = db.Column(
        db.Integer,
        db.ForeignKey(EveAIAssetVersion.id),
        nullable=False,
    )
    specialist_id = db.Column(
        db.Integer,
        db.ForeignKey(Specialist.id),
        nullable=True,
    )
    chat_session_id = db.Column(
        db.Integer,
        db.ForeignKey(ChatSession.id),
        nullable=True,
    )

    # NOTE(review): presumably the object-storage location of the result - confirm
    bucket_name = db.Column(db.String(255), nullable=True)
    object_name = db.Column(db.String(255), nullable=True)

    created_at = db.Column(
        db.DateTime,
        nullable=True,
        server_default=db.func.now(),
    )
class EveAIAgent(db.Model):
id = db.Column(db.Integer, primary_key=True)
specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False)
@@ -123,7 +176,6 @@ class Dispatcher(db.Model):
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Interaction(db.Model):
id = db.Column(db.Integer, primary_key=True)
chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=False)
@@ -162,7 +214,3 @@ class SpecialistDispatcher(db.Model):
dispatcher_id = db.Column(db.Integer, db.ForeignKey(Dispatcher.id, ondelete='CASCADE'), primary_key=True)
dispatcher = db.relationship("Dispatcher", backref="specialist_dispatchers")

View File

@@ -6,7 +6,8 @@ import os
from flask import current_app
from common.utils.cache.base import CacheHandler, CacheKey
from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types
from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types, \
catalog_types
def is_major_minor(version: str) -> bool:
@@ -424,6 +425,14 @@ PromptConfigCacheHandler, PromptConfigVersionTreeCacheHandler, PromptConfigTypes
))
# Catalog configuration handlers: config, version tree and types, in the
# order returned by create_config_cache_handlers.
(
    CatalogConfigCacheHandler,
    CatalogConfigVersionTreeCacheHandler,
    CatalogConfigTypesCacheHandler,
) = create_config_cache_handlers(
    config_type='catalogs',
    config_dir='config/catalogs',
    types_module=catalog_types.CATALOG_TYPES,
)
def register_config_cache_handlers(cache_manager) -> None:
cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config')
@@ -444,6 +453,12 @@ def register_config_cache_handlers(cache_manager) -> None:
cache_manager.register_handler(PromptConfigCacheHandler, 'eveai_config')
cache_manager.register_handler(PromptConfigVersionTreeCacheHandler, 'eveai_config')
cache_manager.register_handler(PromptConfigTypesCacheHandler, 'eveai_config')
cache_manager.register_handler(CatalogConfigCacheHandler, 'eveai_config')
cache_manager.register_handler(CatalogConfigTypesCacheHandler, 'eveai_config')
cache_manager.register_handler(CatalogConfigVersionTreeCacheHandler, 'eveai_config')
cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config')
cache_manager.register_handler(AgentConfigTypesCacheHandler, 'eveai_config')
cache_manager.register_handler(AgentConfigVersionTreeCacheHandler, 'eveai_config')
cache_manager.agents_config_cache.set_version_tree_cache(cache_manager.agents_version_tree_cache)
cache_manager.tasks_config_cache.set_version_tree_cache(cache_manager.tasks_version_tree_cache)

View File

@@ -0,0 +1,36 @@
version: "1.0.0"
name: "DOSSIER Retriever"
configuration:
es_k:
name: "es_k"
type: "int"
description: "K-value to retrieve embeddings (max embeddings retrieved)"
required: true
default: 8
es_similarity_threshold:
name: "es_similarity_threshold"
type: "float"
description: "Similarity threshold for retrieving embeddings"
required: true
default: 0.3
tagging_fields_filter:
name: "Tagging Fields Filter"
type: "tagging_fields_filter"
description: "Filter JSON to retrieve a subset of documents"
required: true
dynamic_arguments:
name: "Dynamic Arguments"
type: "dynamic_arguments"
description: "dynamic arguments used in the filter"
required: false
arguments:
query:
name: "query"
type: "str"
description: "Query to retrieve embeddings"
required: true
metadata:
author: "Josako"
date_added: "2025-03-11"
changes: "Initial version"
description: "Retrieving all embeddings conform the query and the tagging fields filter"

View File

@@ -6,7 +6,7 @@ CATALOG_TYPES = {
"configuration": {},
"document_version_configurations": []
},
"DOSSIER": {
"DOSSIER_CATALOG": {
"name": "Dossier Catalog",
"Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
"configuration": {

View File

@@ -3,5 +3,9 @@ RETRIEVER_TYPES = {
"STANDARD_RAG": {
"name": "Standard RAG Retriever",
"description": "Retrieving all embeddings from the catalog conform the query",
},
"DOSSIER_RETRIEVER": {
"name": "Retriever for managing DOSSIER catalogs",
"description": "Retrieving filtered embeddings from the catalog conform the query",
}
}

View File

@@ -8,6 +8,7 @@ import json
from wtforms_sqlalchemy.fields import QuerySelectField
from common.extensions import cache_manager
from common.models.document import Catalog
from config.type_defs.catalog_types import CATALOG_TYPES
@@ -150,8 +151,9 @@ class RetrieverForm(FlaskForm):
def __init__(self, *args, **kwargs):
    """Initialise the form and populate the choices of the ``type`` field.

    The choices are ``(key, name)`` pairs built from the mapping returned by
    ``cache_manager.retrievers_types_cache.get_types()``.
    """
    super().__init__(*args, **kwargs)
    # The types cache is the single source for the choices; the former
    # assignment from RETRIEVER_TYPES was overwritten right away and is dropped.
    types_dict = cache_manager.retrievers_types_cache.get_types()
    self.type.choices = [(key, value['name']) for key, value in types_dict.items()]
class EditRetrieverForm(DynamicFormBase):

View File

@@ -12,7 +12,7 @@ from requests.exceptions import SSLError
import json
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
from common.extensions import db
from common.extensions import db, cache_manager
from common.models.interaction import Specialist, SpecialistRetriever
from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
edit_document, \
@@ -303,7 +303,8 @@ def edit_retriever(retriever_id):
# Create form instance with the retriever
form = EditRetrieverForm(request.form, obj=retriever)
configuration_config = RETRIEVER_TYPES[retriever.type]["configuration"]
retriever_config = cache_manager.retrievers_config_cache.get_config(retriever.type, retriever.type_version)
configuration_config = retriever_config.get("configuration")
form.add_dynamic_fields("configuration", configuration_config, retriever.configuration)
if form.validate_on_submit():

View File

@@ -16,17 +16,27 @@ class TaggingFieldsField(TextAreaField):
}
super().__init__(*args, **kwargs)
# def _value(self):
# if self.data:
# return json.dumps(self.data)
# return ''
#
# def process_formdata(self, valuelist):
# if valuelist and valuelist[0]:
# try:
# self.data = json.loads(valuelist[0])
# except json.JSONDecodeError as e:
# raise ValueError('Not valid JSON content')
class TaggingFieldsFilterField(TextAreaField):
    """Text area used to edit tagging fields filter conditions."""

    def __init__(self, *args, **kwargs):
        # The JSON-editor attributes always replace any caller-supplied render_kw
        editor_attributes = {
            'class': 'json-editor',
            'data-handle-enter': 'true',
        }
        kwargs['render_kw'] = editor_attributes
        super().__init__(*args, **kwargs)
class DynamicArgumentsField(TextAreaField):
    """Text area used to edit the dynamic arguments configuration."""

    def __init__(self, *args, **kwargs):
        # The JSON-editor attributes always replace any caller-supplied render_kw
        editor_attributes = {
            'class': 'json-editor',
            'data-handle-enter': 'true',
        }
        kwargs['render_kw'] = editor_attributes
        super().__init__(*args, **kwargs)
class ChunkingPatternsField(TextAreaField):
@@ -37,15 +47,6 @@ class ChunkingPatternsField(TextAreaField):
}
super().__init__(*args, **kwargs)
# def _value(self):
# if self.data:
# return '\n'.join(self.data)
# return ''
#
# def process_formdata(self, valuelist):
# if valuelist and valuelist[0]:
# self.data = [line.strip() for line in valuelist[0].split('\n') if line.strip()]
class DynamicFormBase(FlaskForm):
def __init__(self, formdata=None, *args, **kwargs):
@@ -80,6 +81,10 @@ class DynamicFormBase(FlaskForm):
)
elif field_type == 'tagging_fields':
validators_list.append(self._validate_tagging_fields)
elif field_type == 'tagging_fields_filter':
validators_list.append(self._validate_tagging_fields_filter)
elif field_type == 'dynamic_arguments':
validators_list.append(self._validate_dynamic_arguments)
return validators_list
@@ -103,6 +108,116 @@ class DynamicFormBase(FlaskForm):
except Exception as e:
raise ValidationError(f"Invalid field definition: {str(e)}")
def _validate_tagging_fields_filter(self, form, field):
"""Validate the tagging fields filter structure"""
if not field.data:
return
try:
# Parse JSON data
filter_data = json.loads(field.data)
# Basic validation of filter structure
self._validate_filter_condition(filter_data)
except json.JSONDecodeError:
raise ValidationError("Invalid JSON format")
except ValidationError as e:
# Re-raise ValidationError from _validate_filter_condition
raise e
except Exception as e:
raise ValidationError(f"Invalid filter definition: {str(e)}")
def _validate_filter_condition(self, condition):
"""Recursively validate a filter condition structure"""
# Check if this is a logical condition (AND/OR/NOT)
if 'logical' in condition:
if condition['logical'] not in ['and', 'or', 'not']:
raise ValidationError(f"Invalid logical operator: {condition['logical']}")
if 'conditions' not in condition:
raise ValidationError("Missing 'conditions' array for logical operator")
if not isinstance(condition['conditions'], list):
raise ValidationError("'conditions' must be an array")
# Special case for NOT which should have exactly one condition
if condition['logical'] == 'not' and len(condition['conditions']) != 1:
raise ValidationError("'not' operator must have exactly one condition")
# Validate each sub-condition
for sub_condition in condition['conditions']:
self._validate_filter_condition(sub_condition)
# Check if this is a field condition
elif 'field' in condition:
if 'operator' not in condition:
raise ValidationError(f"Missing 'operator' for field condition on {condition['field']}")
if 'value' not in condition:
raise ValidationError(f"Missing 'value' for field condition on {condition['field']}")
# Validate operator types
# This is a simplified check - in a real implementation, you would validate
# against the actual field type from the catalog definition
valid_operators = {
'string': ['eq', 'neq', 'contains', 'not_contains', 'starts_with',
'ends_with', 'in', 'not_in', 'regex', 'not_regex'],
'numeric': ['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'between',
'not_between', 'in', 'not_in'],
'enum': ['eq', 'neq', 'in', 'not_in']
}
# For now, we just check that the operator is one of the known types
all_operators = set().union(*valid_operators.values())
if condition['operator'] not in all_operators:
raise ValidationError(f"Unknown operator '{condition['operator']}' for field {condition['field']}")
# Validate variable references if present
if isinstance(condition['value'], str) and condition['value'].startswith('$'):
# This is a variable reference - no further validation needed at form time
pass
# Additional validation based on operator type could be added here
else:
raise ValidationError("Filter condition must have either 'logical' or 'field' property")
def _validate_dynamic_arguments(self, form, field):
"""Validate the dynamic arguments structure"""
if not field.data:
return
try:
# Parse JSON data
args_data = json.loads(field.data)
# Validate basic structure (should be an object with argument definitions)
if not isinstance(args_data, dict):
raise ValidationError("Dynamic arguments must be an object with argument definitions")
# Validate each argument definition
for arg_name, arg_def in args_data.items():
if not isinstance(arg_def, dict):
raise ValidationError(f"Argument definition for '{arg_name}' must be an object")
# Check required properties
if 'type' not in arg_def:
raise ValidationError(f"Argument '{arg_name}' missing required 'type' property")
# Validate type
if arg_def['type'] not in ['string', 'integer', 'float', 'boolean', 'date', 'enum', 'object', 'array']:
raise ValidationError(f"Argument '{arg_name}' has invalid type: {arg_def['type']}")
# Validate enum fields have allowed_values
if arg_def['type'] == 'enum' and 'allowed_values' not in arg_def:
raise ValidationError(f"Enum argument '{arg_name}' missing required 'allowed_values' list")
except json.JSONDecodeError:
raise ValidationError("Invalid JSON format")
except Exception as e:
raise ValidationError(f"Invalid argument definition: {str(e)}")
def add_dynamic_fields(self, collection_name, config, initial_data=None):
"""Add dynamic fields to the form based on the configuration."""
self.dynamic_fields[collection_name] = []
@@ -117,11 +232,19 @@ class DynamicFormBase(FlaskForm):
# Determine standard validators
field_validators = self._create_field_validators(field_def)
# Handle special case for tagging_fields
# Handle special case for field types
if field_type == 'tagging_fields':
field_class = TaggingFieldsField
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'tagging_fields_filter':
field_class = TaggingFieldsFilterField
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'dynamic_arguments':
field_class = DynamicArgumentsField
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'enum':
field_class = SelectField
allowed_values = field_def.get('allowed_values', [])
@@ -148,7 +271,8 @@ class DynamicFormBase(FlaskForm):
field_data = None
if initial_data and field_name in initial_data:
field_data = initial_data[field_name]
if field_type == 'tagging_fields' and isinstance(field_data, dict):
if field_type in ['tagging_fields', 'tagging_fields_filter', 'dynamic_arguments'] and isinstance(
field_data, dict):
try:
field_data = json.dumps(field_data, indent=2)
except (TypeError, ValueError) as e:
@@ -221,8 +345,8 @@ class DynamicFormBase(FlaskForm):
for full_field_name in self.dynamic_fields[collection_name]:
original_field_name = full_field_name[prefix_length:]
field = getattr(self, full_field_name)
# Parse JSON for tagging_fields type
if isinstance(field, TaggingFieldsField) and field.data:
# Parse JSON for special field types
if isinstance(field, (TaggingFieldsField, TaggingFieldsFilterField, DynamicArgumentsField)) and field.data:
try:
data[original_field_name] = json.loads(field.data)
except json.JSONDecodeError:
@@ -283,4 +407,3 @@ def validate_tagging_fields(form, field):
raise ValidationError("Invalid JSON format")
except (TypeError, ValueError) as e:
raise ValidationError(f"Invalid field definition: {str(e)}")