- Implementation of the specialist execution API, including an SSE streaming protocol

- eveai_chat is now deprecated and should be replaced with the SSE-based execution API
- Adaptation of the STANDARD_RAG specialist
- Base class definition that allows specialists to be realised with the CrewAI framework
- Implementation of the SPIN_SPECIALIST
- Implementation of a test app for exercising specialists (test_specialist_client); it also serves as an example for future SSE-based clients (see the sketch after this list)
- Improvements to the startup scripts to better handle and scale multiple connections
- Small improvements to the interaction forms and views
- Improved caching implementation, augmented with additional caches
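The new execution API streams progress over SSE. As a taste of the protocol, here is a minimal consumer sketch in the spirit of test_specialist_client; the endpoint path is an assumption for illustration, and the real client shipped in this commit remains the authoritative example.

```python
# Hedged sketch of an SSE consumer; the URL path and event names are
# assumptions based on ExecutionProgressTracker below, not a documented contract.
import json
import requests

def follow_execution(base_url: str, ctask_id: str) -> None:
    url = f"{base_url}/specialists/executions/{ctask_id}/stream"  # hypothetical path
    with requests.get(url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line or line.startswith(":"):  # skip blanks and keepalives
                continue
            if line.startswith("data: "):
                update = json.loads(line[len("data: "):])
                print(update.get("processing_type"), update.get("data"))
                if update.get("processing_type") in ("Task Complete", "Task Error"):
                    break
```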
Josako
2025-02-20 05:50:16 +01:00
parent d106520d22
commit 25213f2004
79 changed files with 2791 additions and 347 deletions

View File

@@ -56,6 +56,7 @@ class Retriever(db.Model):
     description = db.Column(db.Text, nullable=True)
     catalog_id = db.Column(db.Integer, db.ForeignKey('catalog.id'), nullable=True)
     type = db.Column(db.String(50), nullable=False, default="STANDARD_RAG")
+    type_version = db.Column(db.String(20), nullable=True, default="STANDARD_RAG")
     tuning = db.Column(db.Boolean, nullable=True, default=False)

     # Meta Data

common/utils/cache/__init__.py (new empty file, 0 changes)
View File

View File

@@ -138,11 +138,14 @@ class CacheHandler(Generic[T]):
             Cached or newly created value
         """
         cache_key = self.generate_key(**identifiers)
-        current_app.logger.debug(f"Cache key: {cache_key}")
+        current_app.logger.debug(f"Getting Cache key: {cache_key}")

         def creator():
             instance = creator_func(**identifiers)
-            return self._to_cache_data(instance)
+            current_app.logger.debug("Caching object created and received. Now serializing...")
+            serialized_instance = self._to_cache_data(instance)
+            current_app.logger.debug(f"Caching object serialized and received:\n{serialized_instance}")
+            return serialized_instance

         cached_data = self.region.get_or_create(
             cache_key,
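The hunk is cut off here, but the pattern is visible: a `creator` closure handed to `region.get_or_create`. For context, a self-contained sketch of that pattern, assuming the region is a dogpile.cache `CacheRegion` (which the `get_or_create` signature suggests, though the backing library is not named in this hunk):

```python
# Standalone illustration of the get_or_create pattern; not project code.
from dogpile.cache import make_region

region = make_region().configure("dogpile.cache.memory")

def creator():
    # Runs only on a cache miss; its return value is cached under the key.
    return {"answer": 42}

value = region.get_or_create("demo_key", creator)        # miss: calls creator
value_again = region.get_or_create("demo_key", creator)  # hit: served from cache
```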

View File

@@ -6,7 +6,7 @@ import os
 from flask import current_app

 from common.utils.cache.base import CacheHandler, CacheKey
-from config.type_defs import agent_types, task_types, tool_types, specialist_types
+from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types


 def is_major_minor(version: str) -> bool:

@@ -72,6 +72,7 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
         current_app.logger.debug(f"Loading specific configuration for {type_name}, version: {version_str} - no cache")
         version_tree = self.version_tree_cache.get_versions(type_name)
         versions = version_tree['versions']
+        current_app.logger.debug(f"Loaded specific versions for {type_name}, versions: {versions}")

         if version_str == 'latest':
             version_str = version_tree['latest_version']

@@ -80,6 +81,7 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
             raise ValueError(f"Version {version_str} not found for {type_name}")

         file_path = versions[version_str]['file_path']
+        current_app.logger.debug(f'Trying to load configuration from {file_path}')

         try:
             with open(file_path) as f:
try:
with open(file_path) as f:
@@ -418,3 +420,48 @@ SpecialistConfigCacheHandler, SpecialistConfigVersionTreeCacheHandler, Specialis
        config_dir='config/specialists',
        types_module=specialist_types.SPECIALIST_TYPES
    ))

RetrieverConfigCacheHandler, RetrieverConfigVersionTreeCacheHandler, RetrieverConfigTypesCacheHandler = (
    create_config_cache_handlers(
        config_type='retrievers',
        config_dir='config/retrievers',
        types_module=retriever_types.RETRIEVER_TYPES
    ))

PromptConfigCacheHandler, PromptConfigVersionTreeCacheHandler, PromptConfigTypesCacheHandler = (
    create_config_cache_handlers(
        config_type='prompts',
        config_dir='config/prompts',
        types_module=prompt_types.PROMPT_TYPES
    ))


def register_config_cache_handlers(cache_manager) -> None:
    cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config')
    cache_manager.register_handler(AgentConfigTypesCacheHandler, 'eveai_config')
    cache_manager.register_handler(AgentConfigVersionTreeCacheHandler, 'eveai_config')

    cache_manager.register_handler(TaskConfigCacheHandler, 'eveai_config')
    cache_manager.register_handler(TaskConfigTypesCacheHandler, 'eveai_config')
    cache_manager.register_handler(TaskConfigVersionTreeCacheHandler, 'eveai_config')

    cache_manager.register_handler(ToolConfigCacheHandler, 'eveai_config')
    cache_manager.register_handler(ToolConfigTypesCacheHandler, 'eveai_config')
    cache_manager.register_handler(ToolConfigVersionTreeCacheHandler, 'eveai_config')

    cache_manager.register_handler(SpecialistConfigCacheHandler, 'eveai_config')
    cache_manager.register_handler(SpecialistConfigTypesCacheHandler, 'eveai_config')
    cache_manager.register_handler(SpecialistConfigVersionTreeCacheHandler, 'eveai_config')

    cache_manager.register_handler(RetrieverConfigCacheHandler, 'eveai_config')
    cache_manager.register_handler(RetrieverConfigTypesCacheHandler, 'eveai_config')
    cache_manager.register_handler(RetrieverConfigVersionTreeCacheHandler, 'eveai_config')

    cache_manager.register_handler(PromptConfigCacheHandler, 'eveai_config')
    cache_manager.register_handler(PromptConfigVersionTreeCacheHandler, 'eveai_config')
    cache_manager.register_handler(PromptConfigTypesCacheHandler, 'eveai_config')

    cache_manager.agents_config_cache.set_version_tree_cache(cache_manager.agents_version_tree_cache)
    cache_manager.tasks_config_cache.set_version_tree_cache(cache_manager.tasks_version_tree_cache)
    cache_manager.tools_config_cache.set_version_tree_cache(cache_manager.tools_version_tree_cache)
    cache_manager.specialists_config_cache.set_version_tree_cache(cache_manager.specialists_version_tree_cache)
    cache_manager.retrievers_config_cache.set_version_tree_cache(cache_manager.retrievers_version_tree_cache)
    cache_manager.prompts_config_cache.set_version_tree_cache(cache_manager.prompts_version_tree_cache)
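With the retriever and prompt handlers registered alongside the existing ones, a lookup follows the same path as the other config caches. A hedged sketch; the type name 'SPIN_PROMPT' is invented for illustration, any type defined under config/prompts would do:

```python
# Assumes a Flask app context; 'SPIN_PROMPT' is a hypothetical prompt type.
from common.extensions import cache_manager

prompt_config = cache_manager.prompts_config_cache.get_config('SPIN_PROMPT', 'latest')
```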

View File

@@ -0,0 +1,220 @@
from typing import Dict, Any, Type, TypeVar, List
from abc import ABC, abstractmethod

from flask import current_app

from common.extensions import cache_manager, db
from common.models.interaction import EveAIAgent, EveAITask, EveAITool, Specialist
from common.utils.cache.crewai_configuration import (
    ProcessedAgentConfig, ProcessedTaskConfig, ProcessedToolConfig,
    SpecialistProcessedConfig
)

T = TypeVar('T')  # For generic model types


class BaseCrewAIConfigProcessor:
    """Base processor for specialist configurations"""

    # Standard mapping between model fields and template placeholders
    AGENT_FIELD_MAPPING = {
        'role': 'custom_role',
        'goal': 'custom_goal',
        'backstory': 'custom_backstory'
    }

    TASK_FIELD_MAPPING = {
        'task_description': 'custom_description',
        'expected_output': 'custom_expected_output'
    }

    def __init__(self, tenant_id: int, specialist_id: int):
        self.tenant_id = tenant_id
        self.specialist_id = specialist_id
        self.specialist = self._get_specialist()
        self.verbose = self._get_verbose_setting()

    def _get_specialist(self) -> Specialist:
        """Get specialist and verify existence"""
        specialist = Specialist.query.get(self.specialist_id)
        if not specialist:
            raise ValueError(f"Specialist {self.specialist_id} not found")
        return specialist

    def _get_verbose_setting(self) -> bool:
        """Get verbose setting from specialist"""
        return bool(self.specialist.tuning)

    def _get_db_items(self, model_class: Type[T], type_list: List[str]) -> Dict[str, T]:
        """Get database items of specified type"""
        items = (model_class.query
                 .filter_by(specialist_id=self.specialist_id)
                 .filter(model_class.type.in_(type_list))
                 .all())
        return {item.type: item for item in items}

    def _apply_replacements(self, text: str, replacements: Dict[str, str]) -> str:
        """Apply text replacements to a string"""
        result = text
        for key, value in replacements.items():
            if value is not None:  # Only replace if value exists
                placeholder = "{" + key + "}"
                result = result.replace(placeholder, str(value))
        return result

    def _process_agent_configs(self, specialist_config: Dict[str, Any]) -> Dict[str, ProcessedAgentConfig]:
        """Process all agent configurations"""
        agent_configs = {}
        if 'agents' not in specialist_config:
            return agent_configs

        # Get all DB agents at once
        agent_types = [agent_def['type'] for agent_def in specialist_config['agents']]
        db_agents = self._get_db_items(EveAIAgent, agent_types)

        for agent_def in specialist_config['agents']:
            agent_type = agent_def['type']
            agent_type_lower = agent_type.lower()
            db_agent = db_agents.get(agent_type)

            # Get full configuration
            config = cache_manager.agents_config_cache.get_config(
                agent_type,
                agent_def.get('version', '1.0')
            )

            # Start with YAML values
            role = config['role']
            goal = config['goal']
            backstory = config['backstory']

            # Apply DB values if they exist
            if db_agent:
                for model_field, placeholder in self.AGENT_FIELD_MAPPING.items():
                    value = getattr(db_agent, model_field)
                    if value:
                        placeholder_text = "{" + placeholder + "}"
                        role = role.replace(placeholder_text, value)
                        goal = goal.replace(placeholder_text, value)
                        backstory = backstory.replace(placeholder_text, value)

            agent_configs[agent_type_lower] = ProcessedAgentConfig(
                role=role,
                goal=goal,
                backstory=backstory,
                name=agent_def.get('name') or config.get('name', agent_type_lower),
                type=agent_type,
                description=agent_def.get('description') or config.get('description'),
                verbose=self.verbose
            )

        return agent_configs

    def _process_task_configs(self, specialist_config: Dict[str, Any]) -> Dict[str, ProcessedTaskConfig]:
        """Process all task configurations"""
        task_configs = {}
        if 'tasks' not in specialist_config:
            return task_configs

        # Get all DB tasks at once
        task_types = [task_def['type'] for task_def in specialist_config['tasks']]
        db_tasks = self._get_db_items(EveAITask, task_types)

        for task_def in specialist_config['tasks']:
            task_type = task_def['type']
            task_type_lower = task_type.lower()
            db_task = db_tasks.get(task_type)

            # Get full configuration
            config = cache_manager.tasks_config_cache.get_config(
                task_type,
                task_def.get('version', '1.0')
            )

            # Start with YAML values
            task_description = config['task_description']
            expected_output = config['expected_output']

            # Apply DB values if they exist
            if db_task:
                for model_field, placeholder in self.TASK_FIELD_MAPPING.items():
                    value = getattr(db_task, model_field)
                    if value:
                        placeholder_text = "{" + placeholder + "}"
                        task_description = task_description.replace(placeholder_text, value)
                        expected_output = expected_output.replace(placeholder_text, value)

            task_configs[task_type_lower] = ProcessedTaskConfig(
                task_description=task_description,
                expected_output=expected_output,
                name=task_def.get('name') or config.get('name', task_type_lower),
                type=task_type,
                description=task_def.get('description') or config.get('description'),
                verbose=self.verbose
            )

        return task_configs

    def _process_tool_configs(self, specialist_config: Dict[str, Any]) -> Dict[str, ProcessedToolConfig]:
        """Process all tool configurations"""
        tool_configs = {}
        if 'tools' not in specialist_config:
            return tool_configs

        # Get all DB tools at once
        tool_types = [tool_def['type'] for tool_def in specialist_config['tools']]
        db_tools = self._get_db_items(EveAITool, tool_types)

        for tool_def in specialist_config['tools']:
            tool_type = tool_def['type']
            tool_type_lower = tool_type.lower()
            db_tool = db_tools.get(tool_type)

            # Get full configuration
            config = cache_manager.tools_config_cache.get_config(
                tool_type,
                tool_def.get('version', '1.0')
            )

            # Combine configuration
            tool_config = config.get('configuration', {})
            if db_tool and db_tool.configuration:
                tool_config.update(db_tool.configuration)

            tool_configs[tool_type_lower] = ProcessedToolConfig(
                name=tool_def.get('name') or config.get('name', tool_type_lower),
                type=tool_type,
                description=tool_def.get('description') or config.get('description'),
                configuration=tool_config,
                verbose=self.verbose
            )

        return tool_configs

    def process_config(self) -> SpecialistProcessedConfig:
        """Process complete specialist configuration"""
        try:
            # Get full specialist configuration
            specialist_config = cache_manager.specialists_config_cache.get_config(
                self.specialist.type,
                self.specialist.type_version
            )
            if not specialist_config:
                raise ValueError(f"No configuration found for {self.specialist.type}")

            # Process all configurations
            processed_config = SpecialistProcessedConfig(
                agents=self._process_agent_configs(specialist_config),
                tasks=self._process_task_configs(specialist_config),
                tools=self._process_tool_configs(specialist_config)
            )
            current_app.logger.debug(f"Processed config for tenant {self.tenant_id}, specialist {self.specialist_id}:\n"
                                     f"{processed_config}")
            return processed_config

        except Exception as e:
            current_app.logger.error(f"Error processing specialist configuration: {e}")
            raise
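A hedged usage sketch of the processor; the IDs are invented, and an application context is required because the processor touches the database, the cache manager, and current_app:

```python
# Illustrative only: assumes `app` is the Flask application and that
# tenant 1 owns specialist 42 (made-up IDs).
with app.app_context():
    processor = BaseCrewAIConfigProcessor(tenant_id=1, specialist_id=42)
    processed = processor.process_config()
    print(list(processed.agents), list(processed.tasks), list(processed.tools))
```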

View File

@@ -0,0 +1,126 @@
from dataclasses import dataclass
from typing import Dict, Any, Optional


@dataclass
class ProcessedAgentConfig:
    """Processed and ready-to-use agent configuration"""
    role: str
    goal: str
    backstory: str
    name: str
    type: str
    description: Optional[str] = None
    verbose: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization"""
        return {
            'role': self.role,
            'goal': self.goal,
            'backstory': self.backstory,
            'name': self.name,
            'type': self.type,
            'description': self.description,
            'verbose': self.verbose
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedAgentConfig':
        """Create from dictionary"""
        return cls(**data)


@dataclass
class ProcessedTaskConfig:
    """Processed and ready-to-use task configuration"""
    task_description: str
    expected_output: str
    name: str
    type: str
    description: Optional[str] = None
    verbose: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization"""
        return {
            'task_description': self.task_description,
            'expected_output': self.expected_output,
            'name': self.name,
            'type': self.type,
            'description': self.description,
            'verbose': self.verbose
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedTaskConfig':
        """Create from dictionary"""
        return cls(**data)


@dataclass
class ProcessedToolConfig:
    """Processed and ready-to-use tool configuration"""
    name: str
    type: str
    description: Optional[str] = None
    configuration: Optional[Dict[str, Any]] = None
    verbose: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization"""
        return {
            'name': self.name,
            'type': self.type,
            'description': self.description,
            'configuration': self.configuration,
            'verbose': self.verbose
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedToolConfig':
        """Create from dictionary"""
        return cls(**data)


@dataclass
class SpecialistProcessedConfig:
    """Complete processed configuration for a specialist"""
    agents: Dict[str, ProcessedAgentConfig]
    tasks: Dict[str, ProcessedTaskConfig]
    tools: Dict[str, ProcessedToolConfig]

    def to_dict(self) -> Dict[str, Any]:
        """Convert entire configuration to dictionary"""
        return {
            'agents': {
                agent_type: config.to_dict()
                for agent_type, config in self.agents.items()
            },
            'tasks': {
                task_type: config.to_dict()
                for task_type, config in self.tasks.items()
            },
            'tools': {
                tool_type: config.to_dict()
                for tool_type, config in self.tools.items()
            }
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'SpecialistProcessedConfig':
        """Create from dictionary"""
        return cls(
            agents={
                agent_type: ProcessedAgentConfig.from_dict(config)
                for agent_type, config in data['agents'].items()
            },
            tasks={
                task_type: ProcessedTaskConfig.from_dict(config)
                for task_type, config in data['tasks'].items()
            },
            tools={
                tool_type: ProcessedToolConfig.from_dict(config)
                for tool_type, config in data['tools'].items()
            }
        )
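The to_dict/from_dict pairs are symmetric by construction, so a config survives a cache round trip unchanged. A quick check with invented values:

```python
cfg = ProcessedAgentConfig(
    role="Researcher", goal="Answer questions", backstory="Built for demos",
    name="researcher", type="RESEARCH_AGENT",
)
assert ProcessedAgentConfig.from_dict(cfg.to_dict()) == cfg  # dataclass equality
```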

View File

@@ -0,0 +1,75 @@
from typing import Dict, Any, Type

from flask import current_app

from common.utils.cache.base import CacheHandler
from common.utils.cache.crewai_configuration import SpecialistProcessedConfig
from common.utils.cache.crewai_config_processor import BaseCrewAIConfigProcessor


class CrewAIProcessedConfigCacheHandler(CacheHandler[SpecialistProcessedConfig]):
    """Handles caching of processed specialist configurations"""
    handler_name = 'crewai_processed_config_cache'

    def __init__(self, region):
        super().__init__(region, 'crewai_processed_config')
        self.configure_keys('tenant_id', 'specialist_id')

    def _to_cache_data(self, instance: SpecialistProcessedConfig) -> Dict[str, Any]:
        """Convert SpecialistProcessedConfig to cache data"""
        return instance.to_dict()

    def _from_cache_data(self, data: Dict[str, Any], **kwargs) -> SpecialistProcessedConfig:
        """Create SpecialistProcessedConfig from cache data"""
        return SpecialistProcessedConfig.from_dict(data)

    def _should_cache(self, value: Dict[str, Any]) -> bool:
        """Validate cache data"""
        required_keys = {'agents', 'tasks', 'tools'}
        if not all(key in value for key in required_keys):
            current_app.logger.warning(f'CrewAI Processed Config Cache missing required keys: {required_keys}')
            return False
        return bool(value['agents'] or value['tasks'])

    def get_specialist_config(self, tenant_id: int, specialist_id: int) -> SpecialistProcessedConfig:
        """
        Get or create processed configuration for a specialist

        Args:
            tenant_id: Tenant ID
            specialist_id: Specialist ID

        Returns:
            Processed specialist configuration

        Raises:
            ValueError: If specialist not found or processor not configured
        """
        def creator_func(tenant_id: int, specialist_id: int) -> SpecialistProcessedConfig:
            # Create processor instance and process config
            processor = BaseCrewAIConfigProcessor(tenant_id, specialist_id)
            return processor.process_config()

        return self.get(
            creator_func,
            tenant_id=tenant_id,
            specialist_id=specialist_id
        )

    def invalidate_tenant_specialist(self, tenant_id: int, specialist_id: int):
        """Invalidate cache for a specific tenant's specialist"""
        self.invalidate(
            tenant_id=tenant_id,
            specialist_id=specialist_id
        )
        current_app.logger.info(
            f"Invalidated cache for tenant {tenant_id} specialist {specialist_id}"
        )


def register_specialist_cache_handlers(cache_manager) -> None:
    """Register specialist cache handlers with cache manager"""
    cache_manager.register_handler(
        CrewAIProcessedConfigCacheHandler,
        'eveai_chat_workers'
    )
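Assuming the cache manager exposes registered handlers under their handler_name, as the attribute pattern earlier in this commit suggests (an assumption, not something this file states), a lookup would be a one-liner:

```python
# Assumption: handlers are reachable on cache_manager via handler_name.
processed = cache_manager.crewai_processed_config_cache.get_specialist_config(
    tenant_id=1, specialist_id=42  # illustrative IDs
)
```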

View File

@@ -0,0 +1,112 @@
# common/utils/execution_progress.py
import json
from datetime import datetime as dt, timezone as tz
from typing import Generator

from flask import current_app
from redis import Redis, RedisError


class ExecutionProgressTracker:
    """Tracks progress of specialist executions using Redis"""

    def __init__(self):
        try:
            redis_url = current_app.config['SPECIALIST_EXEC_PUBSUB']
            self.redis = Redis.from_url(redis_url, socket_timeout=5)
            # Test the connection
            self.redis.ping()
            self.expiry = 3600  # 1 hour expiry
        except RedisError as e:
            current_app.logger.error(f"Failed to connect to Redis: {str(e)}")
            raise
        except Exception as e:
            current_app.logger.error(f"Unexpected error during Redis initialization: {str(e)}")
            raise

    def _get_key(self, execution_id: str) -> str:
        return f"specialist_execution:{execution_id}"

    def send_update(self, ctask_id: str, processing_type: str, data: dict):
        """Send an update about execution progress"""
        try:
            key = self._get_key(ctask_id)

            # First verify Redis is still connected
            try:
                self.redis.ping()
            except RedisError:
                current_app.logger.error("Lost Redis connection. Attempting to reconnect...")
                self.__init__()  # Reinitialize connection

            update = {
                'processing_type': processing_type,
                'data': data,
                'timestamp': dt.now(tz=tz.utc)
            }

            # Log initial state
            try:
                orig_len = self.redis.llen(key)

                # Try to serialize the update and check the result
                try:
                    serialized_update = json.dumps(update, default=str)  # Add default handler for datetime
                except TypeError as e:
                    current_app.logger.error(f"Failed to serialize update: {str(e)}")
                    raise

                # Store update in list with pipeline for atomicity
                with self.redis.pipeline() as pipe:
                    pipe.rpush(key, serialized_update)
                    pipe.publish(key, serialized_update)
                    pipe.expire(key, self.expiry)
                    results = pipe.execute()

                new_len = self.redis.llen(key)
                if new_len <= orig_len:
                    current_app.logger.error(
                        f"List length did not increase as expected. Original: {orig_len}, New: {new_len}")
            except RedisError as e:
                current_app.logger.error(f"Redis operation failed: {str(e)}")
                raise
        except Exception as e:
            current_app.logger.error(f"Unexpected error in send_update: {str(e)}, type: {type(e)}")
            raise

    def get_updates(self, ctask_id: str) -> Generator[str, None, None]:
        key = self._get_key(ctask_id)
        pubsub = self.redis.pubsub()
        pubsub.subscribe(key)
        try:
            # First yield any existing updates
            length = self.redis.llen(key)
            if length > 0:
                updates = self.redis.lrange(key, 0, -1)
                for update in updates:
                    update_data = json.loads(update.decode('utf-8'))
                    # Use processing_type for the event
                    yield f"event: {update_data['processing_type']}\n"
                    yield f"data: {json.dumps(update_data)}\n\n"

            # Then listen for new updates
            while True:
                message = pubsub.get_message(timeout=30)
                if message is None:
                    yield ": keepalive\n\n"
                    continue
                if message['type'] == 'message':  # 'type' here is the Redis pub/sub message type
                    update_data = json.loads(message['data'].decode('utf-8'))
                    yield f"data: {message['data'].decode('utf-8')}\n\n"
                    # Check processing_type for completion
                    if update_data['processing_type'] in ['Task Complete', 'Task Error']:
                        break
        finally:
            pubsub.unsubscribe()
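get_updates already yields SSE-formatted strings, so wiring it to an endpoint is mostly a matter of the right mimetype. A minimal sketch; the blueprint and route path are hypothetical, only the Response/stream_with_context usage is standard Flask:

```python
# Hypothetical route wiring for the generator above.
from flask import Blueprint, Response, stream_with_context

sse_bp = Blueprint('sse_example', __name__)

@sse_bp.route('/specialists/executions/<ctask_id>/stream')
def stream_execution(ctask_id: str):
    tracker = ExecutionProgressTracker()
    return Response(
        stream_with_context(tracker.get_updates(ctask_id)),
        mimetype='text/event-stream',
        headers={'Cache-Control': 'no-cache'},
    )
```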

View File

@@ -0,0 +1,78 @@
from typing import Any, Dict, Union

from pydantic import BaseModel


def flatten_pydantic_model(model: BaseModel, merge_strategy: Dict[str, str] = {}) -> Dict[str, Any]:
    """
    Flattens a nested Pydantic model by bringing all attributes to the highest level.

    :param model: Pydantic model instance to be flattened.
    :param merge_strategy: Dictionary defining how to handle duplicate attributes.
    :return: Flattened dictionary representation of the model.
    """
    flat_dict = {}

    def recursive_flatten(obj: BaseModel, parent_key=""):
        for field_name, value in obj.model_dump(exclude_unset=True, by_alias=True).items():
            new_key = field_name  # Maintain original field names
            if isinstance(value, BaseModel):
                # Recursively flatten nested models
                recursive_flatten(value, new_key)
            elif isinstance(value, list) and all(isinstance(i, BaseModel) for i in value):
                # If it's a list of Pydantic models, flatten each element
                for item in value:
                    recursive_flatten(item, new_key)
            else:
                if new_key in flat_dict and new_key in merge_strategy:
                    # Apply merge strategy
                    if merge_strategy[new_key] == "add":
                        if isinstance(flat_dict[new_key], list) and isinstance(value, list):
                            flat_dict[new_key] += value  # Concatenate lists
                        elif isinstance(flat_dict[new_key], (int, float)) and isinstance(value, (int, float)):
                            flat_dict[new_key] += value  # Sum numbers
                        elif isinstance(flat_dict[new_key], str) and isinstance(value, str):
                            flat_dict[new_key] += "\n" + value  # Concatenate strings
                    elif merge_strategy[new_key] == "first":
                        pass  # Keep the first occurrence
                    elif merge_strategy[new_key] == "last":
                        flat_dict[new_key] = value
                else:
                    flat_dict[new_key] = value

    recursive_flatten(model)
    return flat_dict


def merge_dicts(base_dict: Dict[str, Any], new_data: Union[Dict[str, Any], BaseModel],
                merge_strategy: Dict[str, str]) -> Dict[str, Any]:
    """
    Merges a Pydantic model (or dictionary) into an existing dictionary based on a merge strategy.

    :param base_dict: The base dictionary to merge into.
    :param new_data: The new Pydantic model or dictionary to merge.
    :param merge_strategy: Dict defining how to merge duplicate attributes.
    :return: Updated dictionary after merging.
    """
    if isinstance(new_data, BaseModel):
        new_data = flatten_pydantic_model(new_data)  # Convert Pydantic model to dict

    for key, value in new_data.items():
        if key in base_dict and key in merge_strategy:
            strategy = merge_strategy[key]
            if strategy == "add":
                if isinstance(base_dict[key], list) and isinstance(value, list):
                    base_dict[key] += value  # Concatenate lists
                elif isinstance(base_dict[key], (int, float)) and isinstance(value, (int, float)):
                    base_dict[key] += value  # Sum numbers
                elif isinstance(base_dict[key], str) and isinstance(value, str):
                    base_dict[key] += " " + value  # Concatenate strings
            elif strategy == "first":
                pass  # Keep the first occurrence (do nothing)
            elif strategy == "last":
                base_dict[key] = value  # Always overwrite with latest value
        else:
            base_dict[key] = value  # Add new field
    return base_dict
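A small self-contained example of the merge strategies (values invented). With "add", strings concatenate with a space; "first" keeps the existing value; keys without a strategy are simply added or overwritten:

```python
base = {"notes": "first pass", "score": 1}
incoming = {"notes": "second pass", "score": 2, "extra": True}

merged = merge_dicts(base, incoming, merge_strategy={"notes": "add", "score": "first"})
assert merged == {"notes": "first pass second pass", "score": 1, "extra": True}
```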

View File

@@ -33,6 +33,7 @@ def perform_startup_invalidation(app):
     # Perform invalidation
     cache_manager.invalidate_region('eveai_config')
     cache_manager.invalidate_region('eveai_chat_workers')
+    app.logger.debug(f"Cache keys after invalidation: {redis_client.keys('*')}")