- Implementation of specialist execution API, including SSE protocol

- eveai_chat is deprecated and should be replaced with SSE
- Adaptation of STANDARD_RAG specialist
- Base class definition for implementing specialists with the crewai framework
- Implementation of SPIN_SPECIALIST
- Implementation of a test app for testing specialists (test_specialist_client), which also serves as an example for future SSE-based clients
- Improvements to startup scripts to better handle and scale multiple connections
- Small improvements to the interaction forms and views
- Caching implementation improved and augmented with additional caches
2025-02-20 05:50:16 +01:00
parent d106520d22
commit 25213f2004
79 changed files with 2791 additions and 347 deletions
--- a/common/utils/cache/init.py
+++ b/common/utils/cache/init.py
--- a/common/utils/cache/base.py
+++ b/common/utils/cache/base.py
@@ -138,11 +138,14 @@ class CacheHandler(Generic[T]):
            Cached or newly created value
        """
        cache_key = self.generate_key(**identifiers)
-        current_app.logger.debug(f"Cache key: {cache_key}")
+        current_app.logger.debug(f"Getting Cache key: {cache_key}")

        def creator():
            instance = creator_func(**identifiers)
-            return self._to_cache_data(instance)
+            current_app.logger.debug("Caching object created and received. Now serializing...")
+            serialized_instance = self._to_cache_data(instance)
+            current_app.logger.debug(f"Caching object serialized and received:\n{serialized_instance}")
+            return serialized_instance

        cached_data = self.region.get_or_create(
            cache_key,
--- a/common/utils/cache/config_cache.py
+++ b/common/utils/cache/config_cache.py
@@ -6,7 +6,7 @@ import os
 from flask import current_app

 from common.utils.cache.base import CacheHandler, CacheKey
-from config.type_defs import agent_types, task_types, tool_types, specialist_types
+from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types


 def is_major_minor(version: str) -> bool:
@@ -72,6 +72,7 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
        current_app.logger.debug(f"Loading specific configuration for {type_name}, version: {version_str} - no cache")
        version_tree = self.version_tree_cache.get_versions(type_name)
        versions = version_tree['versions']
+        current_app.logger.debug(f"Loaded specific versions for {type_name}, versions: {versions}")

        if version_str == 'latest':
            version_str = version_tree['latest_version']
@@ -80,6 +81,7 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
            raise ValueError(f"Version {version_str} not found for {type_name}")

        file_path = versions[version_str]['file_path']
+        current_app.logger.debug(f'Trying to load configuration from {file_path}')

        try:
            with open(file_path) as f:
@@ -418,3 +420,48 @@ SpecialistConfigCacheHandler, SpecialistConfigVersionTreeCacheHandler, Specialis
        config_dir='config/specialists',
        types_module=specialist_types.SPECIALIST_TYPES
    ))
+
+# Retriever cache handlers (config, version tree, types), created with config_dir 'config/retrievers'.
+RetrieverConfigCacheHandler, RetrieverConfigVersionTreeCacheHandler, RetrieverConfigTypesCacheHandler = (
+    create_config_cache_handlers(
+        config_type='retrievers',
+        config_dir='config/retrievers',
+        types_module=retriever_types.RETRIEVER_TYPES
+    ))
+
+# Prompt cache handlers (config, version tree, types), created with config_dir 'config/prompts'.
+PromptConfigCacheHandler, PromptConfigVersionTreeCacheHandler, PromptConfigTypesCacheHandler = (
+    create_config_cache_handlers(
+        config_type='prompts',
+        config_dir='config/prompts',
+        types_module=prompt_types.PROMPT_TYPES
+
+    ))
+
+
+def register_config_cache_handlers(cache_manager) -> None:
+    cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config')
+    cache_manager.register_handler(AgentConfigTypesCacheHandler, 'eveai_config')
+    cache_manager.register_handler(AgentConfigVersionTreeCacheHandler, 'eveai_config')
+    cache_manager.register_handler(TaskConfigCacheHandler, 'eveai_config')
+    cache_manager.register_handler(TaskConfigTypesCacheHandler, 'eveai_config')
+    cache_manager.register_handler(TaskConfigVersionTreeCacheHandler, 'eveai_config')
+    cache_manager.register_handler(ToolConfigCacheHandler, 'eveai_config')
+    cache_manager.register_handler(ToolConfigTypesCacheHandler, 'eveai_config')
+    cache_manager.register_handler(ToolConfigVersionTreeCacheHandler, 'eveai_config')
+    cache_manager.register_handler(SpecialistConfigCacheHandler, 'eveai_config')
+    cache_manager.register_handler(SpecialistConfigTypesCacheHandler, 'eveai_config')
+    cache_manager.register_handler(SpecialistConfigVersionTreeCacheHandler, 'eveai_config')
+    cache_manager.register_handler(RetrieverConfigCacheHandler, 'eveai_config')
+    cache_manager.register_handler(RetrieverConfigTypesCacheHandler, 'eveai_config')
+    cache_manager.register_handler(RetrieverConfigVersionTreeCacheHandler, 'eveai_config')
+    cache_manager.register_handler(PromptConfigCacheHandler, 'eveai_config')
+    cache_manager.register_handler(PromptConfigVersionTreeCacheHandler, 'eveai_config')
+    cache_manager.register_handler(PromptConfigTypesCacheHandler, 'eveai_config')
+    # Link every config cache to its version-tree cache; presumably these attributes exist once registered - verify
+    cache_manager.agents_config_cache.set_version_tree_cache(cache_manager.agents_version_tree_cache)
+    cache_manager.tasks_config_cache.set_version_tree_cache(cache_manager.tasks_version_tree_cache)
+    cache_manager.tools_config_cache.set_version_tree_cache(cache_manager.tools_version_tree_cache)
+    cache_manager.specialists_config_cache.set_version_tree_cache(cache_manager.specialists_version_tree_cache)
+    cache_manager.retrievers_config_cache.set_version_tree_cache(cache_manager.retrievers_version_tree_cache)
+    cache_manager.prompts_config_cache.set_version_tree_cache(cache_manager.prompts_version_tree_cache)
--- a/common/utils/cache/crewai_config_processor.py
+++ b/common/utils/cache/crewai_config_processor.py
@@ -0,0 +1,220 @@
+from typing import Dict, Any, Type, TypeVar, List
+from abc import ABC, abstractmethod
+from flask import current_app
+
+from common.extensions import cache_manager, db
+from common.models.interaction import EveAIAgent, EveAITask, EveAITool, Specialist
+from common.utils.cache.crewai_configuration import (
+    ProcessedAgentConfig, ProcessedTaskConfig, ProcessedToolConfig,
+    SpecialistProcessedConfig
+)
+
+T = TypeVar('T')  # For generic model types
+
+
+class BaseCrewAIConfigProcessor:
+    """Merges cached type configurations with per-specialist DB overrides into a processed config"""
+
+    # Agent model attribute -> name of the "{placeholder}" it fills in the configured texts
+    AGENT_FIELD_MAPPING = {
+        'role': 'custom_role',
+        'goal': 'custom_goal',
+        'backstory': 'custom_backstory'
+    }
+
+    TASK_FIELD_MAPPING = {
+        'task_description': 'custom_description',
+        'expected_output': 'custom_expected_output'
+    }
+
+    def __init__(self, tenant_id: int, specialist_id: int):
+        self.tenant_id = tenant_id  # only used for log output within this class
+        self.specialist_id = specialist_id
+        self.specialist = self._get_specialist()  # raises ValueError for an unknown id
+        self.verbose = self._get_verbose_setting()
+
+    def _get_specialist(self) -> Specialist:
+        """Load the Specialist row for specialist_id; raise ValueError when it does not exist"""
+        specialist = Specialist.query.get(self.specialist_id)
+        if not specialist:
+            raise ValueError(f"Specialist {self.specialist_id} not found")
+        return specialist
+
+    def _get_verbose_setting(self) -> bool:
+        """Return True when the specialist's tuning attribute is truthy"""
+        return bool(self.specialist.tuning)
+
+    def _get_db_items(self, model_class: Type[T], type_list: List[str]) -> Dict[str, T]:
+        """Fetch this specialist's rows of model_class whose type is in type_list, keyed by type"""
+        items = (model_class.query
+                 .filter_by(specialist_id=self.specialist_id)
+                 .filter(model_class.type.in_(type_list))
+                 .all())
+        return {item.type: item for item in items}
+
+    def _apply_replacements(self, text: str, replacements: Dict[str, str]) -> str:
+        """Replace each "{key}" placeholder in text with str(value), skipping None values"""
+        result = text
+        for key, value in replacements.items():
+            if value is not None:  # Only replace if value exists
+                placeholder = "{" + key + "}"
+                result = result.replace(placeholder, str(value))
+        return result
+
+    def _process_agent_configs(self, specialist_config: Dict[str, Any]) -> Dict[str, ProcessedAgentConfig]:
+        """Build one ProcessedAgentConfig per entry in specialist_config['agents'], keyed by lower-cased type"""
+        agent_configs = {}
+
+        if 'agents' not in specialist_config:
+            return agent_configs
+
+        # Single query for all agent rows of this specialist instead of one query per definition
+        agent_types = [agent_def['type'] for agent_def in specialist_config['agents']]
+        db_agents = self._get_db_items(EveAIAgent, agent_types)
+
+        for agent_def in specialist_config['agents']:
+            agent_type = agent_def['type']
+            agent_type_lower = agent_type.lower()
+            db_agent = db_agents.get(agent_type)
+
+            # Cached type configuration; the version falls back to '1.0' when the definition omits it
+            config = cache_manager.agents_config_cache.get_config(
+                agent_type,
+                agent_def.get('version', '1.0')
+            )
+
+            # Start with YAML values
+            role = config['role']
+            goal = config['goal']
+            backstory = config['backstory']
+
+            # Fill the {custom_*} placeholders with non-empty DB values; unfilled placeholders stay in the text
+            if db_agent:
+                for model_field, placeholder in self.AGENT_FIELD_MAPPING.items():
+                    value = getattr(db_agent, model_field)
+                    if value:
+                        placeholder_text = "{" + placeholder + "}"
+                        role = role.replace(placeholder_text, value)
+                        goal = goal.replace(placeholder_text, value)
+                        backstory = backstory.replace(placeholder_text, value)
+
+            agent_configs[agent_type_lower] = ProcessedAgentConfig(
+                role=role,
+                goal=goal,
+                backstory=backstory,
+                name=agent_def.get('name') or config.get('name', agent_type_lower),
+                type=agent_type,
+                description=agent_def.get('description') or config.get('description'),
+                verbose=self.verbose
+            )
+
+        return agent_configs
+
+    def _process_task_configs(self, specialist_config: Dict[str, Any]) -> Dict[str, ProcessedTaskConfig]:
+        """Build one ProcessedTaskConfig per entry in specialist_config['tasks'], keyed by lower-cased type"""
+        task_configs = {}
+
+        if 'tasks' not in specialist_config:
+            return task_configs
+
+        # Single query for all task rows of this specialist instead of one query per definition
+        task_types = [task_def['type'] for task_def in specialist_config['tasks']]
+        db_tasks = self._get_db_items(EveAITask, task_types)
+
+        for task_def in specialist_config['tasks']:
+            task_type = task_def['type']
+            task_type_lower = task_type.lower()
+            db_task = db_tasks.get(task_type)
+
+            # Cached type configuration; the version falls back to '1.0' when the definition omits it
+            config = cache_manager.tasks_config_cache.get_config(
+                task_type,
+                task_def.get('version', '1.0')
+            )
+
+            # Start with YAML values
+            task_description = config['task_description']
+            expected_output = config['expected_output']
+
+            # Fill the {custom_*} placeholders with non-empty DB values; unfilled placeholders stay in the text
+            if db_task:
+                for model_field, placeholder in self.TASK_FIELD_MAPPING.items():
+                    value = getattr(db_task, model_field)
+                    if value:
+                        placeholder_text = "{" + placeholder + "}"
+                        task_description = task_description.replace(placeholder_text, value)
+                        expected_output = expected_output.replace(placeholder_text, value)
+
+            task_configs[task_type_lower] = ProcessedTaskConfig(
+                task_description=task_description,
+                expected_output=expected_output,
+                name=task_def.get('name') or config.get('name', task_type_lower),
+                type=task_type,
+                description=task_def.get('description') or config.get('description'),
+                verbose=self.verbose
+            )
+
+        return task_configs
+
+    def _process_tool_configs(self, specialist_config: Dict[str, Any]) -> Dict[str, ProcessedToolConfig]:
+        """Build one ProcessedToolConfig per entry in specialist_config['tools'], keyed by lower-cased type"""
+        tool_configs = {}
+
+        if 'tools' not in specialist_config:
+            return tool_configs
+
+        # Single query for all tool rows of this specialist instead of one query per definition
+        tool_types = [tool_def['type'] for tool_def in specialist_config['tools']]
+        db_tools = self._get_db_items(EveAITool, tool_types)
+
+        for tool_def in specialist_config['tools']:
+            tool_type = tool_def['type']
+            tool_type_lower = tool_type.lower()
+            db_tool = db_tools.get(tool_type)
+
+            # Cached type configuration; the version falls back to '1.0' when the definition omits it
+            config = cache_manager.tools_config_cache.get_config(
+                tool_type,
+                tool_def.get('version', '1.0')
+            )
+
+            # Copy the defaults (also tolerating a null entry) so DB overrides never mutate the cached config dict
+            tool_config = dict(config.get('configuration') or {})
+            if db_tool and db_tool.configuration:
+                tool_config.update(db_tool.configuration)
+
+            tool_configs[tool_type_lower] = ProcessedToolConfig(
+                name=tool_def.get('name') or config.get('name', tool_type_lower),
+                type=tool_type,
+                description=tool_def.get('description') or config.get('description'),
+                configuration=tool_config,
+                verbose=self.verbose
+            )
+
+        return tool_configs
+
+    def process_config(self) -> SpecialistProcessedConfig:
+        """Build the processed agents/tasks/tools config for this specialist; errors are logged and re-raised"""
+        try:
+            # Specialist-level configuration for the specialist's own type and type_version
+            specialist_config = cache_manager.specialists_config_cache.get_config(
+                self.specialist.type,
+                self.specialist.type_version
+            )
+
+            if not specialist_config:
+                raise ValueError(f"No configuration found for {self.specialist.type}")
+
+            # Sections missing from the specialist configuration yield empty dictionaries
+            processed_config = SpecialistProcessedConfig(
+                agents=self._process_agent_configs(specialist_config),
+                tasks=self._process_task_configs(specialist_config),
+                tools=self._process_tool_configs(specialist_config)
+            )
+            current_app.logger.debug(f"Processed config for tenant {self.tenant_id}, specialist {self.specialist_id}:\n"
+                                     f"{processed_config}")
+            return processed_config
+
+        except Exception as e:
+            current_app.logger.error(f"Error processing specialist configuration: {e}")
+            raise
--- a/common/utils/cache/crewai_configuration.py
+++ b/common/utils/cache/crewai_configuration.py
@@ -0,0 +1,126 @@
+from dataclasses import dataclass
+from typing import Dict, Any, Optional
+
+
+@dataclass
+class ProcessedAgentConfig:
+    """Agent configuration in its processed, ready-to-use form"""
+    role: str
+    goal: str
+    backstory: str
+    name: str
+    type: str
+    description: Optional[str] = None
+    verbose: bool = False
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return every field as a plain dictionary entry for serialization"""
+        return dict(
+            role=self.role,
+            goal=self.goal,
+            backstory=self.backstory,
+            name=self.name,
+            type=self.type,
+            description=self.description,
+            verbose=self.verbose
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedAgentConfig':
+        """Build an instance from a dictionary keyed by field name"""
+        return cls(**data)
+
+
+@dataclass
+class ProcessedTaskConfig:
+    """Task configuration in its processed, ready-to-use form"""
+    task_description: str
+    expected_output: str
+    name: str
+    type: str
+    description: Optional[str] = None
+    verbose: bool = False
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return every field as a plain dictionary entry for serialization"""
+        return dict(
+            task_description=self.task_description,
+            expected_output=self.expected_output,
+            name=self.name,
+            type=self.type,
+            description=self.description,
+            verbose=self.verbose
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedTaskConfig':
+        """Build an instance from a dictionary keyed by field name"""
+        return cls(**data)
+
+
+@dataclass
+class ProcessedToolConfig:
+    """Tool configuration in its processed, ready-to-use form"""
+    name: str
+    type: str
+    description: Optional[str] = None
+    configuration: Optional[Dict[str, Any]] = None
+    verbose: bool = False
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return every field as a plain dictionary entry (configuration is not copied)"""
+        return dict(
+            name=self.name,
+            type=self.type,
+            description=self.description,
+            configuration=self.configuration,
+            verbose=self.verbose
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedToolConfig':
+        """Build an instance from a dictionary keyed by field name"""
+        return cls(**data)
+
+
+@dataclass
+class SpecialistProcessedConfig:
+    """All processed agent, task and tool configurations of one specialist"""
+    agents: Dict[str, ProcessedAgentConfig]
+    tasks: Dict[str, ProcessedTaskConfig]
+    tools: Dict[str, ProcessedToolConfig]
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize every nested configuration into plain dictionaries"""
+        return dict(
+            agents={
+                key: agent.to_dict()
+                for key, agent in self.agents.items()
+            },
+            tasks={
+                key: task.to_dict()
+                for key, task in self.tasks.items()
+            },
+            tools={
+                key: tool.to_dict()
+                for key, tool in self.tools.items()
+            }
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'SpecialistProcessedConfig':
+        """Rebuild the nested configuration objects from plain dictionaries"""
+        return cls(
+            agents={
+                key: ProcessedAgentConfig.from_dict(raw)
+                for key, raw in data['agents'].items()
+            },
+            tasks={
+                key: ProcessedTaskConfig.from_dict(raw)
+                for key, raw in data['tasks'].items()
+            },
+            tools={
+                key: ProcessedToolConfig.from_dict(raw)
+                for key, raw in data['tools'].items()
+            }
+        )
--- a/common/utils/cache/crewai_processed_config_cache.py
+++ b/common/utils/cache/crewai_processed_config_cache.py
@@ -0,0 +1,75 @@
+from typing import Dict, Any, Type
+from flask import current_app
+
+from common.utils.cache.base import CacheHandler
+from common.utils.cache.crewai_configuration import SpecialistProcessedConfig
+from common.utils.cache.crewai_config_processor import BaseCrewAIConfigProcessor
+
+
+class CrewAIProcessedConfigCacheHandler(CacheHandler[SpecialistProcessedConfig]):
+    """Caches processed specialist configurations, keyed by tenant_id and specialist_id"""
+    handler_name = 'crewai_processed_config_cache'
+
+    def __init__(self, region):
+        super().__init__(region, 'crewai_processed_config')
+        self.configure_keys('tenant_id', 'specialist_id')
+
+    def _to_cache_data(self, instance: SpecialistProcessedConfig) -> Dict[str, Any]:
+        """Serialize the processed config to a plain dict via its to_dict()"""
+        return instance.to_dict()
+
+    def _from_cache_data(self, data: Dict[str, Any], **kwargs) -> SpecialistProcessedConfig:
+        """Rebuild a SpecialistProcessedConfig from cached dict data; extra kwargs are ignored"""
+        return SpecialistProcessedConfig.from_dict(data)
+
+    def _should_cache(self, value: Dict[str, Any]) -> bool:
+        """Return True only when all three sections exist and agents or tasks is non-empty"""
+        required_keys = {'agents', 'tasks', 'tools'}
+        if not all(key in value for key in required_keys):
+            current_app.logger.warning(f'CrewAI Processed Config Cache missing required keys: {required_keys}')
+            return False
+        return bool(value['agents'] or value['tasks'])
+
+    def get_specialist_config(self, tenant_id: int, specialist_id: int) -> SpecialistProcessedConfig:
+        """
+        Get or create processed configuration for a specialist
+
+        Args:
+            tenant_id: Tenant ID
+            specialist_id: Specialist ID
+
+        Returns:
+            Processed specialist configuration
+
+        Raises:
+            ValueError: Raised by the processor if the specialist or its configuration is not found
+        """
+
+        def creator_func(tenant_id: int, specialist_id: int) -> SpecialistProcessedConfig:
+            # Passed to self.get() as the creator; builds the config with a fresh processor
+            processor = BaseCrewAIConfigProcessor(tenant_id, specialist_id)
+            return processor.process_config()
+
+        return self.get(
+            creator_func,
+            tenant_id=tenant_id,
+            specialist_id=specialist_id
+        )
+
+    def invalidate_tenant_specialist(self, tenant_id: int, specialist_id: int):
+        """Invalidate the cache entry of one tenant/specialist pair and log it at info level"""
+        self.invalidate(
+            tenant_id=tenant_id,
+            specialist_id=specialist_id
+        )
+        current_app.logger.info(
+            f"Invalidated cache for tenant {tenant_id} specialist {specialist_id}"
+        )
+
+
+def register_specialist_cache_handlers(cache_manager) -> None:
+    """Register CrewAIProcessedConfigCacheHandler with the cache manager under 'eveai_chat_workers'"""
+    cache_manager.register_handler(
+        CrewAIProcessedConfigCacheHandler,
+        'eveai_chat_workers'
+    )