- Add configuration of agents, tasks, tools, specialist in context of SPIN specialist

- correct startup of applications using gevent - introduce startup scripts (eveai_app) - caching manager for all configurations
2025-01-16 20:27:00 +01:00
parent f7cd58ed2a
commit 7bddeb0ebd
69 changed files with 1991 additions and 345 deletions
--- a/common/extensions.py
+++ b/common/extensions.py
@@ -35,4 +35,6 @@ simple_encryption = SimpleEncryption()
 minio_client = MinioClient()
 metrics = PrometheusMetrics.for_app_factory()
 template_manager = TemplateManager()
+# Caching classes
 cache_manager = EveAICacheManager()
+
--- a/common/models/interaction.py
+++ b/common/models/interaction.py
@@ -25,6 +25,7 @@ class Specialist(db.Model):
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)
    type = db.Column(db.String(50), nullable=False, default="STANDARD_RAG")
+    type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
    tuning = db.Column(db.Boolean, nullable=True, default=False)
    configuration = db.Column(JSONB, nullable=True)
    arguments = db.Column(JSONB, nullable=True)
@@ -32,6 +33,10 @@ class Specialist(db.Model):
    # Relationship to retrievers through the association table
    retrievers = db.relationship('SpecialistRetriever', backref='specialist', lazy=True,
                                 cascade="all, delete-orphan")
+    agents = db.relationship('EveAIAgent', backref='specialist', lazy=True)
+    tasks = db.relationship('EveAITask', backref='specialist', lazy=True)
+    tools = db.relationship('EveAITool', backref='specialist', lazy=True)
+    dispatchers = db.relationship('SpecialistDispatcher', backref='specialist', lazy=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
@@ -40,6 +45,84 @@ class Specialist(db.Model):
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))


+class EveAIAgent(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False)
+    name = db.Column(db.String(50), nullable=False)
+    description = db.Column(db.Text, nullable=True)
+    type = db.Column(db.String(50), nullable=False, default="STANDARD_RAG")
+    type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
+    role = db.Column(db.Text, nullable=True)
+    goal = db.Column(db.Text, nullable=True)
+    backstory = db.Column(db.Text, nullable=True)
+    tuning = db.Column(db.Boolean, nullable=True, default=False)
+    configuration = db.Column(JSONB, nullable=True)
+    arguments = db.Column(JSONB, nullable=True)
+
+    # Versioning Information
+    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
+    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
+    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
+    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
+
+
+class EveAITask(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False)
+    name = db.Column(db.String(50), nullable=False)
+    description = db.Column(db.Text, nullable=True)
+    type = db.Column(db.String(50), nullable=False, default="STANDARD_RAG")
+    type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
+    expected_output = db.Column(db.Text, nullable=True)
+    tuning = db.Column(db.Boolean, nullable=True, default=False)
+    configuration = db.Column(JSONB, nullable=True)
+    arguments = db.Column(JSONB, nullable=True)
+    context = db.Column(JSONB, nullable=True)
+    asynchronous = db.Column(db.Boolean, nullable=True, default=False)
+
+    # Versioning Information
+    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
+    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
+    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
+    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
+
+
+class EveAITool(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id), nullable=False)
+    name = db.Column(db.String(50), nullable=False)
+    description = db.Column(db.Text, nullable=True)
+    type = db.Column(db.String(50), nullable=False, default="STANDARD_RAG")
+    type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
+    tuning = db.Column(db.Boolean, nullable=True, default=False)
+    configuration = db.Column(JSONB, nullable=True)
+    arguments = db.Column(JSONB, nullable=True)
+
+    # Versioning Information
+    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
+    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
+    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
+    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
+
+
+class Dispatcher(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column(db.String(50), nullable=False)
+    description = db.Column(db.Text, nullable=True)
+    type = db.Column(db.String(50), nullable=False, default="STANDARD_RAG")
+    type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
+    tuning = db.Column(db.Boolean, nullable=True, default=False)
+    configuration = db.Column(JSONB, nullable=True)
+    arguments = db.Column(JSONB, nullable=True)
+
+    # Versioning Information
+    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
+    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
+    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
+    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
+
+
+
 class Interaction(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    chat_session_id = db.Column(db.Integer, db.ForeignKey(ChatSession.id), nullable=False)
@@ -71,3 +154,14 @@ class SpecialistRetriever(db.Model):
    retriever_id = db.Column(db.Integer, db.ForeignKey(Retriever.id, ondelete='CASCADE'), primary_key=True)

    retriever = db.relationship("Retriever", backref="specialist_retrievers")
+
+
+class SpecialistDispatcher(db.Model):
+    specialist_id = db.Column(db.Integer, db.ForeignKey(Specialist.id, ondelete='CASCADE'), primary_key=True)
+    dispatcher_id = db.Column(db.Integer, db.ForeignKey(Dispatcher.id, ondelete='CASCADE'), primary_key=True)
+
+    dispatcher = db.relationship("Dispatcher", backref="specialist_dispatchers")
+
+
+
+
--- a/common/utils/cache/base.py
+++ b/common/utils/cache/base.py
@@ -1,57 +1,78 @@
-# common/utils/cache/base.py
-
 from typing import Any, Dict, List, Optional, TypeVar, Generic, Type
 from dataclasses import dataclass
 from flask import Flask
 from dogpile.cache import CacheRegion

-T = TypeVar('T')
+T = TypeVar('T')  # Generic type parameter for cached data


@dataclass
 class CacheKey:
-    """Represents a cache key with multiple components"""
+    """
+    Represents a composite cache key made up of multiple components.
+    Enables structured and consistent key generation for cache entries.
+
+    Attributes:
+        components (Dict[str, Any]): Dictionary of key components and their values
+
+    Example:
+        key = CacheKey({'tenant_id': 123, 'user_id': 456})
+        str(key) -> "tenant_id=123:user_id=456"
+    """
    components: Dict[str, Any]

    def __str__(self) -> str:
+        """
+        Converts components into a deterministic string representation.
+        Components are sorted alphabetically to ensure consistent key generation.
+        """
        return ":".join(f"{k}={v}" for k, v in sorted(self.components.items()))


-class CacheInvalidationManager:
-    """Manages cache invalidation subscriptions"""
-
-    def __init__(self):
-        self._subscribers = {}
-
-    def subscribe(self, model: str, handler: 'CacheHandler', key_fields: List[str]):
-        if model not in self._subscribers:
-            self._subscribers[model] = []
-        self._subscribers[model].append((handler, key_fields))
-
-    def notify_change(self, model: str, **identifiers):
-        if model in self._subscribers:
-            for handler, key_fields in self._subscribers[model]:
-                if all(field in identifiers for field in key_fields):
-                    handler.invalidate_by_model(model, **identifiers)
-
-
 class CacheHandler(Generic[T]):
-    """Base cache handler implementation"""
+    """
+    Base cache handler implementation providing structured caching functionality.
+    Uses generics to ensure type safety of cached data.
+
+    Type Parameters:
+        T: Type of data being cached
+
+    Attributes:
+        region (CacheRegion): Dogpile cache region for storage
+        prefix (str): Prefix for all cache keys managed by this handler
+    """

    def __init__(self, region: CacheRegion, prefix: str):
        self.region = region
        self.prefix = prefix
-        self._key_components = []
+        self._key_components = []  # List of required key components

    def configure_keys(self, *components: str):
+        """
+        Configure required components for cache key generation.
+
+        Args:
+            *components: Required key component names
+
+        Returns:
+            self for method chaining
+        """
        self._key_components = components
        return self

-    def subscribe_to_model(self, model: str, key_fields: List[str]):
-        invalidation_manager.subscribe(model, self, key_fields)
-        return self
-
    def generate_key(self, **identifiers) -> str:
+        """
+        Generate a cache key from provided identifiers.
+
+        Args:
+            **identifiers: Key-value pairs for key components
+
+        Returns:
+            Formatted cache key string
+
+        Raises:
+            ValueError: If required components are missing
+        """
        missing = set(self._key_components) - set(identifiers.keys())
        if missing:
            raise ValueError(f"Missing key components: {missing}")
@@ -60,6 +81,16 @@ class CacheHandler(Generic[T]):
        return f"{self.prefix}:{str(key)}"

    def get(self, creator_func, **identifiers) -> T:
+        """
+        Get or create a cached value.
+
+        Args:
+            creator_func: Function to create value if not cached
+            **identifiers: Key components for cache key
+
+        Returns:
+            Cached or newly created value
+        """
        cache_key = self.generate_key(**identifiers)

        def creator():
@@ -75,15 +106,25 @@ class CacheHandler(Generic[T]):
        return self.from_cache_data(cached_data, **identifiers)

    def invalidate(self, **identifiers):
+        """
+        Invalidate a specific cache entry.
+
+        Args:
+            **identifiers: Key components for the cache entry
+        """
        cache_key = self.generate_key(**identifiers)
        self.region.delete(cache_key)

    def invalidate_by_model(self, model: str, **identifiers):
+        """
+        Invalidate cache entry based on model changes.
+
+        Args:
+            model: Changed model name
+            **identifiers: Model instance identifiers
+        """
        try:
            self.invalidate(**identifiers)
        except ValueError:
-            pass
+            pass  # Skip if cache key can't be generated from provided identifiers

-
-# Create global invalidation manager
-invalidation_manager = CacheInvalidationManager()
--- a/common/utils/cache/config_cache.py
+++ b/common/utils/cache/config_cache.py
@@ -0,0 +1,306 @@
+from typing import Dict, Any, Optional
+from pathlib import Path
+import yaml
+from packaging import version
+import os
+from flask import current_app
+
+from common.utils.cache.base import CacheHandler
+
+from config.type_defs import agent_types, task_types, tool_types, specialist_types
+
+
+class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
+    """Base handler for configuration caching"""
+
+    def __init__(self, region, config_type: str):
+        """
+        Args:
+            region: Cache region
+            config_type: Type of configuration (agents, tasks, etc.)
+        """
+        super().__init__(region, f'config_{config_type}')
+        self.config_type = config_type
+        self._types_module = None  # Set by subclasses
+        self._config_dir = None  # Set by subclasses
+
+    def configure_keys_for_operation(self, operation: str):
+        """Configure required keys based on operation"""
+        match operation:
+            case 'get_types':
+                self.configure_keys('type_name')  # Only require type_name for type definitions
+            case 'get_versions':
+                self.configure_keys('type_name')  # Only type_name needed for version tree
+            case 'get_config':
+                self.configure_keys('type_name', 'version')  # Both needed for specific config
+            case _:
+                raise ValueError(f"Unknown operation: {operation}")
+
+    def _load_version_tree(self, type_name: str) -> Dict[str, Any]:
+        """
+        Load version tree for a specific type without loading full configurations
+
+        Args:
+            type_name: Name of configuration type
+
+        Returns:
+            Dict containing available versions and their metadata
+        """
+        type_path = Path(self._config_dir) / type_name
+        if not type_path.exists():
+            raise ValueError(f"No configuration found for type {type_name}")
+
+        version_files = list(type_path.glob('*.yaml'))
+        if not version_files:
+            raise ValueError(f"No versions found for type {type_name}")
+
+        versions = {}
+        latest_version = None
+        latest_version_obj = None
+
+        for file_path in version_files:
+            ver = file_path.stem  # Get version from filename
+            try:
+                ver_obj = version.parse(ver)
+                # Only load minimal metadata for version tree
+                with open(file_path) as f:
+                    yaml_data = yaml.safe_load(f)
+                    metadata = yaml_data.get('metadata', {})
+                    versions[ver] = {
+                        'metadata': metadata,
+                        'file_path': str(file_path)
+                    }
+
+                # Track latest version
+                if latest_version_obj is None or ver_obj > latest_version_obj:
+                    latest_version = ver
+                    latest_version_obj = ver_obj
+
+            except Exception as e:
+                current_app.logger.error(f"Error loading version {ver}: {e}")
+                continue
+
+        current_app.logger.debug(f"Loaded versions for {type_name}: {versions}")
+        current_app.logger.debug(f"Loaded versions for {type_name}: {latest_version}")
+        return {
+            'versions': versions,
+            'latest_version': latest_version
+        }
+
+    def to_cache_data(self, instance: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert the data to a cacheable format"""
+        # For configuration data, we can just return the dictionary as is
+        # since it's already in a serializable format
+        return instance
+
+    def from_cache_data(self, data: Dict[str, Any], **kwargs) -> Dict[str, Any]:
+        """Convert cached data back to usable format"""
+        # Similarly, we can return the data directly since it's already
+        # in the format we need
+        return data
+
+    def should_cache(self, value: Dict[str, Any]) -> bool:
+        """
+        Validate if the value should be cached
+
+        Args:
+            value: The value to be cached
+
+        Returns:
+            bool: True if the value should be cached
+        """
+        if not isinstance(value, dict):
+            return False
+
+        # For type definitions
+        if 'name' in value and 'description' in value:
+            return True
+
+        # For configurations
+        if 'versions' in value and 'latest_version' in value:
+            return True
+
+        return False
+
+    def _load_type_definitions(self) -> Dict[str, Dict[str, str]]:
+        """Load type definitions from the corresponding type_defs module"""
+        if not self._types_module:
+            raise ValueError("_types_module must be set by subclass")
+
+        type_definitions = {
+            type_id: {
+                'name': info['name'],
+                'description': info['description']
+            }
+            for type_id, info in self._types_module.items()
+        }
+
+        current_app.logger.debug(f"Loaded type definitions: {type_definitions}")
+
+        return type_definitions
+
+    def _load_specific_config(self, type_name: str, version_str: str) -> Dict[str, Any]:
+        """
+        Load a specific configuration version
+
+        Args:
+            type_name: Type name
+            version_str: Version string
+
+        Returns:
+            Configuration data
+        """
+        version_tree = self.get_versions(type_name)
+        versions = version_tree['versions']
+
+        if version_str == 'latest':
+            version_str = version_tree['latest_version']
+
+        if version_str not in versions:
+            raise ValueError(f"Version {version_str} not found for {type_name}")
+
+        file_path = versions[version_str]['file_path']
+
+        try:
+            with open(file_path) as f:
+                config = yaml.safe_load(f)
+                current_app.logger.debug(f"Loaded config for {type_name}{version_str}: {config}")
+                return config
+        except Exception as e:
+            raise ValueError(f"Error loading config from {file_path}: {e}")
+
+    def get_versions(self, type_name: str) -> Dict[str, Any]:
+        """
+        Get version tree for a type
+
+        Args:
+            type_name: Type to get versions for
+
+        Returns:
+            Dict with version information
+        """
+        self.configure_keys_for_operation('get_versions')
+        return self.get(
+            lambda type_name: self._load_version_tree(type_name),
+            type_name=type_name
+        )
+
+    def get_latest_version(self, type_name: str) -> str:
+        """
+        Get the latest version for a given type name.
+
+        Args:
+            type_name: Name of the configuration type
+
+        Returns:
+            Latest version string
+
+        Raises:
+            ValueError: If type not found or no versions available
+        """
+        version_tree = self.get_versions(type_name)
+        if not version_tree or 'latest_version' not in version_tree:
+            raise ValueError(f"No versions found for {type_name}")
+
+        return version_tree['latest_version']
+
+    def get_latest_patch_version(self, type_name: str, major_minor: str) -> str:
+        """
+        Get the latest patch version for a given major.minor version.
+
+        Args:
+            type_name: Name of the configuration type
+            major_minor: Major.minor version (e.g. "1.0")
+
+        Returns:
+            Latest patch version string (e.g. "1.0.3")
+
+        Raises:
+            ValueError: If type not found or no matching versions
+        """
+        version_tree = self.get_versions(type_name)
+        if not version_tree or 'versions' not in version_tree:
+            raise ValueError(f"No versions found for {type_name}")
+
+        # Filter versions that match the major.minor prefix
+        matching_versions = [
+            ver for ver in version_tree['versions'].keys()
+            if ver.startswith(major_minor + '.')
+        ]
+
+        if not matching_versions:
+            raise ValueError(f"No versions found for {type_name} with prefix {major_minor}")
+
+        # Return highest matching version
+        latest_patch = max(matching_versions, key=version.parse)
+        return latest_patch
+
+    def get_types(self) -> Dict[str, Dict[str, str]]:
+        """Get dictionary of available types with name and description"""
+        self.configure_keys_for_operation('get_types')
+        result =  self.get(
+            lambda type_name: self._load_type_definitions(),
+            type_name=f'{self.config_type}_types',
+        )
+        return result
+
+    def get_config(self, type_name: str, version: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Get configuration for a specific type and version
+        If version not specified, returns latest
+
+        Args:
+            type_name: Configuration type name
+            version: Optional specific version to retrieve
+
+        Returns:
+            Configuration data
+        """
+        self.configure_keys_for_operation('get_config')
+        version_str = version or 'latest'
+
+        return self.get(
+            lambda type_name, version: self._load_specific_config(type_name, version),
+            type_name=type_name,
+            version=version_str
+        )
+
+
+class AgentConfigCacheHandler(BaseConfigCacheHandler):
+    """Handler for agent configurations"""
+    handler_name = 'agent_config_cache'
+
+    def __init__(self, region):
+        super().__init__(region, 'agents')
+        self._types_module = agent_types.AGENT_TYPES
+        self._config_dir = os.path.join('config', 'agents')
+
+
+class TaskConfigCacheHandler(BaseConfigCacheHandler):
+    """Handler for task configurations"""
+    handler_name = 'task_config_cache'
+
+    def __init__(self, region):
+        super().__init__(region, 'tasks')
+        self._types_module = task_types.TASK_TYPES
+        self._config_dir = os.path.join('config', 'tasks')
+
+
+class ToolConfigCacheHandler(BaseConfigCacheHandler):
+    """Handler for tool configurations"""
+    handler_name = 'tool_config_cache'
+
+    def __init__(self, region):
+        super().__init__(region, 'tools')
+        self._types_module = tool_types.TOOL_TYPES
+        self._config_dir = os.path.join('config', 'tools')
+
+
+class SpecialistConfigCacheHandler(BaseConfigCacheHandler):
+    """Handler for specialist configurations"""
+    handler_name = 'specialist_config_cache'
+
+    def __init__(self, region):
+        super().__init__(region, 'specialists')
+        self._types_module = specialist_types.SPECIALIST_TYPES
+        self._config_dir = os.path.join('config', 'specialists')
--- a/common/utils/cache/eveai_cache_manager.py
+++ b/common/utils/cache/eveai_cache_manager.py
@@ -3,6 +3,8 @@ from typing import Type
 from flask import Flask

 from common.utils.cache.base import CacheHandler
+from common.utils.cache.regions import create_cache_regions
+from common.utils.cache.config_cache import AgentConfigCacheHandler


 class EveAICacheManager:
@@ -11,29 +13,39 @@ class EveAICacheManager:
    def __init__(self):
        self._regions = {}
        self._handlers = {}
+        self._handler_instances = {}

    def init_app(self, app: Flask):
        """Initialize cache regions"""
-        from common.utils.cache.regions import create_cache_regions
        self._regions = create_cache_regions(app)

        # Store regions in instance
        for region_name, region in self._regions.items():
            setattr(self, f"{region_name}_region", region)

-        # Initialize all registered handlers with their regions
-        for handler_class, region_name in self._handlers.items():
-            region = self._regions[region_name]
-            handler_instance = handler_class(region)
-            handler_name = getattr(handler_class, 'handler_name', None)
-            if handler_name:
-                app.logger.debug(f"{handler_name} is registered")
-                setattr(self, handler_name, handler_instance)
-
-        app.logger.info('Cache regions initialized: ' + ', '.join(self._regions.keys()))
+        app.logger.info(f'Cache regions initialized: {self._regions.keys()}')

    def register_handler(self, handler_class: Type[CacheHandler], region: str):
        """Register a cache handler class with its region"""
        if not hasattr(handler_class, 'handler_name'):
            raise ValueError("Cache handler must define handler_name class attribute")
        self._handlers[handler_class] = region
+
+        # Create handler instance
+        region_instance = self._regions[region]
+        handler_instance = handler_class(region_instance)
+        self._handler_instances[handler_class.handler_name] = handler_instance
+
+    def invalidate_region(self, region_name: str):
+        """Invalidate an entire cache region"""
+        if region_name in self._regions:
+            self._regions[region_name].invalidate()
+        else:
+            raise ValueError(f"Unknown cache region: {region_name}")
+
+    def __getattr__(self, name):
+        """Handle dynamic access to registered handlers"""
+        instances = object.__getattribute__(self, '_handler_instances')
+        if name in instances:
+            return instances[name]
+        raise AttributeError(f"'EveAICacheManager' object has no attribute '{name}'")
--- a/common/utils/cache/regions.py
+++ b/common/utils/cache/regions.py
@@ -1,4 +1,5 @@
 # common/utils/cache/regions.py
+import time

 from dogpile.cache import make_region
 from urllib.parse import urlparse
@@ -36,6 +37,7 @@ def create_cache_regions(app):
    """Initialize all cache regions with app config"""
    redis_config = get_redis_config(app)
    regions = {}
+    startup_time = int(time.time())

    # Region for model-related caching (ModelVariables etc)
    model_region = make_region(name='eveai_model').configure(
@@ -61,5 +63,16 @@ def create_cache_regions(app):
    )
    regions['eveai_workers'] = eveai_workers_region

+    eveai_config_region = make_region(name='eveai_config').configure(
+        'dogpile.cache.redis',
+        arguments={
+            **redis_config,
+            'redis_expiration_time': None,  # No expiration in Redis
+            'key_mangler': lambda key: f"startup_{startup_time}:{key}"  # Prefix all keys
+        },
+        replace_existing_backend=True
+    )
+    regions['eveai_config'] = eveai_config_region
+
    return regions

--- a/common/utils/document_utils.py
+++ b/common/utils/document_utils.py
@@ -7,7 +7,6 @@ from common.models.document import Document, DocumentVersion, Catalog
 from common.extensions import db, minio_client
 from common.utils.celery_utils import current_celery
 from flask import current_app
-from flask_security import current_user
 import requests
 from urllib.parse import urlparse, unquote, urlunparse
 import os
@@ -16,6 +15,7 @@ from .config_field_types import normalize_json_field
 from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
                               EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion, EveAIException)
 from ..models.user import Tenant
+from common.utils.model_logging_utils import set_logging_information, update_logging_information


 def create_document_stack(api_input, file, filename, extension, tenant_id):
@@ -136,35 +136,6 @@ def upload_file_for_version(doc_vers, file, extension, tenant_id):
        raise


-def set_logging_information(obj, timestamp):
-    obj.created_at = timestamp
-    obj.updated_at = timestamp
-
-    user_id = get_current_user_id()
-    if user_id:
-        obj.created_by = user_id
-        obj.updated_by = user_id
-
-
-def update_logging_information(obj, timestamp):
-    obj.updated_at = timestamp
-
-    user_id = get_current_user_id()
-    if user_id:
-        obj.updated_by = user_id
-
-
-def get_current_user_id():
-    try:
-        if current_user and current_user.is_authenticated:
-            return current_user.id
-        else:
-            return None
-    except Exception:
-        # This will catch any errors if current_user is not available (e.g., in API context)
-        return None
-
-
 def get_extension_from_content_type(content_type):
    content_type_map = {
        'text/html': 'html',
--- a/common/utils/model_logging_utils.py
+++ b/common/utils/model_logging_utils.py
@@ -0,0 +1,30 @@
+from flask_security import current_user
+
+
+def set_logging_information(obj, timestamp):
+    obj.created_at = timestamp
+    obj.updated_at = timestamp
+
+    user_id = get_current_user_id()
+    if user_id:
+        obj.created_by = user_id
+        obj.updated_by = user_id
+
+
+def update_logging_information(obj, timestamp):
+    obj.updated_at = timestamp
+
+    user_id = get_current_user_id()
+    if user_id:
+        obj.updated_by = user_id
+
+
+def get_current_user_id():
+    try:
+        if current_user and current_user.is_authenticated:
+            return current_user.id
+        else:
+            return None
+    except Exception:
+        # This will catch any errors if current_user is not available (e.g., in API context)
+        return None
--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -227,62 +227,6 @@ class ModelVariables:
            raise


-class ModelVariablesCacheHandler(CacheHandler[ModelVariables]):
-    handler_name = 'model_vars_cache'  # Used to access handler instance from cache_manager
-
-    def __init__(self, region):
-        super().__init__(region, 'model_variables')
-        self.configure_keys('tenant_id')
-        self.subscribe_to_model('Tenant', ['tenant_id'])
-
-    def to_cache_data(self, instance: ModelVariables) -> Dict[str, Any]:
-        return {
-            'tenant_id': instance.tenant_id,
-            'variables': instance._variables,
-            'last_updated': dt.now(tz=tz.utc).isoformat()
-        }
-
-    def from_cache_data(self, data: Dict[str, Any], tenant_id: int, **kwargs) -> ModelVariables:
-        instance = ModelVariables(tenant_id, data.get('variables'))
-        return instance
-
-    def should_cache(self, value: Dict[str, Any]) -> bool:
-        required_fields = {'tenant_id', 'variables'}
-        return all(field in value for field in required_fields)
-
-
-# Register the handler with the cache manager
-cache_manager.register_handler(ModelVariablesCacheHandler, 'eveai_model')
-
-
 # Helper function to get cached model variables
 def get_model_variables(tenant_id: int) -> ModelVariables:
-    return cache_manager.model_vars_cache.get(
-        lambda tenant_id: ModelVariables(tenant_id),    # function to create ModelVariables if required
-        tenant_id=tenant_id
-    )
-
-# Written in a long format, without lambda
-# def get_model_variables(tenant_id: int) -> ModelVariables:
-#     """
-#     Get ModelVariables instance, either from cache or newly created
-#
-#     Args:
-#         tenant_id: The tenant's ID
-#
-#     Returns:
-#         ModelVariables: Instance with either cached or fresh data
-#
-#     Raises:
-#         TenantNotFoundError: If tenant doesn't exist
-#         CacheStateError: If cached data is invalid
-#     """
-#
-#     def create_new_instance(tenant_id: int) -> ModelVariables:
-#         """Creator function that's called when cache miss occurs"""
-#         return ModelVariables(tenant_id)  # This will initialize fresh variables
-#
-#     return cache_manager.model_vars_cache.get(
-#         create_new_instance,  # Function to create new instance if needed
-#         tenant_id=tenant_id  # Parameters passed to both get() and create_new_instance
-#     )
+    return ModelVariables(tenant_id=tenant_id)
--- a/common/utils/specialist_utils.py
+++ b/common/utils/specialist_utils.py
@@ -0,0 +1,192 @@
+from datetime import datetime as dt, timezone as tz
+from typing import Optional, Dict, Any
+from flask import current_app
+from sqlalchemy.exc import SQLAlchemyError
+
+from common.extensions import db, cache_manager
+from common.models.interaction import (
+    Specialist, EveAIAgent, EveAITask, EveAITool
+)
+from common.utils.model_logging_utils import set_logging_information, update_logging_information
+
+
+def initialize_specialist(specialist_id: int, specialist_type: str, specialist_version: str):
+    """
+    Initialize an agentic specialist by creating all its components based on configuration.
+
+    Args:
+        specialist_id: ID of the specialist to initialize
+        specialist_type: Type of the specialist
+        specialist_version: Version of the specialist type to use
+
+    Raises:
+        ValueError: If specialist not found or invalid configuration
+        SQLAlchemyError: If database operations fail
+    """
+    config = cache_manager.specialist_config_cache.get_config(specialist_type, specialist_version)
+    if not config:
+        raise ValueError(f"No configuration found for {specialist_type} version {specialist_version}")
+    if config['framework'] == 'langchain':
+        pass    # Langchain does not require additional items to be initialized. All configuration is in the specialist.
+
+    specialist = Specialist.query.get(specialist_id)
+    if not specialist:
+        raise ValueError(f"Specialist with ID {specialist_id} not found")
+
+    if config['framework'] == 'crewai':
+        initialize_crewai_specialist(specialist, config)
+
+
+def initialize_crewai_specialist(specialist: Specialist, config: Dict[str, Any]):
+    timestamp = dt.now(tz=tz.utc)
+
+    try:
+        # Initialize agents
+        if 'agents' in config:
+            for agent_config in config['agents']:
+                _create_agent(
+                    specialist_id=specialist.id,
+                    agent_type=agent_config['type'],
+                    agent_version=agent_config['version'],
+                    name=agent_config.get('name'),
+                    description=agent_config.get('description'),
+                    timestamp=timestamp
+                )
+
+        # Initialize tasks
+        if 'tasks' in config:
+            for task_config in config['tasks']:
+                _create_task(
+                    specialist_id=specialist.id,
+                    task_type=task_config['type'],
+                    task_version=task_config['version'],
+                    name=task_config.get('name'),
+                    description=task_config.get('description'),
+                    timestamp=timestamp
+                )
+
+        # Initialize tools
+        if 'tools' in config:
+            for tool_config in config['tools']:
+                _create_tool(
+                    specialist_id=specialist.id,
+                    tool_type=tool_config['type'],
+                    tool_version=tool_config['version'],
+                    name=tool_config.get('name'),
+                    description=tool_config.get('description'),
+                    timestamp=timestamp
+                )
+
+        db.session.commit()
+        current_app.logger.info(f"Successfully initialized crewai specialist {specialist.id}")
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        current_app.logger.error(f"Database error initializing crewai specialist {specialist.id}: {str(e)}")
+        raise
+    except Exception as e:
+        db.session.rollback()
+        current_app.logger.error(f"Error initializing crewai specialist {specialist.id}: {str(e)}")
+        raise
+
+
+def _create_agent(
+        specialist_id: int,
+        agent_type: str,
+        agent_version: str,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        timestamp: Optional[dt] = None
+) -> EveAIAgent:
+    """Create an agent with the given configuration."""
+    if timestamp is None:
+        timestamp = dt.now(tz=tz.utc)
+
+    # Get agent configuration from cache
+    agent_config = cache_manager.agent_config_cache.get_config(agent_type, agent_version)
+
+    agent = EveAIAgent(
+        specialist_id=specialist_id,
+        name=name or agent_config.get('name', agent_type),
+        description=description or agent_config.get('description', ''),
+        type=agent_type,
+        type_version=agent_version,
+        role=None,
+        goal=None,
+        backstory=None,
+        tuning=False,
+        configuration=None,
+        arguments=None
+    )
+
+    set_logging_information(agent, timestamp)
+
+    db.session.add(agent)
+    return agent
+
+
+def _create_task(
+        specialist_id: int,
+        task_type: str,
+        task_version: str,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        timestamp: Optional[dt] = None
+) -> EveAITask:
+    """Create a task with the given configuration."""
+    if timestamp is None:
+        timestamp = dt.now(tz=tz.utc)
+
+    # Get task configuration from cache
+    task_config = cache_manager.task_config_cache.get_config(task_type, task_version)
+
+    task = EveAITask(
+        specialist_id=specialist_id,
+        name=name or task_config.get('name', task_type),
+        description=description or task_config.get('description', ''),
+        type=task_type,
+        type_version=task_version,
+        expected_output=None,
+        tuning=False,
+        configuration=None,
+        arguments=None,
+        context=None,
+        asynchronous=False,
+    )
+
+    set_logging_information(task, timestamp)
+
+    db.session.add(task)
+    return task
+
+
+def _create_tool(
+        specialist_id: int,
+        tool_type: str,
+        tool_version: str,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        timestamp: Optional[dt] = None
+) -> EveAITool:
+    """Create a tool with the given configuration."""
+    if timestamp is None:
+        timestamp = dt.now(tz=tz.utc)
+
+    # Get tool configuration from cache
+    tool_config = cache_manager.tool_config_cache.get_config(tool_type, tool_version)
+
+    tool = EveAITool(
+        specialist_id=specialist_id,
+        name=name or tool_config.get('name', tool_type),
+        description=description or tool_config.get('description', ''),
+        type=tool_type,
+        type_version=tool_version,
+        tuning=False,
+        configuration=None,
+        arguments=None,
+    )
+
+    set_logging_information(tool, timestamp)
+
+    db.session.add(tool)
+    return tool
--- a/common/utils/startup_eveai.py
+++ b/common/utils/startup_eveai.py
@@ -0,0 +1,46 @@
+import time
+
+from redis import Redis
+
+from common.extensions import cache_manager
+
+
+def perform_startup_actions(app):
+    perform_startup_invalidation(app)
+
+
+def perform_startup_invalidation(app):
+    """
+    Perform cache invalidation only once during startup using a persistent marker (also called flag or semaphore
+    - see docs).
+    Uses a combination of lock and marker to ensure invalidation happens exactly once
+    per deployment.
+    """
+    redis_client = Redis.from_url(app.config['REDIS_BASE_URI'])
+    startup_time = int(time.time())
+    marker_key = 'startup_invalidation_completed'
+    lock_key = 'startup_invalidation_lock'
+
+    try:
+        # First try to get the lock
+        lock = redis_client.lock(lock_key, timeout=30)
+        if lock.acquire(blocking=False):
+            try:
+                # Check if invalidation was already performed
+                if not redis_client.get(marker_key):
+                    # Perform invalidation
+                    cache_manager.invalidate_region('eveai_config')
+                    # Set marker with 1 hour expiry (longer than any reasonable startup sequence)
+                    redis_client.setex(marker_key, 300, str(startup_time))
+                    app.logger.info("Startup cache invalidation completed")
+                else:
+                    app.logger.info("Startup cache invalidation already performed")
+            finally:
+                lock.release()
+        else:
+            app.logger.info("Another process is handling startup invalidation")
+
+    except Exception as e:
+        app.logger.error(f"Error during startup invalidation: {e}")
+        # In case of error, we don't want to block the application startup
+        pass