- Introduction of dynamic Retrievers & Specialists

- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists
- Start adaptation of chat client
2024-11-15 10:00:53 +01:00
parent 55a8a95f79
commit 1807435339
101 changed files with 4181 additions and 1764 deletions
--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -1,249 +1,36 @@
 import os
+from typing import Dict, Any, Optional

 import langcodes
-from flask import current_app
-from langchain_openai import OpenAIEmbeddings, ChatOpenAI
-from langchain_anthropic import ChatAnthropic
-from langchain_core.pydantic_v1 import BaseModel, Field
-from typing import List, Any, Iterator
-from collections.abc import MutableMapping
-from openai import OpenAI
-from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
-from portkey_ai.langchain.portkey_langchain_callback_handler import LangchainCallbackHandler

 from common.langchain.llm_metrics_handler import LLMMetricsHandler
+from common.langchain.templates.template_manager import TemplateManager
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI, OpenAI
+from langchain_anthropic import ChatAnthropic
+from flask import current_app
+from datetime import datetime as dt, timezone as tz
+
 from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
-from common.langchain.tracked_transcribe import tracked_transcribe
-from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI, Catalog
+from common.langchain.tracked_transcription import TrackedOpenAITranscription
 from common.models.user import Tenant
+from common.utils.cache.base import CacheHandler
 from config.model_config import MODEL_CONFIG
-from common.utils.business_event_context import current_event
+from common.extensions import template_manager, cache_manager
+from common.models.document import EmbeddingLargeOpenAI, EmbeddingSmallOpenAI
+from common.utils.eveai_exceptions import EveAITenantNotFound


-class CitedAnswer(BaseModel):
-    """Default docstring - to be replaced with actual prompt"""
+def create_language_template(template: str, language: str) -> str:
+    """
+    Replace language placeholder in template with specified language

-    answer: str = Field(
-        ...,
-        description="The answer to the user question, based on the given sources",
-    )
-    citations: List[int] = Field(
-        ...,
-        description="The integer IDs of the SPECIFIC sources that were used to generate the answer"
-    )
-    insufficient_info: bool = Field(
-        False,  # Default value is set to False
-        description="A boolean indicating wether given sources were sufficient or not to generate the answer"
-    )
+    Args:
+        template: Template string with {language} placeholder
+        language: Language code to insert

-
-def set_language_prompt_template(cls, language_prompt):
-    cls.__doc__ = language_prompt
-
-
-class ModelVariables(MutableMapping):
-    def __init__(self, tenant: Tenant, catalog_id=None):
-        self.tenant = tenant
-        self.catalog_id = catalog_id
-        self._variables = self._initialize_variables()
-        self._embedding_model = None
-        self._llm = None
-        self._llm_no_rag = None
-        self._transcription_client = None
-        self._prompt_templates = {}
-        self._embedding_db_model = None
-        self.llm_metrics_handler = LLMMetricsHandler()
-        self._transcription_client = None
-
-    def _initialize_variables(self):
-        variables = {}
-
-        # Get the Catalog if catalog_id is passed
-        if self.catalog_id:
-            catalog = Catalog.query.get_or_404(self.catalog_id)
-
-            # We initialize the variables that are available knowing the tenant.
-            variables['embed_tuning'] = catalog.embed_tuning or False
-
-            # Set HTML Chunking Variables
-            variables['html_tags'] = catalog.html_tags
-            variables['html_end_tags'] = catalog.html_end_tags
-            variables['html_included_elements'] = catalog.html_included_elements
-            variables['html_excluded_elements'] = catalog.html_excluded_elements
-            variables['html_excluded_classes'] = catalog.html_excluded_classes
-
-            # Set Chunk Size variables
-            variables['min_chunk_size'] = catalog.min_chunk_size
-            variables['max_chunk_size'] = catalog.max_chunk_size
-
-        # Set the RAG Context (will have to change once specialists are defined
-        variables['rag_context'] = self.tenant.rag_context or " "
-        # Temporary setting until we have Specialists
-        variables['rag_tuning'] = False
-        variables['RAG_temperature'] = 0.3
-        variables['no_RAG_temperature'] = 0.5
-        variables['k'] = 8
-        variables['similarity_threshold'] = 0.4
-
-        # Set model providers
-        variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
-        variables['llm_provider'], variables['llm_model'] = self.tenant.llm_model.rsplit('.', 1)
-        variables["templates"] = current_app.config['PROMPT_TEMPLATES'][(f"{variables['llm_provider']}."
-                                                                         f"{variables['llm_model']}")]
-        current_app.logger.info(f"Loaded prompt templates: \n")
-        current_app.logger.info(f"{variables['templates']}")
-
-        # Set model-specific configurations
-        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
-        variables.update(model_config)
-
-        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][self.tenant.llm_model]
-
-        if variables['tool_calling_supported']:
-            variables['cited_answer_cls'] = CitedAnswer
-
-        variables['max_compression_duration'] = current_app.config['MAX_COMPRESSION_DURATION']
-        variables['max_transcription_duration'] = current_app.config['MAX_TRANSCRIPTION_DURATION']
-        variables['compression_cpu_limit'] = current_app.config['COMPRESSION_CPU_LIMIT']
-        variables['compression_process_delay'] = current_app.config['COMPRESSION_PROCESS_DELAY']
-
-        return variables
-
-    @property
-    def embedding_model(self):
-        api_key = os.getenv('OPENAI_API_KEY')
-        model = self._variables['embedding_model']
-        self._embedding_model = TrackedOpenAIEmbeddings(api_key=api_key,
-                                                        model=model,
-                                                        )
-        self._embedding_db_model = EmbeddingSmallOpenAI \
-            if model == 'text-embedding-3-small' \
-            else EmbeddingLargeOpenAI
-
-        return self._embedding_model
-
-    @property
-    def llm(self):
-        api_key = self.get_api_key_for_llm()
-        self._llm = ChatOpenAI(api_key=api_key,
-                               model=self._variables['llm_model'],
-                               temperature=self._variables['RAG_temperature'],
-                               callbacks=[self.llm_metrics_handler])
-        return self._llm
-
-    @property
-    def llm_no_rag(self):
-        api_key = self.get_api_key_for_llm()
-        self._llm_no_rag = ChatOpenAI(api_key=api_key,
-                                      model=self._variables['llm_model'],
-                                      temperature=self._variables['RAG_temperature'],
-                                      callbacks=[self.llm_metrics_handler])
-        return self._llm_no_rag
-
-    def get_api_key_for_llm(self):
-        if self._variables['llm_provider'] == 'openai':
-            api_key = os.getenv('OPENAI_API_KEY')
-        else:  # self._variables['llm_provider'] == 'anthropic'
-            api_key = os.getenv('ANTHROPIC_API_KEY')
-
-        return api_key
-
-    @property
-    def transcription_client(self):
-        api_key = os.getenv('OPENAI_API_KEY')
-        self._transcription_client = OpenAI(api_key=api_key, )
-        self._variables['transcription_model'] = 'whisper-1'
-        return self._transcription_client
-
-    def transcribe(self, *args, **kwargs):
-        return tracked_transcribe(self._transcription_client, *args, **kwargs)
-
-    @property
-    def embedding_db_model(self):
-        if self._embedding_db_model is None:
-            self._embedding_db_model = self.get_embedding_db_model()
-        return self._embedding_db_model
-
-    def get_embedding_db_model(self):
-        current_app.logger.debug("In get_embedding_db_model")
-        if self._embedding_db_model is None:
-            self._embedding_db_model = EmbeddingSmallOpenAI \
-                if self._variables['embedding_model'] == 'text-embedding-3-small' \
-                else EmbeddingLargeOpenAI
-        current_app.logger.debug(f"Embedding DB Model: {self._embedding_db_model}")
-        return self._embedding_db_model
-
-    def get_prompt_template(self, template_name: str) -> str:
-        current_app.logger.info(f"Getting prompt template for {template_name}")
-        if template_name not in self._prompt_templates:
-            self._prompt_templates[template_name] = self._load_prompt_template(template_name)
-        return self._prompt_templates[template_name]
-
-    def _load_prompt_template(self, template_name: str) -> str:
-        # In the future, this method will make an API call to Portkey
-        # For now, we'll simulate it with a placeholder implementation
-        # You can replace this with your current prompt loading logic
-        return self._variables['templates'][template_name]
-
-    def __getitem__(self, key: str) -> Any:
-        current_app.logger.debug(f"ModelVariables: Getting {key}")
-        # Support older template names (suffix = _template)
-        if key.endswith('_template'):
-            key = key[:-len('_template')]
-            current_app.logger.debug(f"ModelVariables: Getting modified {key}")
-        if key == 'embedding_model':
-            return self.embedding_model
-        elif key == 'embedding_db_model':
-            return self.embedding_db_model
-        elif key == 'llm':
-            return self.llm
-        elif key == 'llm_no_rag':
-            return self.llm_no_rag
-        elif key == 'transcription_client':
-            return self.transcription_client
-        elif key in self._variables.get('prompt_templates', []):
-            return self.get_prompt_template(key)
-        else:
-            value = self._variables.get(key)
-            if value is not None:
-                return value
-            else:
-                raise KeyError(f'Variable {key} does not exist in ModelVariables')
-
-    def __setitem__(self, key: str, value: Any) -> None:
-        self._variables[key] = value
-
-    def __delitem__(self, key: str) -> None:
-        del self._variables[key]
-
-    def __iter__(self) -> Iterator[str]:
-        return iter(self._variables)
-
-    def __len__(self):
-        return len(self._variables)
-
-    def get(self, key: str, default: Any = None) -> Any:
-        return self.__getitem__(key) or default
-
-    def update(self, **kwargs) -> None:
-        self._variables.update(kwargs)
-
-    def items(self):
-        return self._variables.items()
-
-    def keys(self):
-        return self._variables.keys()
-
-    def values(self):
-        return self._variables.values()
-
-
-def select_model_variables(tenant, catalog_id=None):
-    model_variables = ModelVariables(tenant=tenant, catalog_id=catalog_id)
-    return model_variables
-
-
-def create_language_template(template, language):
+    Returns:
+        str: Template with language placeholder replaced
+    """
    try:
        full_language = langcodes.Language.make(language=language)
        language_template = template.replace('{language}', full_language.display_name())
@@ -253,5 +40,249 @@ def create_language_template(template, language):
    return language_template


-def replace_variable_in_template(template, variable, value):
-    return template.replace(variable, value)
+def replace_variable_in_template(template: str, variable: str, value: str) -> str:
+    """
+    Substitute every occurrence of a placeholder in a template
+
+    Args:
+        template: Template text containing the placeholder
+        variable: The literal placeholder to look for (e.g. "{tenant_context}")
+        value: Text to put in its place; a falsy value (None, "") removes the placeholder
+
+    Returns:
+        str: The template with all occurrences of the placeholder substituted
+    """
+    replacement = value if value else ""
+    return template.replace(variable, replacement)
+
+
+class ModelVariables:
+    """
+    Manages model-related variables and configurations for one tenant
+
+    The plain configuration values live in the `_variables` dictionary; the
+    embedding model, LLM and transcription model objects are created lazily on
+    first access and then reused for the lifetime of this instance.
+    """
+
+    def __init__(self, tenant_id: int, variables: Optional[Dict[str, Any]] = None):
+        """
+        Initialize ModelVariables for a tenant
+
+        Args:
+            tenant_id: ID of the tenant these variables belong to
+            variables: Optional pre-built variables dictionary (e.g. restored from
+                cache). When None, the variables are loaded from the Tenant record
+                and the application configuration.
+
+        Raises:
+            EveAITenantNotFound: If `variables` is None and the tenant does not exist
+        """
+        current_app.logger.info(f'Model variables initialized with tenant {tenant_id} and variables \n{variables}')
+        self.tenant_id = tenant_id
+        self._variables = variables if variables is not None else self._initialize_variables()
+        current_app.logger.info(f'Model _variables initialized to {self._variables}')
+        self._embedding_model = None
+        self._embedding_model_class = None
+        # LLM instances keyed by their configuration (temperature + extra kwargs)
+        self._llm_instances = {}
+        self.llm_metrics_handler = LLMMetricsHandler()
+        self._transcription_model = None
+
+    def _initialize_variables(self) -> Dict[str, Any]:
+        """
+        Build the variables dictionary from the Tenant record and the app config
+
+        Returns:
+            Dictionary with provider / model names, the model-specific settings from
+            MODEL_CONFIG and the annotation / compression / transcription settings
+
+        Raises:
+            EveAITenantNotFound: If no tenant with `self.tenant_id` exists
+        """
+        variables = {}
+
+        tenant = Tenant.query.get(self.tenant_id)
+        if not tenant:
+            raise EveAITenantNotFound(f"Tenant {self.tenant_id} not found")
+
+        # Set model providers
+        # Split on the first dot only: the value is "<provider>.<model>" and the model
+        # name itself may contain dots, which would break a plain split('.') unpacking.
+        variables['embedding_provider'], variables['embedding_model'] = tenant.embedding_model.split('.', 1)
+        variables['llm_provider'], variables['llm_model'] = tenant.llm_model.split('.', 1)
+        variables['llm_full_model'] = tenant.llm_model
+
+        # Set model-specific configurations
+        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
+        variables.update(model_config)
+
+        # Additional configurations
+        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
+        variables['max_compression_duration'] = current_app.config['MAX_COMPRESSION_DURATION']
+        variables['max_transcription_duration'] = current_app.config['MAX_TRANSCRIPTION_DURATION']
+        variables['compression_cpu_limit'] = current_app.config['COMPRESSION_CPU_LIMIT']
+        variables['compression_process_delay'] = current_app.config['COMPRESSION_PROCESS_DELAY']
+
+        return variables
+
+    @property
+    def embedding_model(self):
+        """Get the embedding model instance (created on first access, then reused)"""
+        if self._embedding_model is None:
+            api_key = os.getenv('OPENAI_API_KEY')
+            self._embedding_model = TrackedOpenAIEmbeddings(
+                api_key=api_key,
+                model=self._variables['embedding_model']
+            )
+        return self._embedding_model
+
+    @property
+    def embedding_model_class(self):
+        """Get the embedding database model class matching the configured embedding model"""
+        if self._embedding_model_class is None:
+            if self._variables['embedding_model'] == 'text-embedding-3-large':
+                self._embedding_model_class = EmbeddingLargeOpenAI
+            else:   # text-embedding-3-small (also the fallback for any other name)
+                self._embedding_model_class = EmbeddingSmallOpenAI
+
+        return self._embedding_model_class
+
+    @property
+    def annotation_chunk_length(self):
+        """Value stored under 'annotation_chunk_length' in the variables"""
+        return self._variables['annotation_chunk_length']
+
+    @property
+    def max_compression_duration(self):
+        """Value stored under 'max_compression_duration' in the variables"""
+        return self._variables['max_compression_duration']
+
+    @property
+    def max_transcription_duration(self):
+        """Value stored under 'max_transcription_duration' in the variables"""
+        return self._variables['max_transcription_duration']
+
+    @property
+    def compression_cpu_limit(self):
+        """Value stored under 'compression_cpu_limit' in the variables"""
+        return self._variables['compression_cpu_limit']
+
+    @property
+    def compression_process_delay(self):
+        """Value stored under 'compression_process_delay' in the variables"""
+        return self._variables['compression_process_delay']
+
+    def get_llm(self, temperature: float = 0.3, **kwargs) -> Any:
+        """
+        Get an LLM instance with specific configuration
+
+        Instances are reused: calling this method again with the same temperature
+        and keyword arguments returns the instance created the first time.
+
+        Args:
+            temperature: The temperature for the LLM
+            **kwargs: Additional configuration parameters passed to the LLM constructor
+
+        Returns:
+            An instance of the configured LLM
+
+        Raises:
+            ValueError: If the configured LLM provider is not supported
+        """
+        # Build the key from the repr of the name-sorted kwargs instead of
+        # hash(frozenset(...)): this also works for unhashable values (lists, dicts)
+        # and cannot mix up two configurations through a hash collision.
+        kwargs_key = repr(sorted(kwargs.items(), key=lambda item: item[0]))
+        cache_key = f"{temperature}_{kwargs_key}"
+
+        if cache_key not in self._llm_instances:
+            provider = self._variables['llm_provider']
+            model = self._variables['llm_model']
+
+            if provider == 'openai':
+                self._llm_instances[cache_key] = ChatOpenAI(
+                    api_key=os.getenv('OPENAI_API_KEY'),
+                    model=model,
+                    temperature=temperature,
+                    callbacks=[self.llm_metrics_handler],
+                    **kwargs
+                )
+            elif provider == 'anthropic':
+                self._llm_instances[cache_key] = ChatAnthropic(
+                    api_key=os.getenv('ANTHROPIC_API_KEY'),
+                    # The configured model name is mapped to an Anthropic version via app config
+                    model=current_app.config['ANTHROPIC_LLM_VERSIONS'][model],
+                    temperature=temperature,
+                    callbacks=[self.llm_metrics_handler],
+                    **kwargs
+                )
+            else:
+                raise ValueError(f"Unsupported LLM provider: {provider}")
+
+        return self._llm_instances[cache_key]
+
+    @property
+    def transcription_model(self) -> TrackedOpenAITranscription:
+        """Get the transcription model instance (created on first access, then reused)"""
+        if self._transcription_model is None:
+            api_key = os.getenv('OPENAI_API_KEY')
+            self._transcription_model = TrackedOpenAITranscription(
+                api_key=api_key,
+                model='whisper-1'
+            )
+        return self._transcription_model
+
+    # The old transcription-related members only signal their replacement; the
+    # functionality itself is now handled by TrackedOpenAITranscription
+    @property
+    def transcription_client(self):
+        """Deprecated: always raises, use `transcription_model` instead"""
+        raise DeprecationWarning("Use transcription_model instead")
+
+    def transcribe(self, *args, **kwargs):
+        """Deprecated: always raises, use `transcription_model.transcribe()` instead"""
+        raise DeprecationWarning("Use transcription_model.transcribe() instead")
+
+    def get_template(self, template_name: str, version: Optional[str] = None) -> str:
+        """
+        Get a template for the tenant's configured LLM
+
+        Args:
+            template_name: Name of the template to retrieve
+            version: Optional specific version to retrieve
+
+        Returns:
+            The template content
+
+        Raises:
+            Exception: The error from the template manager is re-raised when the
+                template is not available under the 'templates' variable either
+        """
+        try:
+            template = template_manager.get_template(
+                self._variables['llm_full_model'],
+                template_name,
+                version
+            )
+            return template.content
+        except Exception as e:
+            current_app.logger.error(f"Error getting template {template_name}: {str(e)}")
+            # Fall back to old template loading if template_manager fails
+            if template_name in self._variables.get('templates', {}):
+                return self._variables['templates'][template_name]
+            raise
+
+
+class ModelVariablesCacheHandler(CacheHandler[ModelVariables]):
+    """Cache handler that stores ModelVariables as plain data and rebuilds them on read"""
+
+    # Name under which the handler instance is accessed on cache_manager
+    handler_name = 'model_vars_cache'
+
+    def __init__(self, region):
+        """
+        Set up the handler on the given cache region
+
+        Entries are keyed on 'tenant_id' and the handler subscribes to the 'Tenant'
+        model (presumably so entries follow Tenant changes; confirm in CacheHandler).
+        """
+        super().__init__(region, 'model_variables')
+        self.configure_keys('tenant_id')
+        self.subscribe_to_model('Tenant', ['tenant_id'])
+
+    def to_cache_data(self, instance: ModelVariables) -> Dict[str, Any]:
+        """Turn a ModelVariables instance into the dictionary that gets cached"""
+        cache_data = {'tenant_id': instance.tenant_id}
+        cache_data['variables'] = instance._variables
+        # Timezone-aware UTC timestamp of the moment the entry was produced
+        cache_data['last_updated'] = dt.now(tz=tz.utc).isoformat()
+        return cache_data
+
+    def from_cache_data(self, data: Dict[str, Any], tenant_id: int, **kwargs) -> ModelVariables:
+        """Rebuild a ModelVariables instance from cached data for the given tenant"""
+        cached_variables = data.get('variables')
+        return ModelVariables(tenant_id, cached_variables)
+
+    def should_cache(self, value: Dict[str, Any]) -> bool:
+        """Only cache data that carries both the tenant id and the variables"""
+        return 'tenant_id' in value and 'variables' in value
+
+
+# Register the handler class with the shared cache manager when this module is imported,
+# so that get_model_variables() can reach it as cache_manager.model_vars_cache
+# (the handler_name declared on the class).
+# NOTE(review): 'model' is presumably the name of the cache region to use - confirm
+# against cache_manager.register_handler.
+cache_manager.register_handler(ModelVariablesCacheHandler, 'model')
+
+
+def get_model_variables(tenant_id: int) -> ModelVariables:
+    """
+    Helper function to get the ModelVariables of a tenant through the cache
+
+    Args:
+        tenant_id: The tenant's ID
+
+    Returns:
+        ModelVariables: The instance handed back by the model variables cache handler
+    """
+
+    def create_model_variables(tenant_id: int) -> ModelVariables:
+        """Creator function handed to the cache handler to build ModelVariables if required"""
+        return ModelVariables(tenant_id)
+
+    handler = cache_manager.model_vars_cache
+    return handler.get(create_model_variables, tenant_id=tenant_id)
+
+# Written in a long format, without lambda
+# def get_model_variables(tenant_id: int) -> ModelVariables:
+#     """
+#     Get ModelVariables instance, either from cache or newly created
+#
+#     Args:
+#         tenant_id: The tenant's ID
+#
+#     Returns:
+#         ModelVariables: Instance with either cached or fresh data
+#
+#     Raises:
+#         EveAITenantNotFound: If tenant doesn't exist
+#         CacheStateError: If cached data is invalid
+#     """
+#
+#     def create_new_instance(tenant_id: int) -> ModelVariables:
+#         """Creator function that's called when cache miss occurs"""
+#         return ModelVariables(tenant_id)  # This will initialize fresh variables
+#
+#     return cache_manager.model_vars_cache.get(
+#         create_new_instance,  # Function to create new instance if needed
+#         tenant_id=tenant_id  # Parameters passed to both get() and create_new_instance
+#     )