Remove ModelVariables (model_utils) from application & optimize Tenant

2025-05-20 10:17:08 +02:00
parent 70de4c0328
commit d789e431ca
17 changed files with 83 additions and 206 deletions
--- a/common/models/user.py
+++ b/common/models/user.py
@@ -24,17 +24,13 @@ class Tenant(db.Model):
    name = db.Column(db.String(80), unique=True, nullable=False)
    website = db.Column(db.String(255), nullable=True)
    timezone = db.Column(db.String(50), nullable=True, default='UTC')
-    rag_context = db.Column(db.Text, nullable=True)
    type = db.Column(db.String(20), nullable=True, server_default='Active')

    # language information
    default_language = db.Column(db.String(2), nullable=True)
    allowed_languages = db.Column(ARRAY(sa.String(2)), nullable=True)

-    # LLM specific choices
-    llm_model = db.Column(db.String(50), nullable=True)
-
-    # Entitlements
+    # Entitlements
    currency = db.Column(db.String(20), nullable=True)
    storage_dirty = db.Column(db.Boolean, nullable=True, default=False)

@@ -62,11 +58,9 @@ class Tenant(db.Model):
            'name': self.name,
            'website': self.website,
            'timezone': self.timezone,
-            'rag_context': self.rag_context,
            'type': self.type,
            'default_language': self.default_language,
            'allowed_languages': self.allowed_languages,
-            'llm_model': self.llm_model,
            'currency': self.currency,
        }

--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -139,142 +139,26 @@ def process_pdf():
    full_model_name = 'mistral-ocr-latest'


-def get_template(template_name: str, version: Optional[str] = "1.0") -> tuple[
+def get_template(template_name: str, version: Optional[str] = "1.0", temperature: float = 0.3) -> tuple[
    Any, BaseChatModel | None | ChatOpenAI | ChatMistralAI]:
    """
    Get a prompt template
    """
    prompt = cache_manager.prompts_config_cache.get_config(template_name, version)
    if "llm_model" in prompt:
-        llm = get_embedding_llm(full_model_name=prompt["llm_model"])
+        llm = get_embedding_llm(full_model_name=prompt["llm_model"], temperature=temperature)
    else:
-        llm = get_embedding_llm()
+        llm = get_embedding_llm(temperature=temperature)

    return prompt["content"], llm


-class ModelVariables:
-    """Manages model-related variables and configurations"""
-
-    def __init__(self, tenant_id: int, variables: Dict[str, Any] = None):
-        """
-        Initialize ModelVariables with tenant and optional template manager
-
-        Args:
-            tenant_id: Tenant instance
-            variables: Optional variables
-        """
-        current_app.logger.info(f'Model variables initialized with tenant {tenant_id} and variables \n{variables}')
-        self.tenant_id = tenant_id
-        self._variables = variables if variables is not None else self._initialize_variables()
-        current_app.logger.info(f'Model _variables initialized to {self._variables}')
-        self._llm_instances = {}
-        self.llm_metrics_handler = LLMMetricsHandler()
-        self._transcription_model = None
-
-    def _initialize_variables(self) -> Dict[str, Any]:
-        """Initialize the variables dictionary"""
-        variables = {}
-
-        tenant = Tenant.query.get(self.tenant_id)
-        if not tenant:
-            raise EveAITenantNotFound(self.tenant_id)
-
-        # Set model providers
-        variables['llm_provider'], variables['llm_model'] = tenant.llm_model.split('.')
-        variables['llm_full_model'] = tenant.llm_model
-
-        # Set model-specific configurations
-        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
-        variables.update(model_config)
-
-        # Additional configurations
-        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
-        variables['max_compression_duration'] = current_app.config['MAX_COMPRESSION_DURATION']
-        variables['max_transcription_duration'] = current_app.config['MAX_TRANSCRIPTION_DURATION']
-        variables['compression_cpu_limit'] = current_app.config['COMPRESSION_CPU_LIMIT']
-        variables['compression_process_delay'] = current_app.config['COMPRESSION_PROCESS_DELAY']
-
-        return variables
-
-    @property
-    def annotation_chunk_length(self):
-        return self._variables['annotation_chunk_length']
-
-    @property
-    def max_compression_duration(self):
-        return self._variables['max_compression_duration']
-
-    @property
-    def max_transcription_duration(self):
-        return self._variables['max_transcription_duration']
-
-    @property
-    def compression_cpu_limit(self):
-        return self._variables['compression_cpu_limit']
-
-    @property
-    def compression_process_delay(self):
-        return self._variables['compression_process_delay']
-
-    def get_llm(self, temperature: float = 0.3, **kwargs) -> Any:
-        """
-        Get an LLM instance with specific configuration
-
-        Args:
-            temperature: The temperature for the LLM
-            **kwargs: Additional configuration parameters
-
-        Returns:
-            An instance of the configured LLM
-        """
-        cache_key = f"{temperature}_{hash(frozenset(kwargs.items()))}"
-
-        if cache_key not in self._llm_instances:
-            provider = self._variables['llm_provider']
-            model = self._variables['llm_model']
-
-            if provider == 'openai':
-                self._llm_instances[cache_key] = ChatOpenAI(
-                    api_key=os.getenv('OPENAI_API_KEY'),
-                    model=model,
-                    temperature=temperature,
-                    callbacks=[self.llm_metrics_handler],
-                    **kwargs
-                )
-            elif provider == 'anthropic':
-                self._llm_instances[cache_key] = ChatAnthropic(
-                    api_key=os.getenv('ANTHROPIC_API_KEY'),
-                    model=current_app.config['ANTHROPIC_LLM_VERSIONS'][model],
-                    temperature=temperature,
-                    callbacks=[self.llm_metrics_handler],
-                    **kwargs
-                )
-            else:
-                raise ValueError(f"Unsupported LLM provider: {provider}")
-
-        return self._llm_instances[cache_key]
-
-    @property
-    def transcription_model(self) -> TrackedOpenAITranscription:
-        """Get the transcription model instance"""
-        if self._transcription_model is None:
-            api_key = os.getenv('OPENAI_API_KEY')
-            self._transcription_model = TrackedOpenAITranscription(
-                api_key=api_key,
-                model='whisper-1'
-            )
-        return self._transcription_model
-
-    # Remove the old transcription-related methods since they're now handled by TrackedOpenAITranscription
-    @property
-    def transcription_client(self):
-        raise DeprecationWarning("Use transcription_model instead")
-
-    def transcribe(self, *args, **kwargs):
-        raise DeprecationWarning("Use transcription_model.transcribe() instead")
-
-
-# Helper function to get cached model variables
-def get_model_variables(tenant_id: int) -> ModelVariables:
-    return ModelVariables(tenant_id=tenant_id)
+def get_transcription_model(model_name: str = "whisper-1") -> TrackedOpenAITranscription:
+    """
+    Return a new TrackedOpenAITranscription for model_name, using the OPENAI_API_KEY environment variable
+    """
+    api_key = os.getenv('OPENAI_API_KEY')
+    return TrackedOpenAITranscription(
+        api_key=api_key,
+        model=model_name
+    )