- Introduction of dynamic Retrievers & Specialists
- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists
- Start adaptation of chat client
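For context, a minimal usage sketch of how these pieces are intended to fit together (the tenant id and the `'rag'` template name are illustrative assumptions; `get_model_variables`, `get_template` and `get_llm` are defined in the diff below):

```python
# Per-tenant model configuration is now resolved through the cache layer:
model_vars = get_model_variables(tenant_id=1)   # cached via ModelVariablesCacheHandler
prompt = model_vars.get_template('rag')         # template served by the new template manager ('rag' is an assumed name)
llm = model_vars.get_llm(temperature=0.3)       # LLM instances cached per (temperature, kwargs)
```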
@@ -1,249 +1,36 @@
import os
from typing import Dict, Any, Optional

import langcodes
from flask import current_app
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Any, Iterator
from collections.abc import MutableMapping
from openai import OpenAI
from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
from portkey_ai.langchain.portkey_langchain_callback_handler import LangchainCallbackHandler

from common.langchain.llm_metrics_handler import LLMMetricsHandler
from common.langchain.templates.template_manager import TemplateManager
from langchain_openai import OpenAIEmbeddings, ChatOpenAI, OpenAI
from langchain_anthropic import ChatAnthropic
from flask import current_app
from datetime import datetime as dt, timezone as tz

from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
from common.langchain.tracked_transcribe import tracked_transcribe
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI, Catalog
from common.langchain.tracked_transcription import TrackedOpenAITranscription
from common.models.user import Tenant
from common.utils.cache.base import CacheHandler
from config.model_config import MODEL_CONFIG
from common.utils.business_event_context import current_event
from common.extensions import template_manager, cache_manager
from common.models.document import EmbeddingLargeOpenAI, EmbeddingSmallOpenAI
from common.utils.eveai_exceptions import EveAITenantNotFound


class CitedAnswer(BaseModel):
    """Default docstring - to be replaced with actual prompt"""
def create_language_template(template: str, language: str) -> str:
    """
    Replace language placeholder in template with specified language

    answer: str = Field(
        ...,
        description="The answer to the user question, based on the given sources",
    )
    citations: List[int] = Field(
        ...,
        description="The integer IDs of the SPECIFIC sources that were used to generate the answer"
    )
    insufficient_info: bool = Field(
        False,  # Default value is set to False
        description="A boolean indicating whether given sources were sufficient or not to generate the answer"
    )
    Args:
        template: Template string with {language} placeholder
        language: Language code to insert


    def set_language_prompt_template(cls, language_prompt):
        cls.__doc__ = language_prompt


class ModelVariables(MutableMapping):
    def __init__(self, tenant: Tenant, catalog_id=None):
        self.tenant = tenant
        self.catalog_id = catalog_id
        self._variables = self._initialize_variables()
        self._embedding_model = None
        self._llm = None
        self._llm_no_rag = None
        self._transcription_client = None
        self._prompt_templates = {}
        self._embedding_db_model = None
        self.llm_metrics_handler = LLMMetricsHandler()
        self._transcription_client = None

    def _initialize_variables(self):
        variables = {}

        # Get the Catalog if catalog_id is passed
        if self.catalog_id:
            catalog = Catalog.query.get_or_404(self.catalog_id)

            # We initialize the variables that are available knowing the tenant.
            variables['embed_tuning'] = catalog.embed_tuning or False

            # Set HTML Chunking Variables
            variables['html_tags'] = catalog.html_tags
            variables['html_end_tags'] = catalog.html_end_tags
            variables['html_included_elements'] = catalog.html_included_elements
            variables['html_excluded_elements'] = catalog.html_excluded_elements
            variables['html_excluded_classes'] = catalog.html_excluded_classes

            # Set Chunk Size variables
            variables['min_chunk_size'] = catalog.min_chunk_size
            variables['max_chunk_size'] = catalog.max_chunk_size

        # Set the RAG Context (will have to change once specialists are defined)
        variables['rag_context'] = self.tenant.rag_context or " "
        # Temporary setting until we have Specialists
        variables['rag_tuning'] = False
        variables['RAG_temperature'] = 0.3
        variables['no_RAG_temperature'] = 0.5
        variables['k'] = 8
        variables['similarity_threshold'] = 0.4

        # Set model providers
        variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
        variables['llm_provider'], variables['llm_model'] = self.tenant.llm_model.rsplit('.', 1)
        variables["templates"] = current_app.config['PROMPT_TEMPLATES'][(f"{variables['llm_provider']}."
                                                                         f"{variables['llm_model']}")]
        current_app.logger.info(f"Loaded prompt templates: \n")
        current_app.logger.info(f"{variables['templates']}")

        # Set model-specific configurations
        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
        variables.update(model_config)

        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][self.tenant.llm_model]

        if variables['tool_calling_supported']:
            variables['cited_answer_cls'] = CitedAnswer

        variables['max_compression_duration'] = current_app.config['MAX_COMPRESSION_DURATION']
        variables['max_transcription_duration'] = current_app.config['MAX_TRANSCRIPTION_DURATION']
        variables['compression_cpu_limit'] = current_app.config['COMPRESSION_CPU_LIMIT']
        variables['compression_process_delay'] = current_app.config['COMPRESSION_PROCESS_DELAY']

        return variables

    @property
    def embedding_model(self):
        api_key = os.getenv('OPENAI_API_KEY')
        model = self._variables['embedding_model']
        self._embedding_model = TrackedOpenAIEmbeddings(api_key=api_key,
                                                        model=model,
                                                        )
        self._embedding_db_model = EmbeddingSmallOpenAI \
            if model == 'text-embedding-3-small' \
            else EmbeddingLargeOpenAI

        return self._embedding_model

    @property
    def llm(self):
        api_key = self.get_api_key_for_llm()
        self._llm = ChatOpenAI(api_key=api_key,
                               model=self._variables['llm_model'],
                               temperature=self._variables['RAG_temperature'],
                               callbacks=[self.llm_metrics_handler])
        return self._llm

    @property
    def llm_no_rag(self):
        api_key = self.get_api_key_for_llm()
        self._llm_no_rag = ChatOpenAI(api_key=api_key,
                                      model=self._variables['llm_model'],
                                      temperature=self._variables['RAG_temperature'],
                                      callbacks=[self.llm_metrics_handler])
        return self._llm_no_rag

    def get_api_key_for_llm(self):
        if self._variables['llm_provider'] == 'openai':
            api_key = os.getenv('OPENAI_API_KEY')
        else:  # self._variables['llm_provider'] == 'anthropic'
            api_key = os.getenv('ANTHROPIC_API_KEY')

        return api_key

    @property
    def transcription_client(self):
        api_key = os.getenv('OPENAI_API_KEY')
        self._transcription_client = OpenAI(api_key=api_key, )
        self._variables['transcription_model'] = 'whisper-1'
        return self._transcription_client

    def transcribe(self, *args, **kwargs):
        return tracked_transcribe(self._transcription_client, *args, **kwargs)

    @property
    def embedding_db_model(self):
        if self._embedding_db_model is None:
            self._embedding_db_model = self.get_embedding_db_model()
        return self._embedding_db_model

    def get_embedding_db_model(self):
        current_app.logger.debug("In get_embedding_db_model")
        if self._embedding_db_model is None:
            self._embedding_db_model = EmbeddingSmallOpenAI \
                if self._variables['embedding_model'] == 'text-embedding-3-small' \
                else EmbeddingLargeOpenAI
        current_app.logger.debug(f"Embedding DB Model: {self._embedding_db_model}")
        return self._embedding_db_model

    def get_prompt_template(self, template_name: str) -> str:
        current_app.logger.info(f"Getting prompt template for {template_name}")
        if template_name not in self._prompt_templates:
            self._prompt_templates[template_name] = self._load_prompt_template(template_name)
        return self._prompt_templates[template_name]

    def _load_prompt_template(self, template_name: str) -> str:
        # In the future, this method will make an API call to Portkey
        # For now, we'll simulate it with a placeholder implementation
        # You can replace this with your current prompt loading logic
        return self._variables['templates'][template_name]

    def __getitem__(self, key: str) -> Any:
        current_app.logger.debug(f"ModelVariables: Getting {key}")
        # Support older template names (suffix = _template)
        if key.endswith('_template'):
            key = key[:-len('_template')]
            current_app.logger.debug(f"ModelVariables: Getting modified {key}")
        if key == 'embedding_model':
            return self.embedding_model
        elif key == 'embedding_db_model':
            return self.embedding_db_model
        elif key == 'llm':
            return self.llm
        elif key == 'llm_no_rag':
            return self.llm_no_rag
        elif key == 'transcription_client':
            return self.transcription_client
        elif key in self._variables.get('prompt_templates', []):
            return self.get_prompt_template(key)
        else:
            value = self._variables.get(key)
            if value is not None:
                return value
            else:
                raise KeyError(f'Variable {key} does not exist in ModelVariables')

    def __setitem__(self, key: str, value: Any) -> None:
        self._variables[key] = value

    def __delitem__(self, key: str) -> None:
        del self._variables[key]

    def __iter__(self) -> Iterator[str]:
        return iter(self._variables)

    def __len__(self):
        return len(self._variables)

    def get(self, key: str, default: Any = None) -> Any:
        return self.__getitem__(key) or default

    def update(self, **kwargs) -> None:
        self._variables.update(kwargs)

    def items(self):
        return self._variables.items()

    def keys(self):
        return self._variables.keys()

    def values(self):
        return self._variables.values()


def select_model_variables(tenant, catalog_id=None):
    model_variables = ModelVariables(tenant=tenant, catalog_id=catalog_id)
    return model_variables


def create_language_template(template, language):
    Returns:
        str: Template with language placeholder replaced
    """
    try:
        full_language = langcodes.Language.make(language=language)
        language_template = template.replace('{language}', full_language.display_name())
@@ -253,5 +40,249 @@ def create_language_template(template, language):
    return language_template


def replace_variable_in_template(template, variable, value):
    return template.replace(variable, value)
def replace_variable_in_template(template: str, variable: str, value: str) -> str:
    """
    Replace a variable placeholder in template with specified value

    Args:
        template: Template string with variable placeholder
        variable: Variable placeholder to replace (e.g. "{tenant_context}")
        value: Value to insert

    Returns:
        str: Template with variable placeholder replaced
    """
    return template.replace(variable, value or "")


class ModelVariables:
    """Manages model-related variables and configurations"""

    def __init__(self, tenant_id: int, variables: Dict[str, Any] = None):
        """
        Initialize ModelVariables for a tenant, with optional pre-initialized variables

        Args:
            tenant_id: ID of the tenant
            variables: Optional dict of already-initialized variables (e.g. restored from cache)
        """
        current_app.logger.info(f'Model variables initialized with tenant {tenant_id} and variables \n{variables}')
        self.tenant_id = tenant_id
        self._variables = variables if variables is not None else self._initialize_variables()
        current_app.logger.info(f'Model _variables initialized to {self._variables}')
        self._embedding_model = None
        self._embedding_model_class = None
        self._llm_instances = {}
        self.llm_metrics_handler = LLMMetricsHandler()
        self._transcription_model = None

    def _initialize_variables(self) -> Dict[str, Any]:
        """Initialize the variables dictionary"""
        variables = {}

        tenant = Tenant.query.get(self.tenant_id)
        if not tenant:
            raise EveAITenantNotFound(f"Tenant {self.tenant_id} not found")

        # Set model providers
        variables['embedding_provider'], variables['embedding_model'] = tenant.embedding_model.split('.')
        variables['llm_provider'], variables['llm_model'] = tenant.llm_model.split('.')
        variables['llm_full_model'] = tenant.llm_model

        # Set model-specific configurations
        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
        variables.update(model_config)

        # Additional configurations
        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
        variables['max_compression_duration'] = current_app.config['MAX_COMPRESSION_DURATION']
        variables['max_transcription_duration'] = current_app.config['MAX_TRANSCRIPTION_DURATION']
        variables['compression_cpu_limit'] = current_app.config['COMPRESSION_CPU_LIMIT']
        variables['compression_process_delay'] = current_app.config['COMPRESSION_PROCESS_DELAY']

        return variables

    @property
    def embedding_model(self):
        """Get the embedding model instance"""
        if self._embedding_model is None:
            api_key = os.getenv('OPENAI_API_KEY')
            self._embedding_model = TrackedOpenAIEmbeddings(
                api_key=api_key,
                model=self._variables['embedding_model']
            )
        return self._embedding_model

    @property
    def embedding_model_class(self):
        """Get the embedding model class"""
        if self._embedding_model_class is None:
            if self._variables['embedding_model'] == 'text-embedding-3-large':
                self._embedding_model_class = EmbeddingLargeOpenAI
            else:  # text-embedding-3-small
                self._embedding_model_class = EmbeddingSmallOpenAI

        return self._embedding_model_class

    @property
    def annotation_chunk_length(self):
        return self._variables['annotation_chunk_length']

    @property
    def max_compression_duration(self):
        return self._variables['max_compression_duration']

    @property
    def max_transcription_duration(self):
        return self._variables['max_transcription_duration']

    @property
    def compression_cpu_limit(self):
        return self._variables['compression_cpu_limit']

    @property
    def compression_process_delay(self):
        return self._variables['compression_process_delay']

    def get_llm(self, temperature: float = 0.3, **kwargs) -> Any:
        """
        Get an LLM instance with specific configuration

        Args:
            temperature: The temperature for the LLM
            **kwargs: Additional configuration parameters

        Returns:
            An instance of the configured LLM
        """
        cache_key = f"{temperature}_{hash(frozenset(kwargs.items()))}"

        if cache_key not in self._llm_instances:
            provider = self._variables['llm_provider']
            model = self._variables['llm_model']

            if provider == 'openai':
                self._llm_instances[cache_key] = ChatOpenAI(
                    api_key=os.getenv('OPENAI_API_KEY'),
                    model=model,
                    temperature=temperature,
                    callbacks=[self.llm_metrics_handler],
                    **kwargs
                )
            elif provider == 'anthropic':
                self._llm_instances[cache_key] = ChatAnthropic(
                    api_key=os.getenv('ANTHROPIC_API_KEY'),
                    model=current_app.config['ANTHROPIC_LLM_VERSIONS'][model],
                    temperature=temperature,
                    callbacks=[self.llm_metrics_handler],
                    **kwargs
                )
            else:
                raise ValueError(f"Unsupported LLM provider: {provider}")

        return self._llm_instances[cache_key]

    @property
    def transcription_model(self) -> TrackedOpenAITranscription:
        """Get the transcription model instance"""
        if self._transcription_model is None:
            api_key = os.getenv('OPENAI_API_KEY')
            self._transcription_model = TrackedOpenAITranscription(
                api_key=api_key,
                model='whisper-1'
            )
        return self._transcription_model

    # Remove the old transcription-related methods since they're now handled by TrackedOpenAITranscription
    @property
    def transcription_client(self):
        raise DeprecationWarning("Use transcription_model instead")

    def transcribe(self, *args, **kwargs):
        raise DeprecationWarning("Use transcription_model.transcribe() instead")

    def get_template(self, template_name: str, version: Optional[str] = None) -> str:
        """
        Get a template for the tenant's configured LLM

        Args:
            template_name: Name of the template to retrieve
            version: Optional specific version to retrieve

        Returns:
            The template content
        """
        try:
            template = template_manager.get_template(
                self._variables['llm_full_model'],
                template_name,
                version
            )
            return template.content
        except Exception as e:
            current_app.logger.error(f"Error getting template {template_name}: {str(e)}")
            # Fall back to old template loading if template_manager fails
            if template_name in self._variables.get('templates', {}):
                return self._variables['templates'][template_name]
            raise


class ModelVariablesCacheHandler(CacheHandler[ModelVariables]):
    handler_name = 'model_vars_cache'  # Used to access handler instance from cache_manager

    def __init__(self, region):
        super().__init__(region, 'model_variables')
        self.configure_keys('tenant_id')
        self.subscribe_to_model('Tenant', ['tenant_id'])

    def to_cache_data(self, instance: ModelVariables) -> Dict[str, Any]:
        return {
            'tenant_id': instance.tenant_id,
            'variables': instance._variables,
            'last_updated': dt.now(tz=tz.utc).isoformat()
        }

    def from_cache_data(self, data: Dict[str, Any], tenant_id: int, **kwargs) -> ModelVariables:
        instance = ModelVariables(tenant_id, data.get('variables'))
        return instance

    def should_cache(self, value: Dict[str, Any]) -> bool:
        required_fields = {'tenant_id', 'variables'}
        return all(field in value for field in required_fields)


# Register the handler with the cache manager
cache_manager.register_handler(ModelVariablesCacheHandler, 'model')


# Helper function to get cached model variables
def get_model_variables(tenant_id: int) -> ModelVariables:
    return cache_manager.model_vars_cache.get(
        lambda tenant_id: ModelVariables(tenant_id),  # function to create ModelVariables if required
        tenant_id=tenant_id
    )

# Written in a long format, without lambda
# def get_model_variables(tenant_id: int) -> ModelVariables:
#     """
#     Get ModelVariables instance, either from cache or newly created
#
#     Args:
#         tenant_id: The tenant's ID
#
#     Returns:
#         ModelVariables: Instance with either cached or fresh data
#
#     Raises:
#         TenantNotFoundError: If tenant doesn't exist
#         CacheStateError: If cached data is invalid
#     """
#
#     def create_new_instance(tenant_id: int) -> ModelVariables:
#         """Creator function that's called when cache miss occurs"""
#         return ModelVariables(tenant_id)  # This will initialize fresh variables
#
#     return cache_manager.model_vars_cache.get(
#         create_new_instance,  # Function to create new instance if needed
#         tenant_id=tenant_id  # Parameters passed to both get() and create_new_instance
#     )
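# Illustrative sketch (not part of this commit) of the round trip the cache layer performs
# through ModelVariablesCacheHandler; in normal use, cache_manager.model_vars_cache.get()
# drives this. The tenant id below is an assumed example value.
#
# handler = cache_manager.model_vars_cache                    # registered above as 'model_vars_cache'
# mv = ModelVariables(tenant_id=1)                            # illustrative tenant id
# data = handler.to_cache_data(mv)                            # -> {'tenant_id', 'variables', 'last_updated'}
# if handler.should_cache(data):
#     restored = handler.from_cache_data(data, tenant_id=mv.tenant_id)  # rebuilt without re-querying config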