- Improved audio processing to limit CPU and memory usage
- Removed Portkey from the stack; monitoring is now done explicitly with LangChain-native callbacks
- Optimized Business Event logging
@@ -11,6 +11,9 @@ from openai import OpenAI
-from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
-from portkey_ai.langchain.portkey_langchain_callback_handler import LangchainCallbackHandler
+from common.langchain.llm_metrics_handler import LLMMetricsHandler
+from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
+from common.langchain.tracked_transcribe import tracked_transcribe
 from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
 from common.models.user import Tenant
 from config.model_config import MODEL_CONFIG
@@ -48,6 +51,8 @@ class ModelVariables(MutableMapping):
         self._transcription_client = None
         self._prompt_templates = {}
         self._embedding_db_model = None
+        self.llm_metrics_handler = LLMMetricsHandler()
+        self._transcription_client = None

     def _initialize_variables(self):
         variables = {}
@@ -89,26 +94,20 @@ class ModelVariables(MutableMapping):
         if variables['tool_calling_supported']:
             variables['cited_answer_cls'] = CitedAnswer

+        variables['max_compression_duration'] = current_app.config['MAX_COMPRESSION_DURATION']
+        variables['max_transcription_duration'] = current_app.config['MAX_TRANSCRIPTION_DURATION']
+        variables['compression_cpu_limit'] = current_app.config['COMPRESSION_CPU_LIMIT']
+        variables['compression_process_delay'] = current_app.config['COMPRESSION_PROCESS_DELAY']

         return variables
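The four new settings cap audio preprocessing: how long compression and transcription may run, how much CPU the compression step may claim, and how long to wait between compression processes. The enforcement code is not part of this hunk; below is a minimal sketch of how such limits might be applied around an ffmpeg subprocess. The compress_audio helper and the ffmpeg flags are assumptions, not code from this commit.

import subprocess

def compress_audio(src: str, dst: str, cpu_limit: int, max_duration: int) -> None:
    # Cap encoder threads so compression cannot monopolize the CPU
    # (cpu_limit would come from COMPRESSION_CPU_LIMIT).
    cmd = ['ffmpeg', '-y', '-i', src,
           '-threads', str(cpu_limit),
           '-b:a', '64k',
           dst]
    try:
        # Kill the process outright if it exceeds the wall-clock budget
        # (max_duration would come from MAX_COMPRESSION_DURATION).
        subprocess.run(cmd, check=True, timeout=max_duration)
    except subprocess.TimeoutExpired:
        raise RuntimeError(f'compression exceeded {max_duration}s budget')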
     @property
     def embedding_model(self):
-        portkey_metadata = self.get_portkey_metadata()
-
-        portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
-                                        provider=self._variables['embedding_provider'],
-                                        metadata=portkey_metadata,
-                                        trace_id=current_event.trace_id,
-                                        span_id=current_event.span_id,
-                                        span_name=current_event.span_name,
-                                        parent_span_id=current_event.parent_span_id
-                                        )
         api_key = os.getenv('OPENAI_API_KEY')
         model = self._variables['embedding_model']
-        self._embedding_model = OpenAIEmbeddings(api_key=api_key,
-                                                 model=model,
-                                                 base_url=PORTKEY_GATEWAY_URL,
-                                                 default_headers=portkey_headers)
+        self._embedding_model = TrackedOpenAIEmbeddings(api_key=api_key,
+                                                        model=model)
         self._embedding_db_model = EmbeddingSmallOpenAI \
             if model == 'text-embedding-3-small' \
             else EmbeddingLargeOpenAI
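TrackedOpenAIEmbeddings replaces the Portkey-routed OpenAIEmbeddings, so usage tracking now happens in-process instead of at the gateway. Its implementation is not shown in this diff; a plausible sketch, assuming it subclasses LangChain's OpenAIEmbeddings and logs around the two embedding calls:

from langchain_openai import OpenAIEmbeddings

class TrackedOpenAIEmbeddings(OpenAIEmbeddings):
    # Assumed shape: defer to the stock client, then record usage in-process.
    def embed_documents(self, texts: list[str], chunk_size: int | None = None) -> list[list[float]]:
        vectors = super().embed_documents(texts, chunk_size=chunk_size)
        # Stand-in for the project's business-event pipeline.
        print(f'embedded {len(texts)} documents with {self.model}')
        return vectors

    def embed_query(self, text: str) -> list[float]:
        vector = super().embed_query(text)
        print(f'embedded 1 query with {self.model}')
        return vector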
@@ -117,108 +116,40 @@ class ModelVariables(MutableMapping):

     @property
     def llm(self):
-        portkey_headers = self.get_portkey_headers_for_llm()
         api_key = self.get_api_key_for_llm()
         self._llm = ChatOpenAI(api_key=api_key,
                                model=self._variables['llm_model'],
                                temperature=self._variables['RAG_temperature'],
-                               base_url=PORTKEY_GATEWAY_URL,
-                               default_headers=portkey_headers)
+                               callbacks=[self.llm_metrics_handler])
         return self._llm

     @property
     def llm_no_rag(self):
-        portkey_headers = self.get_portkey_headers_for_llm()
         api_key = self.get_api_key_for_llm()
         self._llm_no_rag = ChatOpenAI(api_key=api_key,
                                       model=self._variables['llm_model'],
                                       temperature=self._variables['RAG_temperature'],
-                                      base_url=PORTKEY_GATEWAY_URL,
-                                      default_headers=portkey_headers)
+                                      callbacks=[self.llm_metrics_handler])
         return self._llm_no_rag
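Both chat models now share the single LLMMetricsHandler created in the constructor, so metrics for RAG and non-RAG calls land in one place. The handler's implementation is not in this diff; a minimal sketch, assuming it extends LangChain's BaseCallbackHandler and reads token usage from the non-streaming ChatOpenAI response in on_llm_end:

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult

class LLMMetricsHandler(BaseCallbackHandler):
    # Assumed shape: extract token counts after each completed LLM call.
    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        usage = (response.llm_output or {}).get('token_usage', {})
        # Stand-in for the project's business-event pipeline.
        print(f"prompt={usage.get('prompt_tokens')} "
              f"completion={usage.get('completion_tokens')} "
              f"total={usage.get('total_tokens')}")

    def on_llm_error(self, error: BaseException, **kwargs) -> None:
        print(f'LLM call failed: {error}')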
-    def get_portkey_headers_for_llm(self):
-        portkey_metadata = self.get_portkey_metadata()
-        portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
-                                        metadata=portkey_metadata,
-                                        provider=self._variables['llm_provider'],
-                                        trace_id=current_event.trace_id,
-                                        span_id=current_event.span_id,
-                                        span_name=current_event.span_name,
-                                        parent_span_id=current_event.parent_span_id
-                                        )
-        return portkey_headers
-
-    def get_portkey_metadata(self):
-        environment = os.getenv('FLASK_ENV', 'development')
-        portkey_metadata = {'tenant_id': str(self.tenant.id),
-                            'environment': environment,
-                            'trace_id': current_event.trace_id,
-                            'span_id': current_event.span_id,
-                            'span_name': current_event.span_name,
-                            'parent_span_id': current_event.parent_span_id,
-                            }
-        return portkey_metadata
-
     def get_api_key_for_llm(self):
         if self._variables['llm_provider'] == 'openai':
             api_key = os.getenv('OPENAI_API_KEY')
         else:  # self._variables['llm_provider'] == 'anthropic'
             api_key = os.getenv('ANTHROPIC_API_KEY')

         return api_key
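get_api_key_for_llm silently treats every non-OpenAI provider as Anthropic. A table-driven variant (hypothetical, not part of this commit) would fail loudly on unknown providers, mirroring the ValueError in the deleted initializer below:

import os

# Hypothetical variant: map providers to key variables and reject anything else.
PROVIDER_ENV_KEYS = {'openai': 'OPENAI_API_KEY',
                     'anthropic': 'ANTHROPIC_API_KEY'}

def get_api_key_for_llm(provider: str) -> str:
    try:
        return os.environ[PROVIDER_ENV_KEYS[provider]]
    except KeyError:
        raise ValueError(f'Invalid chat provider: {provider}')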
-    # def _initialize_llm(self):
-    #     if self._variables['llm_provider'] == 'openai':
-    #         portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
-    #                                         metadata=portkey_metadata,
-    #                                         provider='openai')
-    #         self._llm = ChatOpenAI(api_key=api_key,
-    #                                model=self._variables['llm_model'],
-    #                                temperature=self._variables['RAG_temperature'],
-    #                                base_url=PORTKEY_GATEWAY_URL,
-    #                                default_headers=portkey_headers)
-    #         self._llm_no_rag = ChatOpenAI(api_key=api_key,
-    #                                       model=self._variables['llm_model'],
-    #                                       temperature=self._variables['no_RAG_temperature'],
-    #                                       base_url=PORTKEY_GATEWAY_URL,
-    #                                       default_headers=portkey_headers)
-    #         self._variables['tool_calling_supported'] = self._variables['llm_model'] in ['gpt-4o', 'gpt-4o-mini']
-    #     elif self._variables['llm_provider'] == 'anthropic':
-    #         api_key = os.getenv('ANTHROPIC_API_KEY')
-    #         llm_model_ext = os.getenv('ANTHROPIC_LLM_VERSIONS', {}).get(self._variables['llm_model'])
-    #         self._llm = ChatAnthropic(api_key=api_key,
-    #                                   model=llm_model_ext,
-    #                                   temperature=self._variables['RAG_temperature'])
-    #         self._llm_no_rag = ChatAnthropic(api_key=api_key,
-    #                                          model=llm_model_ext,
-    #                                          temperature=self._variables['RAG_temperature'])
-    #         self._variables['tool_calling_supported'] = True
-    #     else:
-    #         raise ValueError(f"Invalid chat provider: {self._variables['llm_provider']}")
     @property
     def transcription_client(self):
-        environment = os.getenv('FLASK_ENV', 'development')
-        portkey_metadata = self.get_portkey_metadata()
-        portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
-                                        metadata=portkey_metadata,
-                                        provider='openai',
-                                        trace_id=current_event.trace_id,
-                                        span_id=current_event.span_id,
-                                        span_name=current_event.span_name,
-                                        parent_span_id=current_event.parent_span_id
-                                        )
         api_key = os.getenv('OPENAI_API_KEY')
-        self._transcription_client = OpenAI(api_key=api_key,
-                                            base_url=PORTKEY_GATEWAY_URL,
-                                            default_headers=portkey_headers)
+        self._transcription_client = OpenAI(api_key=api_key)
         self._variables['transcription_model'] = 'whisper-1'
         return self._transcription_client

+    def transcribe(self, *args, **kwargs):
+        return tracked_transcribe(self._transcription_client, *args, **kwargs)
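tracked_transcribe wraps the raw OpenAI client, which is where transcription metrics are captured now that Portkey no longer sits in front of Whisper. Its body is not shown in this diff; a plausible sketch, assuming it times the standard transcription endpoint and logs the result:

import time

def tracked_transcribe(client, *args, **kwargs):
    # client is an openai.OpenAI instance; this is the standard
    # audio-transcription endpoint of the v1 SDK.
    start = time.monotonic()
    try:
        return client.audio.transcriptions.create(*args, **kwargs)
    finally:
        # Stand-in for the project's business-event pipeline.
        print(f'transcription took {time.monotonic() - start:.2f}s')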
     @property
     def embedding_db_model(self):
         if self._embedding_db_model is None: