- Improvements on audio processing to limit CPU and memory usage

- Removed Portkey from the equation and defined explicit monitoring using native LangChain code

- Optimization of Business Event logging
2024-10-02 14:11:46 +02:00
parent 883175b8f5
commit b700cfac64
13 changed files with 450 additions and 228 deletions
--- a/common/langchain/llm_metrics_handler.py
+++ b/common/langchain/llm_metrics_handler.py
@@ -0,0 +1,49 @@
+import time
+from langchain.callbacks.base import BaseCallbackHandler
+from typing import Dict, Any, List
+from langchain.schema import LLMResult
+from common.utils.business_event_context import current_event
+from flask import current_app
+
+
+class LLMMetricsHandler(BaseCallbackHandler):
+    """Callback handler that reports token usage and latency of an LLM call.
+
+    ``on_llm_start`` records a start timestamp; ``on_llm_end`` computes the
+    elapsed time, reads the token counts from the result, passes the metrics
+    dict to ``current_event.log_llm_metrics`` and resets the handler.
+
+    NOTE(review): all counters live on the instance, so overlapping calls that
+    share one handler would interleave their measurements - confirm handlers
+    are not shared across concurrent calls.
+    """
+
+    def __init__(self):
+        self.total_tokens: int = 0
+        self.prompt_tokens: int = 0
+        self.completion_tokens: int = 0
+        self.start_time: float = 0
+        self.end_time: float = 0
+        self.total_time: float = 0
+
+    def reset(self):
+        """Zero every counter and timestamp so the next call starts clean."""
+        self.total_tokens = 0
+        self.prompt_tokens = 0
+        self.completion_tokens = 0
+        self.start_time = 0
+        self.end_time = 0
+        self.total_time = 0
+
+    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
+        """Record the moment the LLM call starts."""
+        # perf_counter is monotonic, so the measured interval cannot go
+        # negative or jump when the system clock is adjusted.
+        self.start_time = time.perf_counter()
+
+    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
+        """Compute elapsed time and token usage, log them, then reset.
+
+        Args:
+            response: Result of the LLM call; token counts are read from
+                ``response.llm_output['token_usage']`` when present.
+        """
+        self.end_time = time.perf_counter()
+        self.total_time = self.end_time - self.start_time
+
+        # llm_output is optional on LLMResult and 'token_usage' may be absent
+        # or None; treat every missing level as "no usage reported" instead of
+        # raising inside the callback.
+        usage = (response.llm_output or {}).get('token_usage') or {}
+        self.prompt_tokens += usage.get('prompt_tokens') or 0
+        self.completion_tokens += usage.get('completion_tokens') or 0
+        self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+        try:
+            current_event.log_llm_metrics(self.get_metrics())
+        finally:
+            # Reset even when logging fails; otherwise this call's tokens
+            # would be added to the next call's totals.
+            self.reset()
+
+    def get_metrics(self) -> Dict[str, int | float | str]:
+        """Return the current counters as the dict passed to the event logger."""
+        return {
+            'total_tokens': self.total_tokens,
+            'prompt_tokens': self.prompt_tokens,
+            'completion_tokens': self.completion_tokens,
+            'time_elapsed': self.total_time,
+            'interaction_type': 'LLM',
+        }
--- a/common/langchain/tracked_openai_embeddings.py
+++ b/common/langchain/tracked_openai_embeddings.py
@@ -0,0 +1,51 @@
+from langchain_openai import OpenAIEmbeddings
+from typing import List, Any
+import time
+from common.utils.business_event_context import current_event
+
+
+class TrackedOpenAIEmbeddings(OpenAIEmbeddings):
+    """``OpenAIEmbeddings`` that logs estimated token usage and latency.
+
+    Every ``embed_documents`` / ``embed_query`` call is timed, its token count
+    is estimated locally with tiktoken, and the resulting metrics dict is
+    passed to ``current_event.log_llm_metrics``.
+
+    NOTE(review): in langchain_openai releases where the base ``embed_query``
+    delegates to ``self.embed_documents``, a single query would be logged by
+    both overrides - confirm against the pinned library version.
+    """
+
+    def _estimate_tokens(self, texts: list[str]) -> int:
+        """Return the tiktoken-based token count summed over ``texts``."""
+        import tiktoken
+
+        try:
+            enc = tiktoken.encoding_for_model(self.model)
+        except KeyError:
+            # tiktoken does not know this model name; fall back to a default
+            # encoding so metric estimation cannot fail a call whose
+            # embedding request already succeeded.
+            enc = tiktoken.get_encoding('cl100k_base')
+        # disallowed_special=() makes special-token strings in the input be
+        # encoded as plain text instead of raising ValueError.
+        return sum(len(enc.encode(text, disallowed_special=())) for text in texts)
+
+    def _log_embedding_metrics(self, texts: list[str], time_elapsed: float) -> None:
+        """Build the metrics dict for an embedding call and log it."""
+        total_tokens = self._estimate_tokens(texts)
+        metrics = {
+            'total_tokens': total_tokens,
+            'prompt_tokens': total_tokens,  # For embeddings, all tokens are prompt tokens
+            'completion_tokens': 0,
+            'time_elapsed': time_elapsed,
+            'interaction_type': 'Embedding',
+        }
+        current_event.log_llm_metrics(metrics)
+
+    def embed_documents(self, texts: list[str], *args: Any, **kwargs: Any) -> list[list[float]]:
+        """Embed ``texts`` via the base class and log metrics for the call.
+
+        Extra positional/keyword arguments are forwarded unchanged to the
+        base implementation.
+        """
+        start_time = time.perf_counter()
+        result = super().embed_documents(texts, *args, **kwargs)
+        time_elapsed = time.perf_counter() - start_time
+
+        self._log_embedding_metrics(texts, time_elapsed)
+        return result
+
+    def embed_query(self, text: str, *args: Any, **kwargs: Any) -> List[float]:
+        """Embed a single query via the base class and log metrics for the call.
+
+        Extra positional/keyword arguments are forwarded unchanged to the
+        base implementation.
+        """
+        start_time = time.perf_counter()
+        result = super().embed_query(text, *args, **kwargs)
+        time_elapsed = time.perf_counter() - start_time
+
+        self._log_embedding_metrics([text], time_elapsed)
+        return result
--- a/common/langchain/tracked_transcribe.py
+++ b/common/langchain/tracked_transcribe.py
@@ -0,0 +1,27 @@
+import time
+from common.utils.business_event_context import current_event
+
+
+def tracked_transcribe(client, *args, **kwargs):
+    """Run an audio transcription through ``client`` and log usage metrics.
+
+    Args:
+        client: Object exposing ``audio.transcriptions.create``.
+        *args: Forwarded unchanged to ``client.audio.transcriptions.create``.
+        **kwargs: Forwarded to ``client.audio.transcriptions.create``, except
+            ``duration``, which is removed before the call. ``duration`` is
+            the audio length in seconds used as the usage figure; it defaults
+            to 600 when the caller does not supply it.
+
+    Returns:
+        Whatever ``client.audio.transcriptions.create`` returns.
+    """
+    # 'duration' is bookkeeping for the metrics only, so it must not reach
+    # the transcription call.
+    duration = kwargs.pop('duration', 600)
+
+    # Monotonic clock: the measured interval is unaffected by system clock
+    # adjustments.
+    start_time = time.perf_counter()
+    result = client.audio.transcriptions.create(*args, **kwargs)
+    time_elapsed = time.perf_counter() - start_time
+
+    # Token usage for transcriptions is actually the duration in seconds we
+    # pass, as the whisper model is priced per second transcribed.
+    metrics = {
+        'total_tokens': duration,
+        'prompt_tokens': 0,  # For transcriptions, all tokens are considered "completion"
+        'completion_tokens': duration,
+        'time_elapsed': time_elapsed,
+        'interaction_type': 'ASR',
+    }
+    current_event.log_llm_metrics(metrics)
+
+    return result