Changelog: introduced dynamic Processors, a caching system, and an improved template manager; adapted ModelVariables to support dynamic Processors / Retrievers / Specialists; began adapting the chat client.

77 lines · 2.7 KiB · Python
# common/langchain/tracked_transcription.py
|
|
from typing import Any, Optional, Dict
|
|
import time
|
|
from openai import OpenAI
|
|
from common.utils.business_event_context import current_event
|
|
|
|
|
|
class TrackedOpenAITranscription:
    """Wrapper around OpenAI audio transcription that logs usage metrics.

    Every call to :meth:`transcribe` records elapsed time and a
    token-equivalent usage figure on the current business event via
    ``current_event.log_llm_metrics``.
    """

    def __init__(self, api_key: str, **kwargs: Any):
        """Initialize the underlying OpenAI client.

        Args:
            api_key: OpenAI API key used to construct the client.
            **kwargs: Optional settings; only ``model`` is read here
                (defaults to ``'whisper-1'``).
        """
        self.client = OpenAI(api_key=api_key)
        self.model = kwargs.get('model', 'whisper-1')

    def transcribe(self,
                   file: Any,
                   model: Optional[str] = None,
                   language: Optional[str] = None,
                   prompt: Optional[str] = None,
                   response_format: Optional[str] = None,
                   temperature: Optional[float] = None,
                   duration: Optional[int] = None) -> str:
        """
        Transcribe audio with metrics tracking.

        Args:
            file: Audio file to transcribe
            model: Model to use (defaults to whisper-1)
            language: Optional language of the audio
            prompt: Optional prompt to guide transcription
            response_format: Response format (json, text, etc)
            temperature: Sampling temperature (0.0 is a valid value)
            duration: Duration of audio in seconds for metrics

        Returns:
            Transcription text

        Raises:
            Exception: Wraps any underlying failure, chained to the
                original exception so the root cause is preserved.
        """
        start_time = time.time()

        try:
            # Create transcription options; optional fields are added only
            # when the caller supplied them.
            options: Dict[str, Any] = {
                "file": file,
                "model": model or self.model,
            }
            if language:
                options["language"] = language
            if prompt:
                options["prompt"] = prompt
            if response_format:
                options["response_format"] = response_format
            # FIX: `if temperature:` dropped an explicit temperature of 0.0;
            # compare against None so all valid floats are forwarded.
            if temperature is not None:
                options["temperature"] = temperature

            response = self.client.audio.transcriptions.create(**options)

            # Calculate metrics
            end_time = time.time()

            # Token usage for transcriptions is approximated by audio duration
            # in seconds. Default to 10 minutes (600 s) only when the caller
            # did not provide a duration at all — a duration of 0 is honored.
            billed = duration if duration is not None else 600
            metrics = {
                'total_tokens': billed,
                'prompt_tokens': 0,  # For transcriptions, all tokens are completion
                'completion_tokens': billed,
                'time_elapsed': end_time - start_time,
                'interaction_type': 'ASR',
            }
            current_event.log_llm_metrics(metrics)

            # Some response formats (e.g. "text") come back as a plain string;
            # otherwise the SDK returns an object with a `.text` attribute.
            if isinstance(response, str):
                return response
            return response.text

        except Exception as e:
            # FIX: chain the original exception (`from e`) instead of
            # discarding the traceback of the root cause.
            raise Exception(f"Transcription failed: {str(e)}") from e