Changelog: introduced dynamic Processors, a caching system, and an improved template manager; adapted ModelVariables to support dynamic Processors / Retrievers / Specialists; began adapting the chat client.

77 lines · 2.7 KiB · Python
# common/langchain/tracked_transcription.py
|
|
from typing import Any, Optional, Dict
|
|
import time
|
|
from openai import OpenAI
|
|
from common.utils.business_event_context import current_event
|
|
|
|
|
|
class TrackedOpenAITranscription:
    """Wrapper around OpenAI audio transcription that logs usage metrics.

    Every call to :meth:`transcribe` records elapsed time and a
    token-equivalent usage figure on the current business event via
    ``current_event.log_llm_metrics``.
    """

    def __init__(self, api_key: str, **kwargs: Any):
        """Initialize the underlying OpenAI client.

        Args:
            api_key: OpenAI API key used to construct the client.
            **kwargs: Optional settings; only ``model`` is read here
                (defaults to ``'whisper-1'``).
        """
        self.client = OpenAI(api_key=api_key)
        self.model = kwargs.get('model', 'whisper-1')

    def transcribe(self,
                   file: Any,
                   model: Optional[str] = None,
                   language: Optional[str] = None,
                   prompt: Optional[str] = None,
                   response_format: Optional[str] = None,
                   temperature: Optional[float] = None,
                   duration: Optional[int] = None) -> str:
        """
        Transcribe audio with metrics tracking.

        Args:
            file: Audio file to transcribe
            model: Model to use (defaults to whisper-1)
            language: Optional language of the audio
            prompt: Optional prompt to guide transcription
            response_format: Response format (json, text, etc)
            temperature: Sampling temperature (0.0 is a valid value)
            duration: Duration of audio in seconds for metrics

        Returns:
            Transcription text

        Raises:
            Exception: Wraps any underlying failure, chained to the
                original exception so the root cause is preserved.
        """
        start_time = time.time()

        try:
            # Create transcription options; optional fields are added only
            # when the caller supplied them.
            options: Dict[str, Any] = {
                "file": file,
                "model": model or self.model,
            }
            if language:
                options["language"] = language
            if prompt:
                options["prompt"] = prompt
            if response_format:
                options["response_format"] = response_format
            # FIX: `if temperature:` dropped an explicit temperature of 0.0;
            # compare against None so all valid floats are forwarded.
            if temperature is not None:
                options["temperature"] = temperature

            response = self.client.audio.transcriptions.create(**options)

            # Calculate metrics
            end_time = time.time()

            # Token usage for transcriptions is approximated by audio duration
            # in seconds. Default to 10 minutes (600 s) only when the caller
            # did not provide a duration at all — a duration of 0 is honored.
            billed = duration if duration is not None else 600
            metrics = {
                'total_tokens': billed,
                'prompt_tokens': 0,  # For transcriptions, all tokens are completion
                'completion_tokens': billed,
                'time_elapsed': end_time - start_time,
                'interaction_type': 'ASR',
            }
            current_event.log_llm_metrics(metrics)

            # Some response formats (e.g. "text") come back as a plain string;
            # otherwise the SDK returns an object with a `.text` attribute.
            if isinstance(response, str):
                return response
            return response.text

        except Exception as e:
            # FIX: chain the original exception (`from e`) instead of
            # discarding the traceback of the root cause.
            raise Exception(f"Transcription failed: {str(e)}") from e