- Allow filtering on Tenant Types and searching on parts of Tenant names - Implement health checks - Start Prometheus monitoring (still needs to be finalized) - Refine audio_processor and srt_processor to reduce duplicate code and to support larger files - Introduce repopack so that LLMs can reason about the code
148 lines
6.2 KiB
Python
148 lines
6.2 KiB
Python
import io
|
|
import os
|
|
|
|
from pydub import AudioSegment
|
|
import tempfile
|
|
from common.extensions import minio_client
|
|
import subprocess
|
|
|
|
from .transcription_processor import TranscriptionProcessor
|
|
|
|
|
|
class AudioProcessor(TranscriptionProcessor):
    """Transcribe the audio file attached to a document version.

    The source file is downloaded from MinIO, re-encoded to a low-bitrate MP3
    with ffmpeg, split into fixed-length chunks and sent chunk by chunk to the
    configured transcription client. The compressed audio and the resulting
    transcription are both uploaded to MinIO under the same tenant / document /
    language / version identifiers as the source file.
    """

    # Audio bitrate handed to ffmpeg's ``-b:a`` option when compressing.
    COMPRESSION_BITRATE = '64k'
    # Length of each chunk sent to the transcription API, in milliseconds.
    SEGMENT_LENGTH_MS = 10 * 60 * 1000  # 10 minutes

    def __init__(self, tenant, model_variables, document_version):
        """Initialise the processor.

        Args:
            tenant: Tenant the document belongs to; forwarded to the base class.
            model_variables: Mapping that must contain the keys
                ``'transcription_client'`` and ``'transcription_model'``.
            document_version: Document version to transcribe; forwarded to the
                base class.

        Raises:
            KeyError: If ``model_variables`` lacks one of the required keys.
        """
        super().__init__(tenant, model_variables, document_version)
        self.transcription_client = model_variables['transcription_client']
        self.transcription_model = model_variables['transcription_model']
        # Resolved through PATH; override on the instance to use another binary.
        self.ffmpeg_path = 'ffmpeg'

    def _get_transcription(self):
        """Download the source audio, compress it and return its transcription."""
        file_data = minio_client.download_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            self.document_version.file_name
        )
        compressed_audio = self._compress_audio(file_data)
        return self._transcribe_audio(compressed_audio)

    def _compress_audio(self, audio_data):
        """Re-encode ``audio_data`` to MP3 with ffmpeg and store the result in MinIO.

        Args:
            audio_data: Raw bytes of the source audio file.

        Returns:
            The bytes of the compressed MP3 file.

        Raises:
            Exception: If ffmpeg exits with a non-zero status; the message
                carries ffmpeg's stderr and the ``CalledProcessError`` is
                chained as the cause.
        """
        self._log("Compressing audio")

        # A private scratch directory gives both files collision-free names and
        # guarantees they are removed on every exit path.
        with tempfile.TemporaryDirectory() as work_dir:
            input_path = os.path.join(work_dir, f'input.{self.document_version.file_type}')
            output_path = os.path.join(work_dir, 'compressed.mp3')

            # Write and close the input before ffmpeg opens it.
            with open(input_path, 'wb') as source_file:
                source_file.write(audio_data)

            try:
                subprocess.run(
                    [self.ffmpeg_path, '-y', '-i', input_path,
                     '-b:a', self.COMPRESSION_BITRATE, '-f', 'mp3', output_path],
                    capture_output=True,
                    text=True,
                    check=True
                )
            except subprocess.CalledProcessError as e:
                error_message = f"Compression failed: {e.stderr}"
                self._log(error_message, level='error')
                raise Exception(error_message) from e

            with open(output_path, 'rb') as compressed_file:
                compressed_data = compressed_file.read()

        # Save compressed audio to MinIO
        compressed_filename = f"{self.document_version.id}_compressed.mp3"
        minio_client.upload_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            compressed_filename,
            compressed_data
        )
        self._log(f"Saved compressed audio to MinIO: {compressed_filename}")

        return compressed_data

    def _transcribe_audio(self, audio_data):
        """Transcribe MP3 ``audio_data`` chunk by chunk and store the text in MinIO.

        Args:
            audio_data: Bytes of an MP3 file.

        Returns:
            The transcriptions of all chunks joined by single spaces, or the
            placeholder ``"No transcription available."`` when no chunk yielded
            any text.
        """
        self._log("Starting audio transcription")
        audio = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")

        segment_length = self.SEGMENT_LENGTH_MS
        # Ceiling division: an exact multiple of the segment length must not
        # count an extra (non-existent) trailing chunk.
        total_chunks = (len(audio) + segment_length - 1) // segment_length
        transcriptions = []

        # Slicing with a step iterates over consecutive chunks of that length.
        for i, chunk in enumerate(audio[::segment_length]):
            self._log(f'Processing chunk {i + 1} of {total_chunks}')
            chunk_text = self._transcribe_chunk(chunk)
            if chunk_text:
                transcriptions.append(chunk_text)

        full_transcription = " ".join(transcriptions)

        if not full_transcription:
            self._log("Warning: No transcription was generated", level='warning')
            full_transcription = "No transcription available."

        # Save transcription to MinIO
        transcription_filename = f"{self.document_version.id}_transcription.txt"
        minio_client.upload_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            transcription_filename,
            full_transcription.encode('utf-8')
        )
        self._log(f"Saved transcription to MinIO: {transcription_filename}")

        return full_transcription

    def _transcribe_chunk(self, chunk):
        """Export one audio chunk to a temporary MP3 and return its transcription.

        Returns ``None`` when the transcription call fails or yields nothing;
        the failure is logged and the chunk is skipped so the remaining chunks
        are still processed. Errors raised while exporting the chunk propagate.
        """
        # Only reserve a unique path here; the handle is closed before the
        # chunk is exported to that path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            chunk_path = temp_audio.name

        try:
            chunk.export(chunk_path, format="mp3")
            try:
                return self._request_transcription(chunk_path)
            except Exception as e:
                self._log(f"Error during transcription: {str(e)}", level='error')
                return None
        finally:
            # Runs for export failures too, so the temporary file never leaks.
            os.unlink(chunk_path)

    def _request_transcription(self, chunk_path):
        """Send the MP3 file at ``chunk_path`` to the transcription API.

        Returns the transcribed text, or ``None`` when the API result is empty.
        """
        file_size = os.path.getsize(chunk_path)
        self._log(f"Temporary audio file size: {file_size} bytes")

        with open(chunk_path, 'rb') as audio_file:
            file_start = audio_file.read(100)
            self._log(f"First 100 bytes of audio file: {file_start}")
            audio_file.seek(0)  # Reset file pointer to the beginning

            self._log("Calling transcription API")
            transcription = self.transcription_client.audio.transcriptions.create(
                file=audio_file,
                model=self.transcription_model,
                language=self.document_version.language,
                response_format='verbose_json',
            )
            self._log("Transcription API call completed")

        if not transcription:
            self._log("Warning: Received empty transcription", level='warning')
            return None

        # The result may be a plain string, an object exposing ``.text``, or
        # something else entirely; normalise all three to a string.
        if isinstance(transcription, str):
            self._log(f"Transcription result (string): {transcription[:100]}...")
            return transcription
        if hasattr(transcription, 'text'):
            self._log(
                f"Transcription result (object with 'text' attribute): {transcription.text[:100]}...")
            return transcription.text

        self._log(f"Transcription result (unknown type): {str(transcription)[:100]}...")
        return str(transcription)
|
|
|