- Improvements to audio processing to limit CPU and memory usage
- Removed Portkey and defined explicit monitoring using LangChain-native code
- Optimized Business Event logging
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
|
||||
import psutil
|
||||
from pydub import AudioSegment
|
||||
import tempfile
|
||||
from common.extensions import minio_client
|
||||
@@ -16,6 +18,11 @@ class AudioProcessor(TranscriptionProcessor):
|
||||
self.transcription_client = model_variables['transcription_client']
|
||||
self.transcription_model = model_variables['transcription_model']
|
||||
self.ffmpeg_path = 'ffmpeg'
|
||||
self.max_compression_duration = model_variables['max_compression_duration']
|
||||
self.max_transcription_duration = model_variables['max_transcription_duration']
|
||||
self.compression_cpu_limit = model_variables.get('compression_cpu_limit', 50) # CPU usage limit in percentage
|
||||
self.compression_process_delay = model_variables.get('compression_process_delay', 0.1) # Delay between processing chunks in seconds
|
||||
self.file_type = document_version.file_type
|
||||
|
||||
def _get_transcription(self):
|
||||
file_data = minio_client.download_document_file(
|
||||
@@ -26,68 +33,121 @@ class AudioProcessor(TranscriptionProcessor):
|
||||
self.document_version.file_name
|
||||
)
|
||||
|
||||
with current_event.create_span("Audio Processing"):
|
||||
with current_event.create_span("Audio Compression"):
|
||||
compressed_audio = self._compress_audio(file_data)
|
||||
with current_event.create_span("Transcription Generation"):
|
||||
with current_event.create_span("Audio Transcription"):
|
||||
transcription = self._transcribe_audio(compressed_audio)
|
||||
|
||||
return transcription
|
||||
|
||||
def _compress_audio(self, audio_data):
|
||||
self._log("Compressing audio")
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{self.document_version.file_type}') as temp_input:
|
||||
temp_input.write(audio_data)
|
||||
temp_input.flush()
|
||||
|
||||
# Use a unique filename for the output to avoid conflicts
|
||||
output_filename = f'compressed_{os.urandom(8).hex()}.mp3'
|
||||
output_path = os.path.join(tempfile.gettempdir(), output_filename)
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{self.document_version.file_type}') as temp_file:
|
||||
temp_file.write(audio_data)
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[self.ffmpeg_path, '-y', '-i', temp_input.name, '-b:a', '64k', '-f', 'mp3', output_path],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True
|
||||
try:
|
||||
self._log("Creating AudioSegment from file")
|
||||
audio_info = AudioSegment.from_file(temp_file_path, format=self.document_version.file_type)
|
||||
self._log("Finished creating AudioSegment from file")
|
||||
total_duration = len(audio_info)
|
||||
self._log(f"Audio duration: {total_duration / 1000} seconds")
|
||||
|
||||
segment_length = self.max_compression_duration * 1000 # Convert to milliseconds
|
||||
total_chunks = (total_duration + segment_length - 1) // segment_length
|
||||
|
||||
compressed_segments = AudioSegment.empty()
|
||||
|
||||
for i in range(total_chunks):
|
||||
self._log(f"Compressing segment {i + 1} of {total_chunks}")
|
||||
|
||||
start_time = i * segment_length
|
||||
end_time = min((i + 1) * segment_length, total_duration)
|
||||
|
||||
chunk = AudioSegment.from_file(
|
||||
temp_file_path,
|
||||
format=self.document_version.file_type,
|
||||
start_second=start_time / 1000,
|
||||
duration=(end_time - start_time) / 1000
|
||||
)
|
||||
|
||||
with open(output_path, 'rb') as f:
|
||||
compressed_data = f.read()
|
||||
compressed_chunk = self._compress_segment(chunk)
|
||||
compressed_segments += compressed_chunk
|
||||
|
||||
# Save compressed audio to MinIO
|
||||
compressed_filename = f"{self.document_version.id}_compressed.mp3"
|
||||
time.sleep(self.compression_process_delay)
|
||||
|
||||
# Save compressed audio to MinIO
|
||||
compressed_filename = f"{self.document_version.id}_compressed.mp3"
|
||||
with io.BytesIO() as compressed_buffer:
|
||||
compressed_segments.export(compressed_buffer, format="mp3")
|
||||
compressed_buffer.seek(0)
|
||||
minio_client.upload_document_file(
|
||||
self.tenant.id,
|
||||
self.document_version.doc_id,
|
||||
self.document_version.language,
|
||||
self.document_version.id,
|
||||
compressed_filename,
|
||||
compressed_data
|
||||
compressed_buffer.read()
|
||||
)
|
||||
self._log(f"Saved compressed audio to MinIO: {compressed_filename}")
|
||||
self._log(f"Saved compressed audio to MinIO: {compressed_filename}")
|
||||
|
||||
return compressed_data
|
||||
return compressed_segments
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
error_message = f"Compression failed: {e.stderr}"
|
||||
self._log(error_message, level='error')
|
||||
raise Exception(error_message)
|
||||
except Exception as e:
|
||||
self._log(f"Error during audio processing: {str(e)}", level='error')
|
||||
raise
|
||||
finally:
|
||||
os.unlink(temp_file_path) # Ensure the temporary file is deleted
|
||||
|
||||
finally:
|
||||
# Clean up temporary files
|
||||
os.unlink(temp_input.name)
|
||||
if os.path.exists(output_path):
|
||||
os.unlink(output_path)
|
||||
def _compress_segment(self, audio_segment):
    """Re-encode one audio segment as a compact MP3 using ffmpeg.

    The segment is exported to WAV in memory and piped through an ffmpeg
    process started under ``nice -n 19``. ffmpeg is invoked with
    ``-ar 16000 -ac 1 -b:a 32k -filter:a loudnorm -f mp3`` and its
    standard output is loaded back with ``AudioSegment.from_mp3``.

    Args:
        audio_segment: Object exposing ``export(buffer, format=...)``;
            the caller in this class passes a pydub ``AudioSegment``.

    Returns:
        The value returned by ``AudioSegment.from_mp3`` for ffmpeg's output.

    Raises:
        Exception: If ffmpeg exits with a non-zero return code.
    """
    # Export to WAV and release the buffer before ffmpeg starts, so the
    # BytesIO buffer is not held open for the lifetime of the subprocess.
    with io.BytesIO() as segment_buffer:
        audio_segment.export(segment_buffer, format="wav")
        wav_bytes = segment_buffer.getvalue()

    command = [
        'nice', '-n', '19',
        # Honour the configured binary; fall back to the previous literal.
        getattr(self, 'ffmpeg_path', 'ffmpeg'),
        '-i', 'pipe:0',
        '-ar', '16000',
        '-ac', '1',
        '-b:a', '32k',
        '-filter:a', 'loudnorm',
        '-f', 'mp3',
        'pipe:1'
    ]

    process = psutil.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout, stderr = process.communicate(input=wav_bytes)

    if process.returncode != 0:
        # ffmpeg may emit bytes that are not valid UTF-8; a strict decode
        # here would raise UnicodeDecodeError and hide the real failure.
        ffmpeg_output = stderr.decode(errors='replace')
        self._log(f"FFmpeg error: {ffmpeg_output}", level='error')
        raise Exception("FFmpeg compression failed")

    with io.BytesIO(stdout) as output_buffer:
        compressed_segment = AudioSegment.from_mp3(output_buffer)

    return compressed_segment
|
||||
|
||||
def _transcribe_audio(self, audio_data):
|
||||
self._log("Starting audio transcription")
|
||||
audio = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")
|
||||
# audio = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")
|
||||
audio = audio_data
|
||||
|
||||
segment_length = 10 * 60 * 1000 # 10 minutes in milliseconds
|
||||
segment_length = self.max_transcription_duration * 1000 # calculate milliseconds
|
||||
transcriptions = []
|
||||
total_chunks = len(audio) // segment_length + 1
|
||||
|
||||
for i, chunk in enumerate(audio[::segment_length]):
|
||||
self._log(f'Processing chunk {i + 1} of {len(audio) // segment_length + 1}')
|
||||
self._log(f'Processing chunk {i + 1} of {total_chunks}')
|
||||
segment_duration = 0
|
||||
if i == total_chunks - 1:
|
||||
segment_duration = (len(audio) % segment_length) // 1000
|
||||
else:
|
||||
segment_duration = self.max_transcription_duration
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
|
||||
chunk.export(temp_audio.name, format="mp3")
|
||||
@@ -103,11 +163,12 @@ class AudioProcessor(TranscriptionProcessor):
|
||||
audio_file.seek(0) # Reset file pointer to the beginning
|
||||
|
||||
self._log("Calling transcription API")
|
||||
transcription = self.transcription_client.audio.transcriptions.create(
|
||||
transcription = self.model_variables.transcribe(
|
||||
file=audio_file,
|
||||
model=self.transcription_model,
|
||||
language=self.document_version.language,
|
||||
response_format='verbose_json',
|
||||
duration=segment_duration,
|
||||
)
|
||||
self._log("Transcription API call completed")
|
||||
|
||||
|
||||
@@ -171,10 +171,12 @@ def embed_markdown(tenant, model_variables, document_version, markdown, title):
|
||||
model_variables['max_chunk_size'])
|
||||
|
||||
# Enrich chunks
|
||||
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
|
||||
with current_event.create_span("Enrich Chunks"):
|
||||
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
|
||||
|
||||
# Create embeddings
|
||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
with current_event.create_span("Create Embeddings"):
|
||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
|
||||
# Update document version and save embeddings
|
||||
try:
|
||||
@@ -194,7 +196,6 @@ def embed_markdown(tenant, model_variables, document_version, markdown, title):
|
||||
|
||||
|
||||
def enrich_chunks(tenant, model_variables, document_version, title, chunks):
|
||||
current_event.log("Starting Enriching Chunks Processing")
|
||||
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}')
|
||||
|
||||
@@ -227,7 +228,6 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
|
||||
|
||||
current_app.logger.debug(f'Finished enriching chunks for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}')
|
||||
current_event.log("Finished Enriching Chunks Processing")
|
||||
|
||||
return enriched_chunks
|
||||
|
||||
@@ -261,7 +261,6 @@ def summarize_chunk(tenant, model_variables, document_version, chunk):
|
||||
|
||||
|
||||
def embed_chunks(tenant, model_variables, document_version, chunks):
|
||||
current_event.log("Starting Embedding Chunks Processing")
|
||||
current_app.logger.debug(f'Embedding chunks for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}')
|
||||
embedding_model = model_variables['embedding_model']
|
||||
|
||||
Reference in New Issue
Block a user