Business event tracing completed for both eveai_workers tasks and eveai_chat_workers tasks

2024-09-27 10:53:42 +02:00
parent ee1b0f1cfa
commit d9cb00fcdc
9 changed files with 306 additions and 253 deletions
--- a/eveai_workers/Processors/audio_processor.py
+++ b/eveai_workers/Processors/audio_processor.py
@@ -7,6 +7,7 @@ from common.extensions import minio_client
 import subprocess

 from .transcription_processor import TranscriptionProcessor
+from common.utils.business_event_context import current_event


 class AudioProcessor(TranscriptionProcessor):
@@ -24,8 +25,13 @@ class AudioProcessor(TranscriptionProcessor):
            self.document_version.id,
            self.document_version.file_name
        )
-        compressed_audio = self._compress_audio(file_data)
-        return self._transcribe_audio(compressed_audio)
+
+        with current_event.create_span("Audio Processing"):
+            compressed_audio = self._compress_audio(file_data)
+        with current_event.create_span("Transcription Generation"):
+            transcription = self._transcribe_audio(compressed_audio)
+
+        return transcription

    def _compress_audio(self, audio_data):
        self._log("Compressing audio")
--- a/eveai_workers/Processors/html_processor.py
+++ b/eveai_workers/Processors/html_processor.py
@@ -31,8 +31,10 @@ class HTMLProcessor(Processor):
            )
            html_content = file_data.decode('utf-8')

-            extracted_html, title = self._parse_html(html_content)
-            markdown = self._generate_markdown_from_html(extracted_html)
+            with current_event.create_span("HTML Content Extraction"):
+                extracted_html, title = self._parse_html(html_content)
+            with current_event.create_span("Markdown Generation"):
+                markdown = self._generate_markdown_from_html(extracted_html)

            self._save_markdown(markdown)
            self._log("Finished processing HTML")
--- a/eveai_workers/Processors/pdf_processor.py
+++ b/eveai_workers/Processors/pdf_processor.py
@@ -10,6 +10,7 @@ from langchain_core.runnables import RunnablePassthrough
 from common.extensions import minio_client
 from common.utils.model_utils import create_language_template
 from .processor import Processor
+from common.utils.business_event_context import current_event


 class PDFProcessor(Processor):
@@ -32,13 +33,14 @@ class PDFProcessor(Processor):
                self.document_version.file_name
            )

-            extracted_content = self._extract_content(file_data)
-            structured_content, title = self._structure_content(extracted_content)
+            with current_event.create_span("PDF Extraction"):
+                extracted_content = self._extract_content(file_data)
+                structured_content, title = self._structure_content(extracted_content)

-            llm_chunks = self._split_content_for_llm(structured_content)
-            markdown = self._process_chunks_with_llm(llm_chunks)
-
-            self._save_markdown(markdown)
+            with current_event.create_span("Markdown Generation"):
+                llm_chunks = self._split_content_for_llm(structured_content)
+                markdown = self._process_chunks_with_llm(llm_chunks)
+                self._save_markdown(markdown)
            self._log("Finished processing PDF")
            return markdown, title
        except Exception as e:
--- a/eveai_workers/Processors/transcription_processor.py
+++ b/eveai_workers/Processors/transcription_processor.py
@@ -1,11 +1,13 @@
 # transcription_processor.py
-from common.utils.model_utils import create_language_template
-from .processor import Processor
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough

+from common.utils.model_utils import create_language_template
+from .processor import Processor
+from common.utils.business_event_context import current_event
+

 class TranscriptionProcessor(Processor):
    def __init__(self, tenant, model_variables, document_version):
@@ -16,12 +18,14 @@ class TranscriptionProcessor(Processor):
    def process(self):
        self._log("Starting Transcription processing")
        try:
-            transcription = self._get_transcription()
-            chunks = self._chunk_transcription(transcription)
-            markdown_chunks = self._process_chunks(chunks)
-            full_markdown = self._combine_markdown_chunks(markdown_chunks)
-            self._save_markdown(full_markdown)
-            self._log("Finished processing Transcription")
+            with current_event.create_span("Transcription Generation"):
+                transcription = self._get_transcription()
+            with current_event.create_span("Markdown Generation"):
+                chunks = self._chunk_transcription(transcription)
+                markdown_chunks = self._process_chunks(chunks)
+                full_markdown = self._combine_markdown_chunks(markdown_chunks)
+                self._save_markdown(full_markdown)
+                self._log("Finished processing Transcription")
            return full_markdown, self._extract_title_from_markdown(full_markdown)
        except Exception as e:
            self._log(f"Error processing Transcription: {str(e)}", level='error')