- Improvements to audio processing to limit CPU and memory usage

- Removed Portkey from the equation, and defined explicit monitoring using native LangChain code

- Optimization of Business Event logging
2024-10-02 14:11:46 +02:00
parent 883175b8f5
commit b700cfac64
13 changed files with 450 additions and 228 deletions
--- a/eveai_workers/tasks.py
+++ b/eveai_workers/tasks.py
@@ -171,10 +171,12 @@ def embed_markdown(tenant, model_variables, document_version, markdown, title):
                                         model_variables['max_chunk_size'])

    # Enrich chunks
-    enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
+    with current_event.create_span("Enrich Chunks"):
+        enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)

    # Create embeddings
-    embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
+    with current_event.create_span("Create Embeddings"):
+        embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)

    # Update document version and save embeddings
    try:
@@ -194,7 +196,6 @@ def embed_markdown(tenant, model_variables, document_version, markdown, title):


 def enrich_chunks(tenant, model_variables, document_version, title, chunks):
-    current_event.log("Starting Enriching Chunks Processing")
    current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
                             f'on document version {document_version.id}')

@@ -227,7 +228,6 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):

    current_app.logger.debug(f'Finished enriching chunks for tenant {tenant.id} '
                             f'on document version {document_version.id}')
-    current_event.log("Finished Enriching Chunks Processing")

    return enriched_chunks

@@ -261,7 +261,6 @@ def summarize_chunk(tenant, model_variables, document_version, chunk):


 def embed_chunks(tenant, model_variables, document_version, chunks):
-    current_event.log("Starting Embedding Chunks Processing")
    current_app.logger.debug(f'Embedding chunks for tenant {tenant.id} '
                             f'on document version {document_version.id}')
    embedding_model = model_variables['embedding_model']