- Improved audio processing to limit CPU and memory usage

- Removed Portkey and implemented explicit monitoring using native LangChain code
- Optimized Business Event logging
This commit is contained in:
Josako
2024-10-02 14:11:46 +02:00
parent 883175b8f5
commit b700cfac64
13 changed files with 450 additions and 228 deletions

View File

@@ -171,10 +171,12 @@ def embed_markdown(tenant, model_variables, document_version, markdown, title):
model_variables['max_chunk_size'])
# Enrich chunks
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
with current_event.create_span("Enrich Chunks"):
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
# Create embeddings
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
with current_event.create_span("Create Embeddings"):
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
# Update document version and save embeddings
try:
@@ -194,7 +196,6 @@ def embed_markdown(tenant, model_variables, document_version, markdown, title):
def enrich_chunks(tenant, model_variables, document_version, title, chunks):
current_event.log("Starting Enriching Chunks Processing")
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
f'on document version {document_version.id}')
@@ -227,7 +228,6 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
current_app.logger.debug(f'Finished enriching chunks for tenant {tenant.id} '
f'on document version {document_version.id}')
current_event.log("Finished Enriching Chunks Processing")
return enriched_chunks
@@ -261,7 +261,6 @@ def summarize_chunk(tenant, model_variables, document_version, chunk):
def embed_chunks(tenant, model_variables, document_version, chunks):
current_event.log("Starting Embedding Chunks Processing")
current_app.logger.debug(f'Embedding chunks for tenant {tenant.id} '
f'on document version {document_version.id}')
embedding_model = model_variables['embedding_model']