Remove ModelVariables (model_utils) from application & optimize Tenant
This commit is contained in:
@@ -17,8 +17,7 @@ from common.models.document import DocumentVersion, Embedding, Document, Process
|
||||
from common.models.user import Tenant
|
||||
from common.utils.celery_utils import current_celery
|
||||
from common.utils.database import Database
|
||||
from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
|
||||
get_embedding_llm, get_template
|
||||
from common.utils.model_utils import create_language_template, get_embedding_model_and_class, get_template
|
||||
|
||||
from common.utils.business_event import BusinessEvent
|
||||
from common.utils.business_event_context import current_event
|
||||
@@ -58,9 +57,6 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
catalog_id = doc.catalog_id
|
||||
catalog = Catalog.query.get_or_404(catalog_id)
|
||||
|
||||
# Select variables to work with depending on tenant and model
|
||||
model_variables = get_model_variables(tenant_id)
|
||||
|
||||
# Define processor related information
|
||||
processor_type, processor_class = ProcessorRegistry.get_processor_for_file_type(document_version.file_type)
|
||||
processor = get_processor_for_document(catalog_id, document_version.file_type, document_version.sub_file_type)
|
||||
@@ -102,7 +98,6 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
with current_event.create_span(f"{processor_type} Processing"):
|
||||
document_processor = processor_class(
|
||||
tenant=tenant,
|
||||
model_variables=model_variables,
|
||||
document_version=document_version,
|
||||
catalog=catalog,
|
||||
processor=processor
|
||||
@@ -114,7 +109,7 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
})
|
||||
|
||||
with current_event.create_span("Embedding"):
|
||||
embed_markdown(tenant, model_variables, document_version, catalog, document_processor, markdown, title)
|
||||
embed_markdown(tenant, document_version, catalog, document_processor, markdown, title)
|
||||
|
||||
current_event.log("Finished Embedding Creation Task")
|
||||
|
||||
@@ -142,7 +137,7 @@ def delete_embeddings_for_document_version(document_version):
|
||||
raise
|
||||
|
||||
|
||||
def embed_markdown(tenant, model_variables, document_version, catalog, processor, markdown, title):
|
||||
def embed_markdown(tenant, document_version, catalog, processor, markdown, title):
|
||||
# Create potential chunks
|
||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, processor, markdown,
|
||||
catalog.max_chunk_size)
|
||||
@@ -154,7 +149,7 @@ def embed_markdown(tenant, model_variables, document_version, catalog, processor
|
||||
|
||||
# Enrich chunks
|
||||
with current_event.create_span("Enrich Chunks"):
|
||||
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
|
||||
enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)
|
||||
processor.log_tuning("Enriched Chunks: ", {'enriched_chunks': enriched_chunks})
|
||||
|
||||
# Create embeddings
|
||||
@@ -178,10 +173,10 @@ def embed_markdown(tenant, model_variables, document_version, catalog, processor
|
||||
f'on document version {document_version.id} :-)')
|
||||
|
||||
|
||||
def enrich_chunks(tenant, model_variables, document_version, title, chunks):
|
||||
def enrich_chunks(tenant, document_version, title, chunks):
|
||||
summary = ''
|
||||
if len(chunks) > 1:
|
||||
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
||||
summary = summarize_chunk(tenant, document_version, chunks[0])
|
||||
|
||||
chunk_total_context = (f'Filename: {document_version.object_name}\n'
|
||||
f'User Context:\n{document_version.user_context}\n\n'
|
||||
@@ -209,7 +204,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
|
||||
return enriched_chunks
|
||||
|
||||
|
||||
def summarize_chunk(tenant, model_variables, document_version, chunk):
|
||||
def summarize_chunk(tenant, document_version, chunk):
|
||||
current_event.log("Starting Summarizing Chunk")
|
||||
|
||||
template, llm = get_template("summary")
|
||||
|
||||
Reference in New Issue
Block a user