Remove ModelVariables (model_utils) from application & optimize Tenant

2025-05-20 10:17:08 +02:00
parent 70de4c0328
commit d789e431ca
17 changed files with 83 additions and 206 deletions
--- a/eveai_workers/processors/audio_processor.py
+++ b/eveai_workers/processors/audio_processor.py
@@ -8,20 +8,23 @@ import tempfile
 from common.extensions import minio_client
 import subprocess

+from flask import current_app
+
+from common.utils.model_utils import get_transcription_model
 from .processor_registry import ProcessorRegistry
 from .transcription_processor import TranscriptionBaseProcessor
 from common.utils.business_event_context import current_event


 class AudioProcessor(TranscriptionBaseProcessor):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
-        super().__init__(tenant, model_variables, document_version, catalog, processor)
-        self.transcription_model = model_variables.transcription_model
+    def __init__(self, tenant, document_version, catalog, processor):
+        super().__init__(tenant, document_version, catalog, processor)
+        self.transcription_model = get_transcription_model()
        self.ffmpeg_path = 'ffmpeg'
-        self.max_compression_duration = model_variables.max_compression_duration
-        self.max_transcription_duration = model_variables.max_transcription_duration
-        self.compression_cpu_limit = model_variables.compression_cpu_limit  # CPU usage limit in percentage
-        self.compression_process_delay = model_variables.compression_process_delay  # Delay between processing chunks in seconds
+        self.max_compression_duration = current_app.config['MAX_COMPRESSION_DURATION']
+        self.max_transcription_duration = current_app.config['MAX_TRANSCRIPTION_DURATION']
+        self.compression_cpu_limit = current_app.config['COMPRESSION_CPU_LIMIT']  # CPU usage limit in percentage
+        self.compression_process_delay = current_app.config['COMPRESSION_PROCESS_DELAY']  # Delay between processing chunks in seconds
        self.file_type = document_version.file_type

    def _get_transcription(self):
@@ -154,7 +157,7 @@ class AudioProcessor(TranscriptionBaseProcessor):
                    file_size = os.path.getsize(temp_audio.name)

                    with open(temp_audio.name, 'rb') as audio_file:
-                        transcription = self.model_variables.transcription_model.transcribe(
+                        transcription = self.transcription_model.transcribe(
                            file=audio_file,
                            language=self.document_version.language,
                            response_format='verbose_json',
--- a/eveai_workers/processors/base_processor.py
+++ b/eveai_workers/processors/base_processor.py
@@ -7,9 +7,8 @@ from config.logging_config import TuningLogger


 class BaseProcessor(ABC):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
+    def __init__(self, tenant, document_version, catalog, processor):
        self.tenant = tenant
-        self.model_variables = model_variables
        self.document_version = document_version
        self.catalog = catalog
        self.processor = processor
--- a/eveai_workers/processors/docx_processor.py
+++ b/eveai_workers/processors/docx_processor.py
@@ -7,8 +7,8 @@ import re


 class DocxProcessor(BaseProcessor):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
-        super().__init__(tenant, model_variables, document_version, catalog, processor)
+    def __init__(self, tenant, document_version, catalog, processor):
+        super().__init__(tenant, document_version, catalog, processor)
        self.config = processor.configuration
        self.extract_comments = self.config.get('extract_comments', False)
        self.extract_headers_footers = self.config.get('extract_headers_footers', False)
--- a/eveai_workers/processors/html_processor.py
+++ b/eveai_workers/processors/html_processor.py
@@ -11,8 +11,8 @@ from common.utils.string_list_converter import StringListConverter as SLC


 class HTMLProcessor(BaseProcessor):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
-        super().__init__(tenant, model_variables, document_version, catalog, processor)
+    def __init__(self, tenant, document_version, catalog, processor):
+        super().__init__(tenant, document_version, catalog, processor)
        cat_conf = catalog.configuration
        proc_conf = processor.configuration
        self.html_tags = SLC.string_to_list(proc_conf['html_tags'])
--- a/eveai_workers/processors/markdown_processor.py
+++ b/eveai_workers/processors/markdown_processor.py
@@ -18,8 +18,8 @@ def _find_first_h1(markdown: str) -> str:


 class MarkdownProcessor(BaseProcessor):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
-        super().__init__(tenant, model_variables, document_version, catalog, processor)
+    def __init__(self, tenant, document_version, catalog, processor):
+        super().__init__(tenant, document_version, catalog, processor)

        self.chunk_size = catalog.max_chunk_size
        self.chunk_overlap = 0
--- a/eveai_workers/processors/pdf_processor.py
+++ b/eveai_workers/processors/pdf_processor.py
@@ -16,8 +16,8 @@ from .processor_registry import ProcessorRegistry


 class PDFProcessor(BaseProcessor):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
-        super().__init__(tenant, model_variables, document_version, catalog, processor)
+    def __init__(self, tenant, document_version, catalog, processor):
+        super().__init__(tenant, document_version, catalog, processor)

        self.chunk_size = catalog.max_chunk_size
        self.chunk_overlap = 0
--- a/eveai_workers/processors/transcription_processor.py
+++ b/eveai_workers/processors/transcription_processor.py
@@ -3,6 +3,7 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
+from flask import current_app

 from common.utils.model_utils import create_language_template, get_embedding_llm, get_template
 from .base_processor import BaseProcessor
@@ -10,9 +11,9 @@ from common.utils.business_event_context import current_event


 class TranscriptionBaseProcessor(BaseProcessor):
-    def __init__(self, tenant, model_variables, document_version, catalog, processor):
-        super().__init__(tenant, model_variables, document_version, catalog, processor)
-        self.annotation_chunk_size = model_variables.annotation_chunk_length
+    def __init__(self, tenant, document_version, catalog, processor):
+        super().__init__(tenant, document_version, catalog, processor)
+        self.annotation_chunk_size = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH']
        self.annotation_chunk_overlap = 0

    def process(self):
--- a/eveai_workers/tasks.py
+++ b/eveai_workers/tasks.py
@@ -17,8 +17,7 @@ from common.models.document import DocumentVersion, Embedding, Document, Process
 from common.models.user import Tenant
 from common.utils.celery_utils import current_celery
 from common.utils.database import Database
-from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
-    get_embedding_llm, get_template
+from common.utils.model_utils import create_language_template, get_embedding_model_and_class, get_template

 from common.utils.business_event import BusinessEvent
 from common.utils.business_event_context import current_event
@@ -58,9 +57,6 @@ def create_embeddings(tenant_id, document_version_id):
        catalog_id = doc.catalog_id
        catalog = Catalog.query.get_or_404(catalog_id)

-        # Select variables to work with depending on tenant and model
-        model_variables = get_model_variables(tenant_id)
-
        # Define processor related information
        processor_type, processor_class = ProcessorRegistry.get_processor_for_file_type(document_version.file_type)
        processor = get_processor_for_document(catalog_id, document_version.file_type, document_version.sub_file_type)
@@ -102,7 +98,6 @@ def create_embeddings(tenant_id, document_version_id):
            with current_event.create_span(f"{processor_type} Processing"):
                document_processor = processor_class(
                    tenant=tenant,
-                    model_variables=model_variables,
                    document_version=document_version,
                    catalog=catalog,
                    processor=processor
@@ -114,7 +109,7 @@ def create_embeddings(tenant_id, document_version_id):
                })

            with current_event.create_span("Embedding"):
-                embed_markdown(tenant, model_variables, document_version, catalog, document_processor, markdown, title)
+                embed_markdown(tenant, document_version, catalog, document_processor, markdown, title)

            current_event.log("Finished Embedding Creation Task")

@@ -142,7 +137,7 @@ def delete_embeddings_for_document_version(document_version):
        raise


-def embed_markdown(tenant, model_variables, document_version, catalog, processor, markdown, title):
+def embed_markdown(tenant, document_version, catalog, processor, markdown, title):
    # Create potential chunks
    potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, processor, markdown,
                                                            catalog.max_chunk_size)
@@ -154,7 +149,7 @@ def embed_markdown(tenant, model_variables, document_version, catalog, processor

    # Enrich chunks
    with current_event.create_span("Enrich Chunks"):
-        enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
+        enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)
        processor.log_tuning("Enriched Chunks: ", {'enriched_chunks': enriched_chunks})

    # Create embeddings
@@ -178,10 +173,10 @@ def embed_markdown(tenant, model_variables, document_version, catalog, processor
                            f'on document version {document_version.id} :-)')


-def enrich_chunks(tenant, model_variables, document_version, title, chunks):
+def enrich_chunks(tenant, document_version, title, chunks):
    summary = ''
    if len(chunks) > 1:
-        summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
+        summary = summarize_chunk(tenant, document_version, chunks[0])

    chunk_total_context = (f'Filename: {document_version.object_name}\n'
                           f'User Context:\n{document_version.user_context}\n\n'
@@ -209,7 +204,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
    return enriched_chunks


-def summarize_chunk(tenant, model_variables, document_version, chunk):
+def summarize_chunk(tenant, document_version, chunk):
    current_event.log("Starting Summarizing Chunk")

    template, llm = get_template("summary")