Remove ModelVariables (model_utils) from application & optimize Tenant
This commit is contained in:
@@ -8,20 +8,23 @@ import tempfile
|
||||
from common.extensions import minio_client
|
||||
import subprocess
|
||||
|
||||
from flask import current_app
|
||||
|
||||
from common.utils.model_utils import get_transcription_model
|
||||
from .processor_registry import ProcessorRegistry
|
||||
from .transcription_processor import TranscriptionBaseProcessor
|
||||
from common.utils.business_event_context import current_event
|
||||
|
||||
|
||||
class AudioProcessor(TranscriptionBaseProcessor):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
super().__init__(tenant, model_variables, document_version, catalog, processor)
|
||||
self.transcription_model = model_variables.transcription_model
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
super().__init__(tenant, document_version, catalog, processor)
|
||||
self.transcription_model = get_transcription_model()
|
||||
self.ffmpeg_path = 'ffmpeg'
|
||||
self.max_compression_duration = model_variables.max_compression_duration
|
||||
self.max_transcription_duration = model_variables.max_transcription_duration
|
||||
self.compression_cpu_limit = model_variables.compression_cpu_limit # CPU usage limit in percentage
|
||||
self.compression_process_delay = model_variables.compression_process_delay # Delay between processing chunks in seconds
|
||||
self.max_compression_duration = current_app.config['MAX_COMPRESSION_DURATION']
|
||||
self.max_transcription_duration = current_app.config['MAX_TRANSCRIPTION_DURATION']
|
||||
self.compression_cpu_limit = current_app.config['COMPRESSION_CPU_LIMIT'] # CPU usage limit in percentage
|
||||
self.compression_process_delay = current_app.config['COMPRESSION_PROCESS_DELAY'] # Delay between processing chunks in seconds
|
||||
self.file_type = document_version.file_type
|
||||
|
||||
def _get_transcription(self):
|
||||
@@ -154,7 +157,7 @@ class AudioProcessor(TranscriptionBaseProcessor):
|
||||
file_size = os.path.getsize(temp_audio.name)
|
||||
|
||||
with open(temp_audio.name, 'rb') as audio_file:
|
||||
transcription = self.model_variables.transcription_model.transcribe(
|
||||
transcription = self.transcription_model.transcribe(
|
||||
file=audio_file,
|
||||
language=self.document_version.language,
|
||||
response_format='verbose_json',
|
||||
|
||||
@@ -7,9 +7,8 @@ from config.logging_config import TuningLogger
|
||||
|
||||
|
||||
class BaseProcessor(ABC):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
self.tenant = tenant
|
||||
self.model_variables = model_variables
|
||||
self.document_version = document_version
|
||||
self.catalog = catalog
|
||||
self.processor = processor
|
||||
|
||||
@@ -7,8 +7,8 @@ import re
|
||||
|
||||
|
||||
class DocxProcessor(BaseProcessor):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
super().__init__(tenant, model_variables, document_version, catalog, processor)
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
super().__init__(tenant, document_version, catalog, processor)
|
||||
self.config = processor.configuration
|
||||
self.extract_comments = self.config.get('extract_comments', False)
|
||||
self.extract_headers_footers = self.config.get('extract_headers_footers', False)
|
||||
|
||||
@@ -11,8 +11,8 @@ from common.utils.string_list_converter import StringListConverter as SLC
|
||||
|
||||
|
||||
class HTMLProcessor(BaseProcessor):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
super().__init__(tenant, model_variables, document_version, catalog, processor)
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
super().__init__(tenant, document_version, catalog, processor)
|
||||
cat_conf = catalog.configuration
|
||||
proc_conf = processor.configuration
|
||||
self.html_tags = SLC.string_to_list(proc_conf['html_tags'])
|
||||
|
||||
@@ -18,8 +18,8 @@ def _find_first_h1(markdown: str) -> str:
|
||||
|
||||
|
||||
class MarkdownProcessor(BaseProcessor):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
super().__init__(tenant, model_variables, document_version, catalog, processor)
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
super().__init__(tenant, document_version, catalog, processor)
|
||||
|
||||
self.chunk_size = catalog.max_chunk_size
|
||||
self.chunk_overlap = 0
|
||||
|
||||
@@ -16,8 +16,8 @@ from .processor_registry import ProcessorRegistry
|
||||
|
||||
|
||||
class PDFProcessor(BaseProcessor):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
super().__init__(tenant, model_variables, document_version, catalog, processor)
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
super().__init__(tenant, document_version, catalog, processor)
|
||||
|
||||
self.chunk_size = catalog.max_chunk_size
|
||||
self.chunk_overlap = 0
|
||||
|
||||
@@ -3,6 +3,7 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from flask import current_app
|
||||
|
||||
from common.utils.model_utils import create_language_template, get_embedding_llm, get_template
|
||||
from .base_processor import BaseProcessor
|
||||
@@ -10,9 +11,9 @@ from common.utils.business_event_context import current_event
|
||||
|
||||
|
||||
class TranscriptionBaseProcessor(BaseProcessor):
|
||||
def __init__(self, tenant, model_variables, document_version, catalog, processor):
|
||||
super().__init__(tenant, model_variables, document_version, catalog, processor)
|
||||
self.annotation_chunk_size = model_variables.annotation_chunk_length
|
||||
def __init__(self, tenant, document_version, catalog, processor):
|
||||
super().__init__(tenant, document_version, catalog, processor)
|
||||
self.annotation_chunk_size = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH']
|
||||
self.annotation_chunk_overlap = 0
|
||||
|
||||
def process(self):
|
||||
|
||||
Reference in New Issue
Block a user