- Allow filtering on Tenant Types & searching for parts of Tenant names - Implement health checks - Start Prometheus monitoring (needs to be finalized) - Refine audio_processor and srt_processor to reduce duplicate code and add support for larger files - Introduce repopack so that LLMs can reason about the code
53 lines
1.6 KiB
Python
53 lines
1.6 KiB
Python
from abc import ABC, abstractmethod
|
|
from flask import current_app
|
|
from common.extensions import minio_client
|
|
|
|
|
|
class Processor(ABC):
    """Abstract base class for document processors.

    Holds the tenant, the model variables and the document version being
    processed, and offers shared helpers for logging, for uploading
    artefacts through ``minio_client`` and for cleaning up markdown text.
    Subclasses must implement :meth:`process`.
    """

    # Fence marker used by markdown code blocks.
    _FENCE = "```"
    # Opening wrapper that is removed from the start of markdown text.
    _MARKDOWN_OPENING = "```markdown"

    def __init__(self, tenant, model_variables, document_version):
        """Store the processing context.

        Args:
            tenant: Object exposing an ``id`` attribute.
            model_variables: Mapping that must contain the key
                ``'embed_tuning'``.
            document_version: Object exposing ``id``, ``doc_id`` and
                ``language`` attributes.

        Raises:
            KeyError: If ``model_variables`` has no ``'embed_tuning'`` entry.
        """
        self.tenant = tenant
        self.model_variables = model_variables
        self.document_version = document_version
        self.embed_tuning = model_variables['embed_tuning']

    @abstractmethod
    def process(self):
        """Run the processor; must be implemented by every subclass."""
        pass

    def _save_markdown(self, markdown: str) -> None:
        """Upload ``markdown`` as ``<document_version.id>.md``.

        Delegates to :meth:`_save_intermediate` so that both helpers share a
        single upload code path.
        """
        markdown_filename = f"{self.document_version.id}.md"
        self._save_intermediate(markdown, markdown_filename)

    def _log(self, message, level: str = 'debug') -> None:
        """Log ``message`` on the Flask application logger.

        The message is prefixed with the concrete class name, the tenant id
        and the document version id.

        Args:
            message: Text to log.
            level: Name of the logger method to use (e.g. ``'debug'``,
                ``'info'``, ``'warning'``, ``'error'``). A name that does not
                resolve to a callable on the logger falls back to ``debug``
                so that a mistyped level never aborts processing.
        """
        logger = current_app.logger
        log_method = getattr(logger, level, None)
        if not callable(log_method):
            log_method = logger.debug
        log_method(
            f"{self.__class__.__name__} - Tenant {self.tenant.id}, Document {self.document_version.id}: {message}")

    def _save_intermediate(self, content: str, filename: str) -> None:
        """Upload ``content`` (UTF-8 encoded) under ``filename``.

        The upload is performed with ``minio_client.upload_document_file``
        and is addressed by tenant id, document id, language and document
        version id.
        """
        minio_client.upload_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            filename,
            content.encode('utf-8')
        )

    def _clean_markdown(self, markdown: str) -> str:
        """Remove a surrounding ```` ```markdown ... ``` ```` wrapper.

        Leading and trailing whitespace is always stripped. A leading
        ```` ```markdown ```` marker is removed when present. A trailing
        ```` ``` ```` is removed only when it is unmatched, i.e. when the
        remaining text contains an odd number of fence markers. This keeps
        the closing fence of a genuine code block that happens to end the
        document.

        Args:
            markdown: Raw markdown text.

        Returns:
            The cleaned markdown text.
        """
        markdown = markdown.strip()
        if markdown.startswith(self._MARKDOWN_OPENING if self is not None
                               else Processor._MARKDOWN_OPENING):
            markdown = markdown[len(Processor._MARKDOWN_OPENING):].strip()

        fence = Processor._FENCE
        # An even fence count means the final fence closes a code block that
        # belongs to the content, so it must stay.
        if markdown.endswith(fence) and markdown.count(fence) % 2 == 1:
            markdown = markdown[:-len(fence)].strip()

        return markdown
|
|
|