- Introduction of API functionality (to be continued). Deduplication of document and URL uploads between views and API. - Improvements to document processing - introduction of processor classes to streamline document inputs - Removed pure YouTube functionality, as YouTube retrieval of documents continuously changes, but added upload of srt, mp3, ogg and mp4
43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
from abc import ABC, abstractmethod
|
|
from flask import current_app
|
|
from common.extensions import minio_client
|
|
|
|
|
|
class Processor(ABC):
    """Abstract base class for document processors.

    Holds the tenant, the model variables and the document version being
    processed, and offers helpers to log with that context and to upload
    text artefacts through ``minio_client.upload_document_file``.
    Subclasses must implement :meth:`process`.
    """

    def __init__(self, tenant, model_variables, document_version):
        """Store the processing context.

        Args:
            tenant: Object exposing an ``id`` attribute (used by the upload
                and logging helpers).
            model_variables: Mapping that must contain an ``'embed_tuning'``
                entry; the lookup fails (``KeyError`` for a plain dict) when
                it is missing.
            document_version: Object exposing ``id``, ``doc_id`` and
                ``language`` attributes (used by the upload helpers).
        """
        self.tenant = tenant
        self.model_variables = model_variables
        self.document_version = document_version
        self.embed_tuning = model_variables['embed_tuning']

    @abstractmethod
    def process(self):
        """Run the processor; concrete subclasses provide the implementation."""
        pass

    def _save_markdown(self, markdown):
        """Upload ``markdown`` as ``<document_version.id>.md``.

        Delegates to :meth:`_save_intermediate` so there is a single place
        that knows how artefacts are uploaded.

        Args:
            markdown: Text to store; it is encoded as UTF-8 before upload.
        """
        markdown_filename = f"{self.document_version.id}.md"
        self._save_intermediate(markdown, markdown_filename)

    def _log(self, message, level='debug'):
        """Log ``message`` on the Flask application logger with context.

        The emitted text is prefixed with the concrete class name, the tenant
        id and the document version id.

        Args:
            message: Text to append after the context prefix.
            level: Name of the logger method to use (e.g. ``'debug'``,
                ``'info'``, ``'error'``). A name that does not resolve to a
                callable on the logger falls back to ``debug`` so a wrong
                level name cannot abort processing.
        """
        logger = current_app.logger
        log_method = getattr(logger, level, None)
        if not callable(log_method):
            log_method = logger.debug
        # Lazy %-style arguments: the message is only formatted when the
        # record is actually emitted.
        log_method(
            "%s - Tenant %s, Document %s: %s",
            self.__class__.__name__,
            self.tenant.id,
            self.document_version.id,
            message,
        )

    def _save_intermediate(self, content, filename):
        """Upload ``content`` under ``filename`` for this document version.

        Args:
            content: Text to store; it is encoded as UTF-8 before upload.
            filename: Name passed to the upload call for the stored object.
        """
        minio_client.upload_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            filename,
            content.encode('utf-8'),
        )
|