- Addition of general chunking parameters: chunking_heading_level and chunking patterns

- Addition of Processor types docx and markdown
This commit is contained in:
Josako
2024-12-05 15:19:37 +01:00
parent 311927d5ea
commit d35ec9f5ae
17 changed files with 718 additions and 66 deletions

View File

@@ -17,7 +17,7 @@ class BaseProcessor(ABC):
self.tuning_logger = None
self._setup_tuning_logger()
self._log_tuning("Processor initialized", {
self.log_tuning("Processor initialized", {
"processor_type": processor.type if processor else None,
"document_version": document_version.id if document_version else None,
"catalog": catalog.id if catalog else None
@@ -42,6 +42,10 @@ class BaseProcessor(ABC):
# Processing entry point of BaseProcessor. The body here is only `pass`, so the
# base class itself performs no work.
# NOTE(review): presumably decorated with @abstractmethod just above this hunk
# and implemented by concrete processors — confirm against the full file.
def process(self):
pass
# Read-only shortcut: returns the `configuration` attribute of the processor
# object held in `self.processor`, so callers can write `self.configuration`.
# NOTE(review): `processor` can apparently be None (the constructor guards its
# use with `if processor`); in that case this access would raise
# AttributeError — confirm whether that can happen in practice.
@property
def configuration(self):
return self.processor.configuration
def _save_markdown(self, markdown):
markdown_filename = f"{self.document_version.id}.md"
minio_client.upload_document_file(
@@ -78,7 +82,7 @@ class BaseProcessor(ABC):
return markdown
def _log_tuning(self, message: str, data: Dict[str, Any] = None) -> None:
def log_tuning(self, message: str, data: Dict[str, Any] = None) -> None:
if self.tuning and self.tuning_logger:
try:
self.tuning_logger.log_tuning('processor', message, data)