- Addition of general chunking parameters chunking_heading_level and chunking patterns

- Addition of Processor types docx and markdown
This commit is contained in:
Josako
2024-12-05 15:19:37 +01:00
parent 311927d5ea
commit d35ec9f5ae
17 changed files with 718 additions and 66 deletions

View File

@@ -14,7 +14,7 @@ import json
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
from common.extensions import db
from common.models.interaction import Specialist, SpecialistRetriever
-from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
+from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
edit_document, \
edit_document_version, refresh_document
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
@@ -391,9 +391,6 @@ def add_document():
sub_file_type = form.sub_file_type.data
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
-validate_file_type(extension)
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations: