- Addition of general chunking parameters chunking_heading_level and chunking patterns

- Addition of Processor types docx and markdown
This commit is contained in:
Josako
2024-12-05 15:19:37 +01:00
parent 311927d5ea
commit d35ec9f5ae
17 changed files with 718 additions and 66 deletions

View File

@@ -15,14 +15,6 @@ from config.type_defs.retriever_types import RETRIEVER_TYPES
from .dynamic_form_base import DynamicFormBase
def allowed_file(form, field):
if field.data:
filename = field.data.filename
allowed_extensions = current_app.config.get('SUPPORTED_FILE_TYPES', [])
if not ('.' in filename and filename.rsplit('.', 1)[1].lower() in allowed_extensions):
raise ValidationError('Unsupported file type.')
def validate_json(form, field):
if field.data:
try:
@@ -101,7 +93,10 @@ class ProcessorForm(FlaskForm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Dynamically populate the 'type' field using the constructor
self.type.choices = [(key, value['name']) for key, value in PROCESSOR_TYPES.items()]
self.type.choices = sorted(
[(key, value['name']) for key, value in PROCESSOR_TYPES.items()],
key=lambda x: x[1],
)
class EditProcessorForm(DynamicFormBase):
@@ -177,7 +172,7 @@ class EditRetrieverForm(DynamicFormBase):
class AddDocumentForm(DynamicFormBase):
file = FileField('File', validators=[FileRequired(), allowed_file])
file = FileField('File', validators=[FileRequired()])
catalog = StringField('Catalog', render_kw={'readonly': True})
sub_file_type = StringField('Sub File Type', validators=[Optional(), Length(max=50)])
name = StringField('Name', validators=[Length(max=100)])