- Addition of general chunking parameters chunking_heading_level and chunking_patterns
- Addition of Processor types docx and markdown
This commit is contained in:
@@ -15,14 +15,6 @@ from config.type_defs.retriever_types import RETRIEVER_TYPES
|
||||
from .dynamic_form_base import DynamicFormBase
|
||||
|
||||
|
||||
def allowed_file(form, field):
    """WTForms validator that rejects uploads with an unsupported extension.

    The allowed extensions are read from the ``SUPPORTED_FILE_TYPES`` entry of
    the Flask application config (an empty list when the key is absent). The
    text after the last dot of the file name is lower-cased before comparison.

    Args:
        form: The form being validated (unused; part of the validator
            signature).
        field: The file field. When a file was submitted, ``field.data`` is
            read for its ``filename`` attribute.

    Raises:
        ValidationError: If a file was submitted and its name is missing, has
            no dot, or has an extension outside the allowed set.
    """
    if not field.data:
        # Nothing was submitted, so there is no extension to check.
        return

    # A submitted file can carry an empty or missing name; treat that as an
    # unsupported type instead of failing on the string operations below.
    filename = field.data.filename or ''
    allowed_extensions = current_app.config.get('SUPPORTED_FILE_TYPES', [])

    # rpartition yields an empty separator when the name contains no dot.
    _, dot, extension = filename.rpartition('.')
    if not dot or extension.lower() not in allowed_extensions:
        raise ValidationError('Unsupported file type.')
|
||||
|
||||
|
||||
def validate_json(form, field):
|
||||
if field.data:
|
||||
try:
|
||||
@@ -101,7 +93,10 @@ class ProcessorForm(FlaskForm):
|
||||
def __init__(self, *args, **kwargs):
    """Build the form and fill the ``type`` choices from ``PROCESSOR_TYPES``.

    Each choice is a ``(key, name)`` pair taken from ``PROCESSOR_TYPES``;
    the pairs are ordered by their ``name`` value.
    """
    super().__init__(*args, **kwargs)
    # Dynamically populate the 'type' field using the constructor.
    # The list is assigned once, already sorted by the display name.
    self.type.choices = sorted(
        [(key, value['name']) for key, value in PROCESSOR_TYPES.items()],
        key=lambda choice: choice[1],
    )
|
||||
|
||||
|
||||
class EditProcessorForm(DynamicFormBase):
|
||||
@@ -177,7 +172,7 @@ class EditRetrieverForm(DynamicFormBase):
|
||||
|
||||
|
||||
class AddDocumentForm(DynamicFormBase):
|
||||
file = FileField('File', validators=[FileRequired(), allowed_file])
|
||||
file = FileField('File', validators=[FileRequired()])
|
||||
catalog = StringField('Catalog', render_kw={'readonly': True})
|
||||
sub_file_type = StringField('Sub File Type', validators=[Optional(), Length(max=50)])
|
||||
name = StringField('Name', validators=[Length(max=100)])
|
||||
|
||||
@@ -14,7 +14,7 @@ import json
|
||||
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
|
||||
from common.extensions import db
|
||||
from common.models.interaction import Specialist, SpecialistRetriever
|
||||
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
|
||||
from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
|
||||
edit_document, \
|
||||
edit_document_version, refresh_document
|
||||
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
|
||||
@@ -391,9 +391,6 @@ def add_document():
|
||||
sub_file_type = form.sub_file_type.data
|
||||
filename = secure_filename(file.filename)
|
||||
extension = filename.rsplit('.', 1)[1].lower()
|
||||
|
||||
validate_file_type(extension)
|
||||
|
||||
catalog_properties = {}
|
||||
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
|
||||
for config in document_version_configurations:
|
||||
|
||||
@@ -5,7 +5,46 @@ import json
|
||||
|
||||
from wtforms.fields.choices import SelectField
|
||||
from wtforms.fields.datetime import DateField
|
||||
from common.utils.config_field_types import TaggingFields
|
||||
from common.utils.config_field_types import TaggingFields, json_to_patterns, patterns_to_json
|
||||
|
||||
|
||||
class TaggingFieldsField(TextAreaField):
    """Text area field used for ``tagging_fields`` configuration values.

    The constructor forces two HTML attributes (``class`` and
    ``data-handle-enter``) and keeps every other ``render_kw`` entry the
    caller supplied.
    """

    def __init__(self, *args, **kwargs):
        """Create the field, merging forced attributes into ``render_kw``.

        Args:
            *args: Positional arguments forwarded to ``TextAreaField``.
            **kwargs: Keyword arguments forwarded to ``TextAreaField``. A
                ``render_kw`` mapping, if given, is copied and extended
                rather than replaced.
        """
        # Copy so the caller's mapping is never mutated; tolerate None.
        render_kw = dict(kwargs.get('render_kw') or {})
        # The two attributes below always win, exactly as before; other
        # caller-supplied attributes are no longer discarded.
        # NOTE(review): this CSS class is the same one ChunkingPatternsField
        # uses - confirm that sharing it is intentional.
        render_kw.update({
            'class': 'chunking-patterns-field',
            'data-handle-enter': 'true',
        })
        kwargs['render_kw'] = render_kw
        super().__init__(*args, **kwargs)

    # Disabled JSON (de)serialisation hooks, left commented out by the author:
    #
    # def _value(self):
    #     if self.data:
    #         return json.dumps(self.data)
    #     return ''
    #
    # def process_formdata(self, valuelist):
    #     if valuelist and valuelist[0]:
    #         try:
    #             self.data = json.loads(valuelist[0])
    #         except json.JSONDecodeError as e:
    #             raise ValueError('Not valid JSON content')
|
||||
|
||||
|
||||
class ChunkingPatternsField(TextAreaField):
    """Text area field used for ``chunking_patterns`` configuration values.

    The constructor forces two HTML attributes (``class`` and
    ``data-handle-enter``) and keeps every other ``render_kw`` entry the
    caller supplied.
    """

    def __init__(self, *args, **kwargs):
        """Create the field, merging forced attributes into ``render_kw``.

        Args:
            *args: Positional arguments forwarded to ``TextAreaField``.
            **kwargs: Keyword arguments forwarded to ``TextAreaField``. A
                ``render_kw`` mapping, if given, is copied and extended
                rather than replaced.
        """
        # Copy so the caller's mapping is never mutated; tolerate None.
        render_kw = dict(kwargs.get('render_kw') or {})
        # The two attributes below always win, exactly as before; other
        # caller-supplied attributes are no longer discarded.
        render_kw.update({
            'class': 'chunking-patterns-field',
            'data-handle-enter': 'true',
        })
        kwargs['render_kw'] = render_kw
        super().__init__(*args, **kwargs)

    # Disabled line-based (de)serialisation hooks, left commented out by the
    # author:
    #
    # def _value(self):
    #     if self.data:
    #         return '\n'.join(self.data)
    #     return ''
    #
    # def process_formdata(self, valuelist):
    #     if valuelist and valuelist[0]:
    #         self.data = [line.strip() for line in valuelist[0].split('\n') if line.strip()]
|
||||
|
||||
|
||||
class DynamicFormBase(FlaskForm):
|
||||
@@ -80,7 +119,7 @@ class DynamicFormBase(FlaskForm):
|
||||
|
||||
# Handle special case for tagging_fields
|
||||
if field_type == 'tagging_fields':
|
||||
field_class = TextAreaField
|
||||
field_class = TaggingFieldsField
|
||||
extra_classes = 'json-editor'
|
||||
field_kwargs = {}
|
||||
elif field_type == 'enum':
|
||||
@@ -89,6 +128,10 @@ class DynamicFormBase(FlaskForm):
|
||||
choices = [(str(val), str(val)) for val in allowed_values]
|
||||
extra_classes = ''
|
||||
field_kwargs = {'choices': choices}
|
||||
elif field_type == 'chunking_patterns':
|
||||
field_class = ChunkingPatternsField
|
||||
extra_classes = ['monospace-text', 'pattern-input']
|
||||
field_kwargs = {}
|
||||
else:
|
||||
extra_classes = ''
|
||||
field_class = {
|
||||
@@ -111,6 +154,12 @@ class DynamicFormBase(FlaskForm):
|
||||
except (TypeError, ValueError) as e:
|
||||
current_app.logger.error(f"Error converting initial data to JSON: {e}")
|
||||
field_data = "{}"
|
||||
elif field_type == 'chunking_patterns':
|
||||
try:
|
||||
field_data = json_to_patterns(field_data)
|
||||
except (TypeError, ValueError) as e:
|
||||
current_app.logger.error(f"Error converting initial data to a list of patterns: {e}")
|
||||
field_data = {}
|
||||
elif default is not None:
|
||||
field_data = default
|
||||
|
||||
@@ -173,12 +222,17 @@ class DynamicFormBase(FlaskForm):
|
||||
original_field_name = full_field_name[prefix_length:]
|
||||
field = getattr(self, full_field_name)
|
||||
# Parse JSON for tagging_fields type
|
||||
if isinstance(field, TextAreaField) and field.data:
|
||||
if isinstance(field, TaggingFieldsField) and field.data:
|
||||
try:
|
||||
data[original_field_name] = json.loads(field.data)
|
||||
except json.JSONDecodeError:
|
||||
# Validation should catch this, but just in case
|
||||
data[original_field_name] = field.data
|
||||
elif isinstance(field, ChunkingPatternsField):
|
||||
try:
|
||||
data[original_field_name] = patterns_to_json(field.data)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error converting initial data to patterns: {e}")
|
||||
else:
|
||||
data[original_field_name] = field.data
|
||||
return data
|
||||
@@ -230,5 +284,3 @@ def validate_tagging_fields(form, field):
|
||||
except (TypeError, ValueError) as e:
|
||||
raise ValidationError(f"Invalid field definition: {str(e)}")
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user