- Addition of the general chunking parameters `chunking_heading_level` and `chunking_patterns`
- Addition of Processor types docx and markdown
This commit is contained in:
@@ -5,7 +5,46 @@ import json
|
||||
|
||||
from wtforms.fields.choices import SelectField
|
||||
from wtforms.fields.datetime import DateField
|
||||
from common.utils.config_field_types import TaggingFields
|
||||
from common.utils.config_field_types import TaggingFields, json_to_patterns, patterns_to_json
|
||||
|
||||
|
||||
class TaggingFieldsField(TextAreaField):
    """Text area used by the dynamic form for ``tagging_fields`` values.

    The field always renders with the ``chunking-patterns-field`` CSS class
    and ``data-handle-enter="true"``. Any other ``render_kw`` entries supplied
    by the caller are preserved instead of being discarded, and a
    caller-supplied ``class`` is kept in front of the field's own class.
    """

    # NOTE(review): this CSS class is identical to the one used by
    # ChunkingPatternsField and may be a copy-paste leftover -- confirm
    # against the front-end before renaming it.
    _FORCED_RENDER_KW = {
        'class': 'chunking-patterns-field',
        'data-handle-enter': 'true',
    }

    def __init__(self, *args, **kwargs):
        """Build the field, merging caller ``render_kw`` with the forced attributes.

        Args:
            *args: Positional arguments forwarded unchanged to ``TextAreaField``.
            **kwargs: Keyword arguments forwarded to ``TextAreaField``;
                ``render_kw`` (if present) is merged rather than replaced.
        """
        # Copy so the caller's dict is never mutated.
        caller_kw = dict(kwargs.get('render_kw') or {})
        render_kw = {**caller_kw, **self._FORCED_RENDER_KW}

        caller_class = caller_kw.get('class')
        if isinstance(caller_class, (list, tuple)):
            # Accept a sequence of class names as well as a plain string.
            caller_class = ' '.join(str(name) for name in caller_class)
        if caller_class:
            render_kw['class'] = f"{caller_class} {self._FORCED_RENDER_KW['class']}"

        kwargs['render_kw'] = render_kw
        super().__init__(*args, **kwargs)

    # Disabled JSON (de)serialisation hooks, kept as found in the original.
    # def _value(self):
    #     if self.data:
    #         return json.dumps(self.data)
    #     return ''
    #
    # def process_formdata(self, valuelist):
    #     if valuelist and valuelist[0]:
    #         try:
    #             self.data = json.loads(valuelist[0])
    #         except json.JSONDecodeError as e:
    #             raise ValueError('Not valid JSON content')
|
||||
|
||||
|
||||
class ChunkingPatternsField(TextAreaField):
    """Text area used by the dynamic form for ``chunking_patterns`` values.

    The field always renders with the ``chunking-patterns-field`` CSS class
    and ``data-handle-enter="true"``. Any other ``render_kw`` entries supplied
    by the caller are preserved instead of being discarded, and a
    caller-supplied ``class`` is kept in front of the field's own class.
    """

    # Attributes this field always sets, regardless of what the caller passes.
    _FORCED_RENDER_KW = {
        'class': 'chunking-patterns-field',
        'data-handle-enter': 'true',
    }

    def __init__(self, *args, **kwargs):
        """Build the field, merging caller ``render_kw`` with the forced attributes.

        Args:
            *args: Positional arguments forwarded unchanged to ``TextAreaField``.
            **kwargs: Keyword arguments forwarded to ``TextAreaField``;
                ``render_kw`` (if present) is merged rather than replaced.
        """
        # Copy so the caller's dict is never mutated.
        caller_kw = dict(kwargs.get('render_kw') or {})
        render_kw = {**caller_kw, **self._FORCED_RENDER_KW}

        caller_class = caller_kw.get('class')
        if isinstance(caller_class, (list, tuple)):
            # Accept a sequence of class names as well as a plain string.
            caller_class = ' '.join(str(name) for name in caller_class)
        if caller_class:
            render_kw['class'] = f"{caller_class} {self._FORCED_RENDER_KW['class']}"

        kwargs['render_kw'] = render_kw
        super().__init__(*args, **kwargs)

    # Disabled line-based (de)serialisation hooks, kept as found in the original.
    # def _value(self):
    #     if self.data:
    #         return '\n'.join(self.data)
    #     return ''
    #
    # def process_formdata(self, valuelist):
    #     if valuelist and valuelist[0]:
    #         self.data = [line.strip() for line in valuelist[0].split('\n') if line.strip()]
|
||||
|
||||
|
||||
class DynamicFormBase(FlaskForm):
|
||||
@@ -80,7 +119,7 @@ class DynamicFormBase(FlaskForm):
|
||||
|
||||
# Handle special case for tagging_fields
|
||||
if field_type == 'tagging_fields':
|
||||
field_class = TextAreaField
|
||||
field_class = TaggingFieldsField
|
||||
extra_classes = 'json-editor'
|
||||
field_kwargs = {}
|
||||
elif field_type == 'enum':
|
||||
@@ -89,6 +128,10 @@ class DynamicFormBase(FlaskForm):
|
||||
choices = [(str(val), str(val)) for val in allowed_values]
|
||||
extra_classes = ''
|
||||
field_kwargs = {'choices': choices}
|
||||
elif field_type == 'chunking_patterns':
|
||||
field_class = ChunkingPatternsField
|
||||
extra_classes = ['monospace-text', 'pattern-input']
|
||||
field_kwargs = {}
|
||||
else:
|
||||
extra_classes = ''
|
||||
field_class = {
|
||||
@@ -111,6 +154,12 @@ class DynamicFormBase(FlaskForm):
|
||||
except (TypeError, ValueError) as e:
|
||||
current_app.logger.error(f"Error converting initial data to JSON: {e}")
|
||||
field_data = "{}"
|
||||
elif field_type == 'chunking_patterns':
|
||||
try:
|
||||
field_data = json_to_patterns(field_data)
|
||||
except (TypeError, ValueError) as e:
|
||||
current_app.logger.error(f"Error converting initial data to a list of patterns: {e}")
|
||||
field_data = {}
|
||||
elif default is not None:
|
||||
field_data = default
|
||||
|
||||
@@ -173,12 +222,17 @@ class DynamicFormBase(FlaskForm):
|
||||
original_field_name = full_field_name[prefix_length:]
|
||||
field = getattr(self, full_field_name)
|
||||
# Parse JSON for tagging_fields type
|
||||
if isinstance(field, TextAreaField) and field.data:
|
||||
if isinstance(field, TaggingFieldsField) and field.data:
|
||||
try:
|
||||
data[original_field_name] = json.loads(field.data)
|
||||
except json.JSONDecodeError:
|
||||
# Validation should catch this, but just in case
|
||||
data[original_field_name] = field.data
|
||||
elif isinstance(field, ChunkingPatternsField):
|
||||
try:
|
||||
data[original_field_name] = patterns_to_json(field.data)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error converting initial data to patterns: {e}")
|
||||
else:
|
||||
data[original_field_name] = field.data
|
||||
return data
|
||||
@@ -230,5 +284,3 @@ def validate_tagging_fields(form, field):
|
||||
except (TypeError, ValueError) as e:
|
||||
raise ValidationError(f"Invalid field definition: {str(e)}")
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user