Added excluded element classes to HTML parsing to allow for more complex document parsing
Added chunking to conversion of HTML to markdown in case of large files
This commit is contained in:
@@ -32,6 +32,7 @@ class TenantForm(FlaskForm):
|
||||
default='p, li')
|
||||
html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
|
||||
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
|
||||
html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
|
||||
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], default=2000)
|
||||
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()], default=3000)
|
||||
# Embedding Search variables
|
||||
|
||||
Reference in New Issue
Block a user