Added support for excluding elements by class during HTML parsing, to allow for more complex document parsing
Added chunking to the conversion of HTML to Markdown to handle large files
This commit is contained in:
@@ -86,6 +86,7 @@ def select_model_variables(tenant):
|
||||
model_variables['html_end_tags'] = tenant.html_end_tags
|
||||
model_variables['html_included_elements'] = tenant.html_included_elements
|
||||
model_variables['html_excluded_elements'] = tenant.html_excluded_elements
|
||||
model_variables['html_excluded_classes'] = tenant.html_excluded_classes
|
||||
|
||||
# Set Chunk Size variables
|
||||
model_variables['min_chunk_size'] = tenant.min_chunk_size
|
||||
|
||||
Reference in New Issue
Block a user