Improve HTML processing and introduce a processed-file viewer
This commit is contained in:
@@ -106,7 +106,7 @@ class HTMLProcessor(BaseProcessor):
|
||||
current_chunk = []
|
||||
current_size = 0
|
||||
|
||||
- for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
|
||||
+ for element in soup.find_all(self.html_tags):
|
||||
element_html = str(element)
|
||||
element_size = len(element_html)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user