Improve HTML Processing + Introduction of Processed File viewer

This commit is contained in:
Josako
2025-05-19 17:18:16 +02:00
parent d2bb51a4a8
commit 70de4c0328
8 changed files with 222 additions and 6 deletions

View File

@@ -106,7 +106,7 @@ class HTMLProcessor(BaseProcessor):
current_chunk = []
current_size = 0
-for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
+for element in soup.find_all(self.html_tags):
element_html = str(element)
element_size = len(element_html)