Improve HTML processing and introduce Processed File viewer

2025-05-19 17:18:16 +02:00
parent d2bb51a4a8
commit 70de4c0328
8 changed files with 222 additions and 6 deletions
--- a/eveai_workers/processors/html_processor.py
+++ b/eveai_workers/processors/html_processor.py
@@ -106,7 +106,7 @@ class HTMLProcessor(BaseProcessor):
        current_chunk = []
        current_size = 0

-        for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
+        for element in soup.find_all(self.html_tags):
            element_html = str(element)
            element_size = len(element_html)