- Add a Tenant Type

- Allow filtering on Tenant Types and searching on parts of Tenant names
- Implement health checks
- Start Prometheus monitoring (still needs to be finalized)
- Refine audio_processor and srt_processor to reduce duplicate code and to support larger files
- Introduce repopack so that LLMs can reason about the code
2024-09-13 15:43:40 +02:00
parent 9e14824249
commit 6cf660e622
41 changed files with 687 additions and 579 deletions
--- a/eveai_workers/Processors/html_processor.py
+++ b/eveai_workers/Processors/html_processor.py
@@ -14,6 +14,9 @@ class HTMLProcessor(Processor):
        self.html_end_tags = model_variables['html_end_tags']
        self.html_included_elements = model_variables['html_included_elements']
        self.html_excluded_elements = model_variables['html_excluded_elements']
+        self.chunk_size = model_variables['processing_chunk_size']  # Adjust this based on your LLM's optimal input size
+        self.chunk_overlap = model_variables[
+            'processing_chunk_overlap']  # Adjust for context preservation between chunks

    def process(self):
        self._log("Starting HTML processing")
@@ -70,7 +73,7 @@ class HTMLProcessor(Processor):
        chain = setup | parse_prompt | llm | output_parser

        soup = BeautifulSoup(html_content, 'lxml')
-        chunks = self._split_content(soup)
+        chunks = self._split_content(soup, self.chunk_size)

        markdown_chunks = []
        for chunk in chunks: