- Significantly changed the PDF Processor to use Mistral's OCR model

- ensure very long chunks get split into smaller chunks
- ensure TrackedMistralAIEmbedding is batched if needed to ensure correct execution
- upgraded some of the packages to a higher version
This commit is contained in:
Josako
2025-04-16 15:39:16 +02:00
parent 5f58417d24
commit 4bf12db142
10 changed files with 518 additions and 91 deletions

View File

@@ -97,6 +97,7 @@ def persist_business_events(log_entries):
llm_metrics_total_tokens=entry.pop('llm_metrics_total_tokens', None),
llm_metrics_prompt_tokens=entry.pop('llm_metrics_prompt_tokens', None),
llm_metrics_completion_tokens=entry.pop('llm_metrics_completion_tokens', None),
llm_metrics_nr_of_pages=entry.pop('llm_metrics_nr_of_pages', None),
llm_metrics_total_time=entry.pop('llm_metrics_total_time', None),
llm_metrics_call_count=entry.pop('llm_metrics_call_count', None),
llm_interaction_type=entry.pop('llm_interaction_type', None),