- Move from OpenAI to Mistral Embeddings

- Move embedding model settings from tenant to catalog
- BUG: error processing configuration for chunking patterns in HTML_PROCESSOR
- Removed eveai_chat from docker-files and nginx configuration, as it is now obsolete
- BUG: error in Library Operations when creating a new default RAG library
- BUG: Added public type in migration scripts
- Removed SocketIO from all code and requirements.txt
2025-02-25 11:17:19 +01:00
parent c037d4135e
commit 55a89c11bb
34 changed files with 457 additions and 444 deletions
--- a/common/eveai_model/tracked_mistral_embeddings.py
+++ b/common/eveai_model/tracked_mistral_embeddings.py
@@ -0,0 +1,40 @@
+from flask import current_app
+from langchain_mistralai import MistralAIEmbeddings
+from typing import List, Any
+import time
+
+from common.eveai_model.eveai_embedding_base import EveAIEmbeddings
+from common.utils.business_event_context import current_event
+from mistralai import Mistral
+
+
+class TrackedMistralAIEmbeddings(EveAIEmbeddings):
+    """Embeddings backend that calls the Mistral API and logs usage metrics.
+
+    Each non-empty ``embed_documents`` call is timed, and the token usage
+    reported by the API is forwarded to ``current_event.log_llm_metrics``.
+    """
+
+    def __init__(self, model: str = "mistral_embed"):
+        """Build the Mistral client from the Flask application config.
+
+        Args:
+            model: Model identifier sent with every embeddings request.
+
+        Raises:
+            KeyError: If ``MISTRAL_API_KEY`` is absent from the app config.
+        """
+        # NOTE(review): Mistral's public docs name the embedding model
+        # "mistral-embed" (hyphen) -- confirm callers always pass ``model``
+        # explicitly, or that this default is accepted by the API.
+        api_key = current_app.config['MISTRAL_API_KEY']
+        self.client = Mistral(
+            api_key=api_key
+        )
+        self.model = model
+        super().__init__()
+
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Embed ``texts`` with the configured model and log usage metrics.
+
+        Args:
+            texts: Strings to embed in a single API request.
+
+        Returns:
+            The ``embedding`` of every entry in the API response's ``data``,
+            in response order. An empty input yields an empty list without
+            contacting the API (and therefore without logging metrics).
+        """
+        # Nothing to embed: avoid a pointless (and possibly rejected) request.
+        if not texts:
+            return []
+
+        # perf_counter is monotonic, so the measured duration cannot be
+        # distorted by system clock adjustments the way time.time() can.
+        start_time = time.perf_counter()
+        result = self.client.embeddings.create(
+            model=self.model,
+            inputs=texts,
+        )
+        time_elapsed = time.perf_counter() - start_time
+
+        usage = result.usage
+        metrics = {
+            'total_tokens': usage.total_tokens,
+            'prompt_tokens': usage.prompt_tokens,  # For embeddings, all tokens are prompt tokens
+            'completion_tokens': usage.completion_tokens,
+            'time_elapsed': time_elapsed,
+            'interaction_type': 'Embedding',
+        }
+        current_event.log_llm_metrics(metrics)
+
+        return [item.embedding for item in result.data]
+