- Move to Mistral instead of OpenAI as the primary choice

Josako
2025-03-06 14:19:35 +01:00
parent 55a89c11bb
commit c15cabc289
11 changed files with 74 additions and 36 deletions

View File

@@ -17,8 +17,10 @@ from config.model_config import MODEL_CONFIG
 from common.extensions import template_manager
 from common.models.document import EmbeddingMistral
 from common.utils.eveai_exceptions import EveAITenantNotFound, EveAIInvalidEmbeddingModel
+from crewai import LLM

-llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+embedding_llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+crewai_llm_model_cache: Dict[Tuple[str, float], LLM] = {}

 llm_metrics_handler = LLMMetricsHandler()
@@ -89,11 +91,8 @@ def get_embedding_model_and_class(tenant_id, catalog_id, full_embedding_name):
     return embedding_model, embedding_model_class


-def get_llm(full_model_name, temperature):
-    if not full_model_name:
-        full_model_name = 'openai.gpt-4o'  # Default to gpt-4o for now, as this is the original model developed against
-    llm = llm_model_cache.get((full_model_name, temperature))
+def get_embedding_llm(full_model_name='mistral.mistral-small-latest', temperature=0.3):
+    llm = embedding_llm_model_cache.get((full_model_name, temperature))
     if not llm:
         llm_provider, llm_model_name = full_model_name.split('.')
         if llm_provider == "openai":
@@ -110,8 +109,30 @@ def get_llm(full_model_name, temperature):
             temperature=temperature,
             callbacks=[llm_metrics_handler]
         )
-        llm_model_cache[(full_model_name, temperature)] = llm
+        embedding_llm_model_cache[(full_model_name, temperature)] = llm
     return llm


+def get_crewai_llm(full_model_name='mistral.mistral-large-latest', temperature=0.3):
+    llm = crewai_llm_model_cache.get((full_model_name, temperature))
+    if not llm:
+        llm_provider, llm_model_name = full_model_name.split('.')
+        crew_full_model_name = f"{llm_provider}/{llm_model_name}"
+        api_key = None
+        if llm_provider == "openai":
+            api_key = current_app.config['OPENAI_API_KEY']
+        elif llm_provider == "mistral":
+            api_key = current_app.config['MISTRAL_API_KEY']
+        llm = LLM(
+            model=crew_full_model_name,
+            temperature=temperature,
+            api_key=api_key
+        )
+        crewai_llm_model_cache[(full_model_name, temperature)] = llm
+    return llm


 class ModelVariables:
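
Both helpers memoize on a (full model name, temperature) tuple, and get_crewai_llm additionally rewrites the 'provider.model' form into the 'provider/model' form CrewAI's LLM wrapper takes. A minimal, self-contained sketch of that memoization pattern (FakeLLM and get_llm_cached are illustrative stand-ins, not code from this commit):

from typing import Dict, Tuple

class FakeLLM:
    """Stand-in for a real provider client; illustrative only."""
    def __init__(self, provider: str, model: str, temperature: float):
        self.provider, self.model, self.temperature = provider, model, temperature

_cache: Dict[Tuple[str, float], FakeLLM] = {}

def get_llm_cached(full_model_name: str = 'mistral.mistral-large-latest',
                   temperature: float = 0.3) -> FakeLLM:
    # Same (full model name, temperature) key shape as the caches above.
    key = (full_model_name, temperature)
    if key not in _cache:
        provider, model_name = full_model_name.split('.', 1)
        _cache[key] = FakeLLM(provider, model_name, temperature)
    return _cache[key]

assert get_llm_cached() is get_llm_cached()                      # same key -> cache hit
assert get_llm_cached(temperature=0.7) is not get_llm_cached()   # new key -> new client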

View File

@@ -10,8 +10,11 @@ backstory: >
   a very good communicator, and adapt to the style used by the human asking for information (e.g. formal or informal).
   You always stay correct and polite, whatever happens. And you ensure no discriminating language is used.
   You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
-  language the context provided to you is in.
+  language the context provided to you is in. You are participating in a conversation, not writing e.g. an email. Do not
+  include a salutation or closing greeting in your answer.
   {custom_backstory}
+full_model_name: "mistral.mistral-large-latest"
+temperature: 0.3
 metadata:
   author: "Josako"
   date_added: "2025-01-08"
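
For reference, a rough sketch of how the two new YAML keys can be read back with the same fallbacks the executor applies later in this commit (the YAML snippet here is abbreviated and hypothetical):

import yaml

raw = """
backstory: >
  You are a helpful assistant. {custom_backstory}
full_model_name: "mistral.mistral-large-latest"
temperature: 0.3
"""

agent_config = yaml.safe_load(raw)
# Same defaults as the executor change further down in this commit:
full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
temperature = agent_config.get('temperature', 0.3)
print(full_model_name, temperature)   # mistral.mistral-large-latest 0.3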

View File

@@ -13,6 +13,11 @@ backstory: >
   answers, and a selection of additional questions to be asked in a conversation. Although your backoffice team might
   want to ask a myriad of questions, you understand that doesn't fit with the way humans communicate. You know how to
   combine multiple related questions, and understand how to interweave the questions in the answers when related.
+  You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
+  language the context provided to you is in. Also, ensure that the questions asked do not contradict the answers
+  given and are not already obsolete given the answer provided.
+  You are participating in a conversation, not writing e.g. an email. Do not include a salutation or closing greeting
+  in your answer.
   {custom_backstory}
 metadata:
   author: "Josako"

View File

@@ -7,6 +7,8 @@ task_description: >
   in no specific order, so don't just pick the first ones, but the ones most appropriate in your opinion!
   Questions are to be asked when your team proposes questions. You ensure both answers and additional questions are
   bundled into 1 clear communication back to the user. Use {language} for your consolidated communication.
+  Be sure to format your answer in markdown when appropriate. Ensure enumerations or bulleted lists are formatted as
+  lists in markdown.
   {custom_description}

   Answers:
@@ -15,7 +17,7 @@ task_description: >
   Additional Questions:
   %%%{additional_questions}%%%
 expected_output: >
-  One consolidated communication towards the end user with both answers and maximum {nr_of_questions} questions.
+  One consolidated communication towards the end user.
   {custom_expected_output}
 metadata:
   author: "Josako"

View File

@@ -713,6 +713,7 @@ def create_default_rag_library():
         description='Default RAG Retriever',
         catalog_id=cat.id,
         type="STANDARD_RAG",
+        type_version="1.0",
         configuration={
             "es_k": "8",
             "es_similarity_threshold": 0.3
@@ -727,7 +728,7 @@ def create_default_rag_library():
     spec = Specialist(
         name='Default RAG Specialist',
         description='Default RAG Specialist',
-        type='STANDARD_RAG',
+        type='STANDARD_RAG_SPECIALIST',
         configuration={"temperature": "0.3", "specialist_context": "To be specified"}
     )
     set_logging_information(spec, timestamp)

View File

@@ -26,6 +26,10 @@ class StandardRAGRetriever(BaseRetriever):
         retriever = Retriever.query.get_or_404(retriever_id)
         self.catalog_id = retriever.catalog_id
         self.tenant_id = tenant_id
+        catalog = Catalog.query.get_or_404(self.catalog_id)
+        self.embedding_model, self.embedding_model_class = get_embedding_model_and_class(self.tenant_id,
+                                                                                         self.catalog_id,
+                                                                                         catalog.embedding_model)
         self.similarity_threshold = retriever.configuration.get('es_similarity_threshold', 0.3)
         self.k = retriever.configuration.get('es_k', 8)
         self.tuning = retriever.tuning
@@ -77,10 +81,10 @@ class StandardRAGRetriever(BaseRetriever):
         query = arguments.query

         # Get query embedding
-        query_embedding = self._get_query_embedding(query)
+        query_embedding = self.embedding_model.embed_query(query)

         # Get the appropriate embedding database model
-        db_class = self.model_variables.embedding_model_class
+        db_class = self.embedding_model_class

         # Get current date for validity checks
         current_date = dt.now(tz=tz.utc).date()
@@ -159,12 +163,6 @@ class StandardRAGRetriever(BaseRetriever):
             current_app.logger.error(f'Unexpected error in RAG retrieval: {e}')
             raise

-    def _get_query_embedding(self, query: str):
-        """Get embedding for the query text"""
-        catalog = Catalog.query.get_or_404(self.catalog_id)
-        embedding_model, embedding_model_class = get_embedding_model_and_class(self.tenant_id, self.catalog_id,
-                                                                               catalog.embedding_model)
-        return embedding_model.embed_query(query)

 # Register the retriever type
 RetrieverRegistry.register("STANDARD_RAG", StandardRAGRetriever)
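
The net effect of this file's changes: the embedding model is resolved once in __init__ and the per-query helper disappears, so each retrieval only embeds the query. A toy sketch of that resolve-at-construction pattern (class names here are illustrative, not from the codebase):

class SketchRetriever:
    def __init__(self, embedding_model):
        # Resolved once at construction, mirroring the __init__ change above.
        self.embedding_model = embedding_model

    def retrieve(self, query: str):
        # Per-query work is now just the embedding call itself; no catalog lookup here.
        return self.embedding_model.embed_query(query)

class ToyEmbedder:
    def embed_query(self, text: str):
        return [float(len(text))]   # toy vector; real models return e.g. 1024 floats

retriever = SketchRetriever(ToyEmbedder())
print(retriever.retrieve("hello"))   # [5.0]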

View File

@@ -6,7 +6,7 @@ from flask import current_app
 from common.models.interaction import Specialist
 from common.utils.business_event_context import current_event
-from common.utils.model_utils import get_model_variables
+from common.utils.model_utils import get_model_variables, get_crewai_llm
 from eveai_chat_workers.retrievers.retriever_typing import RetrieverArguments
 from eveai_chat_workers.specialists.crewai_base_classes import EveAICrewAIAgent, EveAICrewAITask
 from crewai.tools import BaseTool
@@ -78,6 +78,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}" f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}"
for interaction in self._cached_session.interactions for interaction in self._cached_session.interactions
]) ])
return formatted_history
def _add_task_agent(self, task_name: str, agent_name: str): def _add_task_agent(self, task_name: str, agent_name: str):
self._task_agents[task_name.lower()] = agent_name self._task_agents[task_name.lower()] = agent_name
@@ -119,15 +120,21 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         agent_role = agent_config.get('role', '').replace('{custom_role}', agent.role or '')
         agent_goal = agent_config.get('goal', '').replace('{custom_goal}', agent.goal or '')
         agent_backstory = agent_config.get('backstory', '').replace('{custom_backstory}', agent.backstory or '')
-        new_agent = EveAICrewAIAgent(
-            self,
-            agent.type.lower(),
-            role=agent_role,
-            goal=agent_goal,
-            backstory=agent_backstory,
-            verbose=agent.tuning,
-        )
+        agent_full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
+        agent_temperature = agent_config.get('temperature', 0.3)
+        llm = get_crewai_llm(agent_full_model_name, agent_temperature)
+        if not llm:
+            current_app.logger.error(f"No LLM found for {agent_full_model_name}")
+            raise Exception(f"No LLM found for {agent_full_model_name}")
+        agent_kwargs = {
+            "role": agent_role,
+            "goal": agent_goal,
+            "backstory": agent_backstory,
+            "verbose": agent.tuning,
+            "llm": llm,
+        }
         agent_name = agent.type.lower()
+        new_agent = EveAICrewAIAgent(self, agent_name, **agent_kwargs)
         self.log_tuning(f"CrewAI Agent {agent_name} initialized", agent_config)
         self._agents[agent_name] = new_agent
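
Collecting the constructor arguments into agent_kwargs first makes the new llm entry easy to slot in alongside the existing ones. A small illustration of the dict-expansion pattern (DemoAgent is a stand-in for EveAICrewAIAgent, whose real signature lives in crewai_base_classes):

class DemoAgent:
    def __init__(self, owner, name, **kwargs):
        self.owner, self.name = owner, name
        for key, value in kwargs.items():
            setattr(self, key, value)   # role, goal, backstory, verbose, llm, ...

llm = object()   # stand-in for get_crewai_llm(...)
agent_kwargs = {
    "role": "Communicator",
    "goal": "Answer clearly",
    "backstory": "You are polite.",
    "verbose": False,
    "llm": llm,
}
agent = DemoAgent(None, "standard_rag_specialist", **agent_kwargs)
print(agent.role, agent.llm is llm)   # Communicator True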
@@ -180,7 +187,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         if name.startswith('latest_'):
             element = name[len('latest_'):]
             if self._cached_session.interactions:
-                return self._cached_session.interactions[-1].get(element, '')
+                return self._cached_session.interactions[-1].specialist_results.get(element, '')
             else:
                 return {}
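
This hunk fixes the latest_* lookup to read from the interaction's specialist_results dict rather than calling .get() on the ORM object itself. A toy version of the corrected lookup (data shapes assumed):

from types import SimpleNamespace

interactions = [
    SimpleNamespace(specialist_results={"rag_output": {"answer": "42"}}),
]

def resolve(name: str):
    if name.startswith('latest_'):
        element = name[len('latest_'):]
        if interactions:
            # The fix: .get() lives on the results dict, not on the interaction object.
            return interactions[-1].specialist_results.get(element, '')
        return {}

print(resolve('latest_rag_output'))   # {'answer': '42'}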

View File

@@ -3,7 +3,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import db, minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -81,7 +81,7 @@ class HTMLProcessor(BaseProcessor):
     def _generate_markdown_from_html(self, html_content):
         self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template("html_parse")
         parse_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()
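
This and the processor call sites below all share the same LangChain chain shape around the swapped-in get_embedding_llm(). A hedged, runnable sketch of that chain using a fake model in place of the real Mistral client (the template string is illustrative, not the real html_parse template):

from langchain_core.language_models import FakeListLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = FakeListLLM(responses=["# Title\n\nConverted markdown..."])  # stand-in for get_embedding_llm()
parse_prompt = ChatPromptTemplate.from_template(
    "Convert this HTML to markdown:\n{html}"   # illustrative prompt, not the real template
)
chain = RunnablePassthrough() | parse_prompt | llm | StrOutputParser()
print(chain.invoke({"html": "<h1>Title</h1>"}))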

View File

@@ -8,7 +8,7 @@ import re
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -210,7 +210,7 @@ class PDFProcessor(BaseProcessor):
         return text_splitter.split_text(content)

     def _process_chunks_with_llm(self, chunks):
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('pdf_parse')
         pdf_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()

View File

@@ -4,7 +4,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
@@ -46,7 +46,7 @@ class TranscriptionBaseProcessor(BaseProcessor):
     def _process_chunks(self, chunks):
         self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('transcript')
         language_template = create_language_template(template, self.document_version.language)
         transcript_prompt = ChatPromptTemplate.from_template(language_template)

View File

@@ -17,7 +17,8 @@ from common.models.document import DocumentVersion, Embedding, Document, Process
 from common.models.user import Tenant
 from common.utils.celery_utils import current_celery
 from common.utils.database import Database
-from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class
+from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
+    get_embedding_llm
 from common.utils.business_event import BusinessEvent
 from common.utils.business_event_context import current_event
@@ -209,7 +210,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
 def summarize_chunk(tenant, model_variables, document_version, chunk):
     current_event.log("Starting Summarizing Chunk")
-    llm = model_variables.get_llm()
+    llm = get_embedding_llm()
     template = model_variables.get_template("summary")
     language_template = create_language_template(template, document_version.language)
     summary_prompt = ChatPromptTemplate.from_template(language_template)