- Move to Mistral instead of OpenAI as the primary choice

Josako
2025-03-06 14:19:35 +01:00
parent 55a89c11bb
commit c15cabc289
11 changed files with 74 additions and 36 deletions

View File

@@ -17,8 +17,10 @@ from config.model_config import MODEL_CONFIG
 from common.extensions import template_manager
 from common.models.document import EmbeddingMistral
 from common.utils.eveai_exceptions import EveAITenantNotFound, EveAIInvalidEmbeddingModel
+from crewai import LLM

-llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+embedding_llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+crewai_llm_model_cache: Dict[Tuple[str, float], LLM] = {}

 llm_metrics_handler = LLMMetricsHandler()
@@ -89,11 +91,8 @@ def get_embedding_model_and_class(tenant_id, catalog_id, full_embedding_name):
     return embedding_model, embedding_model_class


-def get_llm(full_model_name, temperature):
-    if not full_model_name:
-        full_model_name = 'openai.gpt-4o'  # Default to gpt-4o for now, as this is the original model developed against
-    llm = llm_model_cache.get((full_model_name, temperature))
+def get_embedding_llm(full_model_name='mistral.mistral-small-latest', temperature=0.3):
+    llm = embedding_llm_model_cache.get((full_model_name, temperature))
     if not llm:
         llm_provider, llm_model_name = full_model_name.split('.')
         if llm_provider == "openai":
@@ -110,8 +109,30 @@ def get_llm(full_model_name, temperature):
             temperature=temperature,
             callbacks=[llm_metrics_handler]
         )
-        llm_model_cache[(full_model_name, temperature)] = llm
+        embedding_llm_model_cache[(full_model_name, temperature)] = llm
     return llm


+def get_crewai_llm(full_model_name='mistral.mistral-large-latest', temperature=0.3):
+    llm = crewai_llm_model_cache.get((full_model_name, temperature))
+    if not llm:
+        llm_provider, llm_model_name = full_model_name.split('.')
+        crew_full_model_name = f"{llm_provider}/{llm_model_name}"
+        api_key = None
+        if llm_provider == "openai":
+            api_key = current_app.config['OPENAI_API_KEY']
+        elif llm_provider == "mistral":
+            api_key = current_app.config['MISTRAL_API_KEY']
+        llm = LLM(
+            model=crew_full_model_name,
+            temperature=temperature,
+            api_key=api_key
+        )
+        crewai_llm_model_cache[(full_model_name, temperature)] = llm
+    return llm


 class ModelVariables:
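
Both helpers memoize on a (full model name, temperature) tuple, and get_crewai_llm additionally rewrites the 'provider.model' form into the 'provider/model' form CrewAI's LLM wrapper takes. A minimal, self-contained sketch of that memoization pattern (FakeLLM and get_llm_cached are illustrative stand-ins, not code from this commit):

from typing import Dict, Tuple

class FakeLLM:
    """Stand-in for a real provider client; illustrative only."""
    def __init__(self, provider: str, model: str, temperature: float):
        self.provider, self.model, self.temperature = provider, model, temperature

_cache: Dict[Tuple[str, float], FakeLLM] = {}

def get_llm_cached(full_model_name: str = 'mistral.mistral-large-latest',
                   temperature: float = 0.3) -> FakeLLM:
    # Same (full model name, temperature) key shape as the caches above.
    key = (full_model_name, temperature)
    if key not in _cache:
        provider, model_name = full_model_name.split('.', 1)
        _cache[key] = FakeLLM(provider, model_name, temperature)
    return _cache[key]

assert get_llm_cached() is get_llm_cached()                      # same key -> cache hit
assert get_llm_cached(temperature=0.7) is not get_llm_cached()   # new key -> new client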

View File

@@ -10,8 +10,11 @@ backstory: >
   a very good communicator, and adapt to the style used by the human asking for information (e.g. formal or informal).
   You always stay correct and polite, whatever happens. And you ensure no discriminating language is used.
   You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
-  language the context provided to you is in.
+  language the context provided to you is in. You are participating in a conversation, not writing e.g. an email. Do not
+  include a salutation or closing greeting in your answer.
   {custom_backstory}
+full_model_name: "mistral.mistral-large-latest"
+temperature: 0.3
 metadata:
   author: "Josako"
   date_added: "2025-01-08"
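
For reference, a rough sketch of how the two new YAML keys can be read back with the same fallbacks the executor applies later in this commit (the YAML snippet here is abbreviated and hypothetical):

import yaml

raw = """
backstory: >
  You are a helpful assistant. {custom_backstory}
full_model_name: "mistral.mistral-large-latest"
temperature: 0.3
"""

agent_config = yaml.safe_load(raw)
# Same defaults as the executor change further down in this commit:
full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
temperature = agent_config.get('temperature', 0.3)
print(full_model_name, temperature)   # mistral.mistral-large-latest 0.3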

View File

@@ -13,6 +13,11 @@ backstory: >
   answers, and a selection of additional questions to be asked in a conversation. Although your backoffice team might
   want to ask a myriad of questions, you understand that doesn't fit with the way humans communicate. You know how to
   combine multiple related questions, and understand how to interweave the questions in the answers when related.
+  You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
+  language the context provided to you is in. Also, ensure that the questions asked do not contradict the answers
+  given and are not already obsolete given the answer provided.
+  You are participating in a conversation, not writing e.g. an email. Do not include a salutation or closing greeting
+  in your answer.
   {custom_backstory}
 metadata:
   author: "Josako"

View File

@@ -7,6 +7,8 @@ task_description: >
   in no specific order, so don't just pick the first ones, but the ones most appropriate in your opinion!
   Questions are to be asked when your team proposes questions. You ensure both answers and additional questions are
   bundled into 1 clear communication back to the user. Use {language} for your consolidated communication.
+  Be sure to format your answer in markdown when appropriate. Ensure enumerations or bulleted lists are formatted as
+  lists in markdown.
   {custom_description}

   Answers:
@@ -15,7 +17,7 @@ task_description: >
   Additional Questions:
   %%%{additional_questions}%%%
 expected_output: >
-  One consolidated communication towards the end user with both answers and maximum {nr_of_questions} questions.
+  One consolidated communication towards the end user.
   {custom_expected_output}
 metadata:
   author: "Josako"

View File

@@ -713,6 +713,7 @@ def create_default_rag_library():
         description='Default RAG Retriever',
         catalog_id=cat.id,
         type="STANDARD_RAG",
+        type_version="1.0",
         configuration={
             "es_k": "8",
             "es_similarity_threshold": 0.3
@@ -727,7 +728,7 @@ def create_default_rag_library():
     spec = Specialist(
         name='Default RAG Specialist',
         description='Default RAG Specialist',
-        type='STANDARD_RAG',
+        type='STANDARD_RAG_SPECIALIST',
         configuration={"temperature": "0.3", "specialist_context": "To be specified"}
     )
     set_logging_information(spec, timestamp)

View File

@@ -26,6 +26,10 @@ class StandardRAGRetriever(BaseRetriever):
         retriever = Retriever.query.get_or_404(retriever_id)
         self.catalog_id = retriever.catalog_id
         self.tenant_id = tenant_id
+        catalog = Catalog.query.get_or_404(self.catalog_id)
+        self.embedding_model, self.embedding_model_class = get_embedding_model_and_class(self.tenant_id,
+                                                                                         self.catalog_id,
+                                                                                         catalog.embedding_model)
         self.similarity_threshold = retriever.configuration.get('es_similarity_threshold', 0.3)
         self.k = retriever.configuration.get('es_k', 8)
         self.tuning = retriever.tuning
@@ -77,10 +81,10 @@ class StandardRAGRetriever(BaseRetriever):
         query = arguments.query

         # Get query embedding
-        query_embedding = self._get_query_embedding(query)
+        query_embedding = self.embedding_model.embed_query(query)

         # Get the appropriate embedding database model
-        db_class = self.model_variables.embedding_model_class
+        db_class = self.embedding_model_class

         # Get current date for validity checks
         current_date = dt.now(tz=tz.utc).date()
@@ -159,12 +163,6 @@ class StandardRAGRetriever(BaseRetriever):
             current_app.logger.error(f'Unexpected error in RAG retrieval: {e}')
             raise

-    def _get_query_embedding(self, query: str):
-        """Get embedding for the query text"""
-        catalog = Catalog.query.get_or_404(self.catalog_id)
-        embedding_model, embedding_model_class = get_embedding_model_and_class(self.tenant_id, self.catalog_id,
-                                                                               catalog.embedding_model)
-        return embedding_model.embed_query(query)

 # Register the retriever type
 RetrieverRegistry.register("STANDARD_RAG", StandardRAGRetriever)
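
The net effect of this file's changes: the embedding model is resolved once in __init__ and the per-query helper disappears, so each retrieval only embeds the query. A toy sketch of that resolve-at-construction pattern (class names here are illustrative, not from the codebase):

class SketchRetriever:
    def __init__(self, embedding_model):
        # Resolved once at construction, mirroring the __init__ change above.
        self.embedding_model = embedding_model

    def retrieve(self, query: str):
        # Per-query work is now just the embedding call itself; no catalog lookup here.
        return self.embedding_model.embed_query(query)

class ToyEmbedder:
    def embed_query(self, text: str):
        return [float(len(text))]   # toy vector; real models return e.g. 1024 floats

retriever = SketchRetriever(ToyEmbedder())
print(retriever.retrieve("hello"))   # [5.0]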

View File

@@ -6,7 +6,7 @@ from flask import current_app
 from common.models.interaction import Specialist
 from common.utils.business_event_context import current_event
-from common.utils.model_utils import get_model_variables
+from common.utils.model_utils import get_model_variables, get_crewai_llm
 from eveai_chat_workers.retrievers.retriever_typing import RetrieverArguments
 from eveai_chat_workers.specialists.crewai_base_classes import EveAICrewAIAgent, EveAICrewAITask
 from crewai.tools import BaseTool
@@ -78,6 +78,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}" f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}"
for interaction in self._cached_session.interactions for interaction in self._cached_session.interactions
]) ])
return formatted_history
def _add_task_agent(self, task_name: str, agent_name: str): def _add_task_agent(self, task_name: str, agent_name: str):
self._task_agents[task_name.lower()] = agent_name self._task_agents[task_name.lower()] = agent_name
@@ -119,15 +120,21 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         agent_role = agent_config.get('role', '').replace('{custom_role}', agent.role or '')
         agent_goal = agent_config.get('goal', '').replace('{custom_goal}', agent.goal or '')
         agent_backstory = agent_config.get('backstory', '').replace('{custom_backstory}', agent.backstory or '')
-        new_agent = EveAICrewAIAgent(
-            self,
-            agent.type.lower(),
-            role=agent_role,
-            goal=agent_goal,
-            backstory=agent_backstory,
-            verbose=agent.tuning,
-        )
+        agent_full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
+        agent_temperature = agent_config.get('temperature', 0.3)
+        llm = get_crewai_llm(agent_full_model_name, agent_temperature)
+        if not llm:
+            current_app.logger.error(f"No LLM found for {agent_full_model_name}")
+            raise Exception(f"No LLM found for {agent_full_model_name}")
+        agent_kwargs = {
+            "role": agent_role,
+            "goal": agent_goal,
+            "backstory": agent_backstory,
+            "verbose": agent.tuning,
+            "llm": llm,
+        }
         agent_name = agent.type.lower()
+        new_agent = EveAICrewAIAgent(self, agent_name, **agent_kwargs)
         self.log_tuning(f"CrewAI Agent {agent_name} initialized", agent_config)
         self._agents[agent_name] = new_agent
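
Collecting the constructor arguments into agent_kwargs first makes the new llm entry easy to slot in alongside the existing ones. A small illustration of the dict-expansion pattern (DemoAgent is a stand-in for EveAICrewAIAgent, whose real signature lives in crewai_base_classes):

class DemoAgent:
    def __init__(self, owner, name, **kwargs):
        self.owner, self.name = owner, name
        for key, value in kwargs.items():
            setattr(self, key, value)   # role, goal, backstory, verbose, llm, ...

llm = object()   # stand-in for get_crewai_llm(...)
agent_kwargs = {
    "role": "Communicator",
    "goal": "Answer clearly",
    "backstory": "You are polite.",
    "verbose": False,
    "llm": llm,
}
agent = DemoAgent(None, "standard_rag_specialist", **agent_kwargs)
print(agent.role, agent.llm is llm)   # Communicator True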
@@ -180,7 +187,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         if name.startswith('latest_'):
             element = name[len('latest_'):]
             if self._cached_session.interactions:
-                return self._cached_session.interactions[-1].get(element, '')
+                return self._cached_session.interactions[-1].specialist_results.get(element, '')
             else:
                 return {}
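
This hunk fixes the latest_* lookup to read from the interaction's specialist_results dict rather than calling .get() on the ORM object itself. A toy version of the corrected lookup (data shapes assumed):

from types import SimpleNamespace

interactions = [
    SimpleNamespace(specialist_results={"rag_output": {"answer": "42"}}),
]

def resolve(name: str):
    if name.startswith('latest_'):
        element = name[len('latest_'):]
        if interactions:
            # The fix: .get() lives on the results dict, not on the interaction object.
            return interactions[-1].specialist_results.get(element, '')
        return {}

print(resolve('latest_rag_output'))   # {'answer': '42'}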

View File

@@ -3,7 +3,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import db, minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -81,7 +81,7 @@ class HTMLProcessor(BaseProcessor):
     def _generate_markdown_from_html(self, html_content):
         self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template("html_parse")
         parse_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()
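
This and the processor call sites below all share the same LangChain chain shape around the swapped-in get_embedding_llm(). A hedged, runnable sketch of that chain using a fake model in place of the real Mistral client (the template string is illustrative, not the real html_parse template):

from langchain_core.language_models import FakeListLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = FakeListLLM(responses=["# Title\n\nConverted markdown..."])  # stand-in for get_embedding_llm()
parse_prompt = ChatPromptTemplate.from_template(
    "Convert this HTML to markdown:\n{html}"   # illustrative prompt, not the real template
)
chain = RunnablePassthrough() | parse_prompt | llm | StrOutputParser()
print(chain.invoke({"html": "<h1>Title</h1>"}))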

View File

@@ -8,7 +8,7 @@ import re
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -210,7 +210,7 @@ class PDFProcessor(BaseProcessor):
         return text_splitter.split_text(content)

     def _process_chunks_with_llm(self, chunks):
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('pdf_parse')
         pdf_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()

View File

@@ -4,7 +4,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
@@ -46,7 +46,7 @@ class TranscriptionBaseProcessor(BaseProcessor):
     def _process_chunks(self, chunks):
         self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('transcript')
         language_template = create_language_template(template, self.document_version.language)
         transcript_prompt = ChatPromptTemplate.from_template(language_template)

View File

@@ -17,7 +17,8 @@ from common.models.document import DocumentVersion, Embedding, Document, Process
 from common.models.user import Tenant
 from common.utils.celery_utils import current_celery
 from common.utils.database import Database
-from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class
+from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
+    get_embedding_llm
 from common.utils.business_event import BusinessEvent
 from common.utils.business_event_context import current_event
@@ -209,7 +210,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
 def summarize_chunk(tenant, model_variables, document_version, chunk):
     current_event.log("Starting Summarizing Chunk")
-    llm = model_variables.get_llm()
+    llm = get_embedding_llm()
     template = model_variables.get_template("summary")
     language_template = create_language_template(template, document_version.language)
     summary_prompt = ChatPromptTemplate.from_template(language_template)