- Move to Mistral instead of OpenAI as primary choice
@@ -17,8 +17,10 @@ from config.model_config import MODEL_CONFIG
from common.extensions import template_manager
from common.models.document import EmbeddingMistral
from common.utils.eveai_exceptions import EveAITenantNotFound, EveAIInvalidEmbeddingModel
from crewai import LLM

llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
embedding_llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
crewai_llm_model_cache: Dict[Tuple[str, float], LLM] = {}
llm_metrics_handler = LLMMetricsHandler()
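These module-level dictionaries cache one client per (full model name, temperature) pair, so repeated lookups reuse the same instance. A minimal sketch of that keying, with build_client standing in for the provider-specific constructors (only crewai.LLM is visible in these hunks; the chat-model classes are assumed):

```python
from typing import Dict, Tuple

def build_client(full_model_name: str, temperature: float) -> dict:
    # Stand-in for the provider-specific constructors that model_utils builds
    # on a cache miss (crewai.LLM is shown below; the LangChain chat models are assumed).
    provider, model_name = full_model_name.split('.')
    return {"provider": provider, "model": model_name, "temperature": temperature}

# Cache keyed by (full model name, temperature), mirroring llm_model_cache above.
_client_cache: Dict[Tuple[str, float], dict] = {}

def get_cached_client(full_model_name: str, temperature: float) -> dict:
    client = _client_cache.get((full_model_name, temperature))
    if client is None:
        client = build_client(full_model_name, temperature)
        _client_cache[(full_model_name, temperature)] = client
    return client

first = get_cached_client('mistral.mistral-small-latest', 0.3)
second = get_cached_client('mistral.mistral-small-latest', 0.3)
assert first is second  # the same cache entry is reused
```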
@@ -89,11 +91,8 @@ def get_embedding_model_and_class(tenant_id, catalog_id, full_embedding_name):
    return embedding_model, embedding_model_class


def get_llm(full_model_name, temperature):
    if not full_model_name:
        full_model_name = 'openai.gpt-4o'  # Default to gpt-4o for now, as this is the original model developed against

    llm = llm_model_cache.get((full_model_name, temperature))
def get_embedding_llm(full_model_name='mistral.mistral-small-latest', temperature=0.3):
    llm = embedding_llm_model_cache.get((full_model_name, temperature))
    if not llm:
        llm_provider, llm_model_name = full_model_name.split('.')
        if llm_provider == "openai":
@@ -110,8 +109,30 @@ def get_llm(full_model_name, temperature):
                temperature=temperature,
                callbacks=[llm_metrics_handler]
            )
            embedding_llm_model_cache[(full_model_name, temperature)] = llm

        llm_model_cache[(full_model_name, temperature)] = llm
    return llm


def get_crewai_llm(full_model_name='mistral.mistral-large-latest', temperature=0.3):
    llm = crewai_llm_model_cache.get((full_model_name, temperature))
    if not llm:
        llm_provider, llm_model_name = full_model_name.split('.')
        crew_full_model_name = f"{llm_provider}/{llm_model_name}"
        api_key = None
        if llm_provider == "openai":
            api_key = current_app.config['OPENAI_API_KEY']
        elif llm_provider == "mistral":
            api_key = current_app.config['MISTRAL_API_KEY']

        llm = LLM(
            model=crew_full_model_name,
            temperature=temperature,
            api_key=api_key
        )
        crewai_llm_model_cache[(full_model_name, temperature)] = llm

    return llm


class ModelVariables:
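A short usage sketch of the two new helpers, assuming it runs inside a Flask application context where OPENAI_API_KEY and MISTRAL_API_KEY are configured; the defaults shown are the ones declared above:

```python
from common.utils.model_utils import get_embedding_llm, get_crewai_llm

# Document-processing chains now default to the small Mistral model,
# CrewAI agents to the large one.
embedding_llm = get_embedding_llm()   # mistral.mistral-small-latest, temperature 0.3
crewai_llm = get_crewai_llm()         # mistral.mistral-large-latest, temperature 0.3

# A specific provider/model can still be requested; since the cache key is
# (full_model_name, temperature), this creates and stores a separate entry.
openai_llm = get_crewai_llm('openai.gpt-4o', 0.0)
```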
@@ -10,8 +10,11 @@ backstory: >
  a very good communicator, and adapt to the style used by the human asking for information (e.g. formal or informal).
  You always stay correct and polite, whatever happens. And you ensure no discriminatory language is used.
  You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
  language the context provided to you is in.
  language the context provided to you is in. You are participating in a conversation, not writing e.g. an email. Do not
  include a salutation or closing greeting in your answer.
  {custom_backstory}
full_model_name: "mistral.mistral-large-latest"
temperature: 0.3
metadata:
  author: "Josako"
  date_added: "2025-01-08"
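The {language} and {custom_backstory} placeholders in this backstory are filled at runtime; a small hedged sketch of that substitution, mirroring the .replace() pattern visible in the CrewAIBaseSpecialistExecutor hunk further down (the example values and the {language} handling are assumptions):

```python
# Abbreviated copy of the backstory above; the real text comes from the YAML config.
agent_config = {
    "backstory": "You do your best to answer questions in {language}. {custom_backstory}",
    "full_model_name": "mistral.mistral-large-latest",
    "temperature": 0.3,
}

custom_backstory = "You specialise in the tenant's product catalogue."  # hypothetical tenant value
backstory = agent_config.get("backstory", "").replace("{custom_backstory}", custom_backstory)
backstory = backstory.replace("{language}", "en")  # assumption: {language} is filled the same way
print(backstory)
```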
@@ -13,6 +13,11 @@ backstory: >
  answers, and a selection of additional questions to be asked in a conversation. Although your backoffice team might
  want to ask a myriad of questions, you understand that doesn't fit with the way humans communicate. You know how to
  combine multiple related questions, and understand how to interweave the questions in the answers when related.
  You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
  language the context provided to you is in. Also, ensure that the questions asked do not contradict the answers
  given and are not made obsolete by the answers provided.
  You are participating in a conversation, not writing e.g. an email. Do not include a salutation or closing greeting
  in your answer.
  {custom_backstory}
metadata:
  author: "Josako"
@@ -7,6 +7,8 @@ task_description: >
  in no specific order, so don't just pick the first ones, but the ones most appropriate in your opinion!
  Questions are to be asked when your team proposes questions. You ensure both answers and additional questions are
  bundled into 1 clear communication back to the user. Use {language} for your consolidated communication.
  Be sure to format your answer in markdown when appropriate. Ensure enumerations or bulleted lists are formatted as
  lists in markdown.
  {custom_description}

  Answers:
@@ -15,7 +17,7 @@ task_description: >
  Additional Questions:
  %%%{additional_questions}%%%
expected_output: >
  One consolidated communication towards the end user with both answers and maximum {nr_of_questions} questions.
  One consolidated communication towards the end user.
  {custom_expected_output}
metadata:
  author: "Josako"
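The %%%{additional_questions}%%% marker (and, by analogy, an assumed %%%{answers}%%% marker not visible in this hunk) is presumably substituted into the task description before the crew runs; a hedged sketch of that step, with fill_task_description as a hypothetical helper:

```python
def fill_task_description(template: str, answers: str, additional_questions: str, language: str) -> str:
    # Hypothetical helper; the real executor may perform these substitutions elsewhere.
    return (template
            .replace("%%%{answers}%%%", answers)            # marker assumed by analogy
            .replace("%%%{additional_questions}%%%", additional_questions)
            .replace("{language}", language))

description = fill_task_description(
    "Answers:\n%%%{answers}%%%\n\nAdditional Questions:\n%%%{additional_questions}%%%\nUse {language}.",
    answers="The store opens at 9:00.",
    additional_questions="Would you like directions to the nearest store?",
    language="en",
)
print(description)
```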
@@ -713,6 +713,7 @@ def create_default_rag_library():
        description='Default RAG Retriever',
        catalog_id=cat.id,
        type="STANDARD_RAG",
        type_version="1.0",
        configuration={
            "es_k": "8",
            "es_similarity_threshold": 0.3
@@ -727,7 +728,7 @@ def create_default_rag_library():
    spec = Specialist(
        name='Default RAG Specialist',
        description='Default RAG Specialist',
        type='STANDARD_RAG',
        type='STANDARD_RAG_SPECIALIST',
        configuration={"temperature": "0.3", "specialist_context": "To be specified"}
    )
    set_logging_information(spec, timestamp)
@@ -26,6 +26,10 @@ class StandardRAGRetriever(BaseRetriever):
        retriever = Retriever.query.get_or_404(retriever_id)
        self.catalog_id = retriever.catalog_id
        self.tenant_id = tenant_id
        catalog = Catalog.query.get_or_404(self.catalog_id)
        self.embedding_model, self.embedding_model_class = get_embedding_model_and_class(self.tenant_id,
                                                                                         self.catalog_id,
                                                                                         catalog.embedding_model)
        self.similarity_threshold = retriever.configuration.get('es_similarity_threshold', 0.3)
        self.k = retriever.configuration.get('es_k', 8)
        self.tuning = retriever.tuning
@@ -77,10 +81,10 @@ class StandardRAGRetriever(BaseRetriever):
        query = arguments.query

        # Get query embedding
        query_embedding = self._get_query_embedding(query)
        query_embedding = self.embedding_model.embed_query(query)

        # Get the appropriate embedding database model
        db_class = self.model_variables.embedding_model_class
        db_class = self.embedding_model_class

        # Get current date for validity checks
        current_date = dt.now(tz=tz.utc).date()
@@ -159,12 +163,6 @@ class StandardRAGRetriever(BaseRetriever):
            current_app.logger.error(f'Unexpected error in RAG retrieval: {e}')
            raise

    def _get_query_embedding(self, query: str):
        """Get embedding for the query text"""
        catalog = Catalog.query.get_or_404(self.catalog_id)
        embedding_model, embedding_model_class = get_embedding_model_and_class(self.tenant_id, self.catalog_id,
                                                                               catalog.embedding_model)


# Register the retriever type
RetrieverRegistry.register("STANDARD_RAG", StandardRAGRetriever)
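The retriever hunks move the embedding lookup out of the per-query path: get_embedding_model_and_class is now called once in __init__ and the removed _get_query_embedding helper is no longer needed. A minimal sketch of that shape, with the embedding model stubbed so the example runs on its own:

```python
class RetrieverSketch:
    """Sketch of the refactor: resolve the embedding model once, reuse it per query."""

    def __init__(self, embedding_model):
        # In StandardRAGRetriever this comes from get_embedding_model_and_class(...)
        # and is stored on the instance together with the matching DB model class.
        self.embedding_model = embedding_model

    def retrieve(self, query: str):
        # Previously delegated to the removed _get_query_embedding(); now the
        # cached model is used directly, one embed_query call per retrieval.
        return self.embedding_model.embed_query(query)


class _EchoEmbedder:
    """Toy stand-in for the real embedding model."""

    def embed_query(self, text: str):
        return [float(len(text))]


vector = RetrieverSketch(_EchoEmbedder()).retrieve("opening hours")
print(vector)
```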
@@ -6,7 +6,7 @@ from flask import current_app

from common.models.interaction import Specialist
from common.utils.business_event_context import current_event
from common.utils.model_utils import get_model_variables
from common.utils.model_utils import get_model_variables, get_crewai_llm
from eveai_chat_workers.retrievers.retriever_typing import RetrieverArguments
from eveai_chat_workers.specialists.crewai_base_classes import EveAICrewAIAgent, EveAICrewAITask
from crewai.tools import BaseTool
@@ -78,6 +78,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
            f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}"
            for interaction in self._cached_session.interactions
        ])
        return formatted_history

    def _add_task_agent(self, task_name: str, agent_name: str):
        self._task_agents[task_name.lower()] = agent_name
@@ -119,15 +120,21 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
            agent_role = agent_config.get('role', '').replace('{custom_role}', agent.role or '')
            agent_goal = agent_config.get('goal', '').replace('{custom_goal}', agent.goal or '')
            agent_backstory = agent_config.get('backstory', '').replace('{custom_backstory}', agent.backstory or '')
            new_agent = EveAICrewAIAgent(
                self,
                agent.type.lower(),
                role=agent_role,
                goal=agent_goal,
                backstory=agent_backstory,
                verbose=agent.tuning,
            )
            agent_full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
            agent_temperature = agent_config.get('temperature', 0.3)
            llm = get_crewai_llm(agent_full_model_name, agent_temperature)
            if not llm:
                current_app.logger.error(f"No LLM found for {agent_full_model_name}")
                raise Exception(f"No LLM found for {agent_full_model_name}")
            agent_kwargs = {
                "role": agent_role,
                "goal": agent_goal,
                "backstory": agent_backstory,
                "verbose": agent.tuning,
                "llm": llm,
            }
            agent_name = agent.type.lower()
            new_agent = EveAICrewAIAgent(self, agent_name, **agent_kwargs)
            self.log_tuning(f"CrewAI Agent {agent_name} initialized", agent_config)
            self._agents[agent_name] = new_agent

@@ -180,7 +187,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
        if name.startswith('latest_'):
            element = name[len('latest_'):]
            if self._cached_session.interactions:
                return self._cached_session.interactions[-1].get(element, '')
                return self._cached_session.interactions[-1].specialist_results.get(element, '')
        else:
            return {}
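Each agent now receives its own LLM chosen from its YAML configuration, defaulting to mistral.mistral-large-latest. A hedged sketch of that selection logic, with the LLM factory stubbed so it runs standalone (build_agent_kwargs is a hypothetical helper, not a function from this commit):

```python
DEFAULT_AGENT_MODEL = 'mistral.mistral-large-latest'  # same default as the executor above

def build_agent_kwargs(agent_config: dict, custom_backstory: str, get_llm) -> dict:
    # Hypothetical helper condensing the per-agent wiring shown above.
    backstory = agent_config.get('backstory', '').replace('{custom_backstory}', custom_backstory or '')
    full_model_name = agent_config.get('full_model_name', DEFAULT_AGENT_MODEL)
    temperature = agent_config.get('temperature', 0.3)
    llm = get_llm(full_model_name, temperature)
    if not llm:
        raise RuntimeError(f"No LLM found for {full_model_name}")
    return {"backstory": backstory, "llm": llm, "verbose": False}

# Usage with a stubbed factory standing in for get_crewai_llm:
kwargs = build_agent_kwargs(
    {"backstory": "Helpful agent. {custom_backstory}", "temperature": 0.1},
    custom_backstory="Knows the product catalogue.",
    get_llm=lambda name, temp: {"model": name, "temperature": temp},
)
print(kwargs["llm"])
```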
@@ -3,7 +3,7 @@ from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from common.extensions import db, minio_client
from common.utils.model_utils import create_language_template
from common.utils.model_utils import create_language_template, get_embedding_llm
from .base_processor import BaseProcessor
from common.utils.business_event_context import current_event
from .processor_registry import ProcessorRegistry
@@ -81,7 +81,7 @@ class HTMLProcessor(BaseProcessor):
    def _generate_markdown_from_html(self, html_content):
        self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')

        llm = self.model_variables.get_llm()
        llm = get_embedding_llm()
        template = self.model_variables.get_template("html_parse")
        parse_prompt = ChatPromptTemplate.from_template(template)
        setup = RunnablePassthrough()
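All of the processor hunks make the same swap: the tenant-scoped self.model_variables.get_llm() is replaced by the shared get_embedding_llm() default. A hedged sketch of the chain these setup lines presumably feed into (the "prompt | llm | parser" composition and the html_parse template text are assumptions, and a fake chat model stands in for the Mistral client):

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Fake model standing in for get_embedding_llm() (mistral.mistral-small-latest).
llm = FakeListChatModel(responses=["# Converted markdown"])

template = "Convert the following HTML to markdown:\n{html_content}"  # assumed shape of the html_parse template
parse_prompt = ChatPromptTemplate.from_template(template)
setup = RunnablePassthrough()

chain = setup | parse_prompt | llm | StrOutputParser()
print(chain.invoke({"html_content": "<h1>Hello</h1>"}))
```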
@@ -8,7 +8,7 @@ import re
from langchain_core.runnables import RunnablePassthrough

from common.extensions import minio_client
from common.utils.model_utils import create_language_template
from common.utils.model_utils import create_language_template, get_embedding_llm
from .base_processor import BaseProcessor
from common.utils.business_event_context import current_event
from .processor_registry import ProcessorRegistry
@@ -210,7 +210,7 @@ class PDFProcessor(BaseProcessor):
        return text_splitter.split_text(content)

    def _process_chunks_with_llm(self, chunks):
        llm = self.model_variables.get_llm()
        llm = get_embedding_llm()
        template = self.model_variables.get_template('pdf_parse')
        pdf_prompt = ChatPromptTemplate.from_template(template)
        setup = RunnablePassthrough()
@@ -4,7 +4,7 @@ from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

from common.utils.model_utils import create_language_template
from common.utils.model_utils import create_language_template, get_embedding_llm
from .base_processor import BaseProcessor
from common.utils.business_event_context import current_event

@@ -46,7 +46,7 @@ class TranscriptionBaseProcessor(BaseProcessor):

    def _process_chunks(self, chunks):
        self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
        llm = self.model_variables.get_llm()
        llm = get_embedding_llm()
        template = self.model_variables.get_template('transcript')
        language_template = create_language_template(template, self.document_version.language)
        transcript_prompt = ChatPromptTemplate.from_template(language_template)
@@ -17,7 +17,8 @@ from common.models.document import DocumentVersion, Embedding, Document, Process
from common.models.user import Tenant
from common.utils.celery_utils import current_celery
from common.utils.database import Database
from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class
from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
    get_embedding_llm

from common.utils.business_event import BusinessEvent
from common.utils.business_event_context import current_event
@@ -209,7 +210,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):

def summarize_chunk(tenant, model_variables, document_version, chunk):
    current_event.log("Starting Summarizing Chunk")
    llm = model_variables.get_llm()
    llm = get_embedding_llm()
    template = model_variables.get_template("summary")
    language_template = create_language_template(template, document_version.language)
    summary_prompt = ChatPromptTemplate.from_template(language_template)
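summarize_chunk presumably completes the same kind of chain, with the document language injected through create_language_template before the prompt is constructed. A sketch under those assumptions, with both the template text and the LLM stubbed (summarize and create_language_template_stub are hypothetical stand-ins):

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

def create_language_template_stub(template: str, language: str) -> str:
    # Stand-in for create_language_template: the real helper injects the
    # document language into the prompt template.
    return template.replace("{language}", language)

def summarize(chunk: str, language: str) -> str:
    llm = FakeListChatModel(responses=["A one-sentence summary."])  # stand-in for get_embedding_llm()
    template = "Summarize the following text in {language}:\n{chunk}"  # assumed shape of the "summary" template
    language_template = create_language_template_stub(template, language)
    summary_prompt = ChatPromptTemplate.from_template(language_template)
    chain = summary_prompt | llm | StrOutputParser()
    return chain.invoke({"chunk": chunk})

print(summarize("Mistral is now the primary model provider.", "en"))
```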