- Move to Mistral instead of OpenAI as primary choice
@@ -17,8 +17,10 @@ from config.model_config import MODEL_CONFIG
 from common.extensions import template_manager
 from common.models.document import EmbeddingMistral
 from common.utils.eveai_exceptions import EveAITenantNotFound, EveAIInvalidEmbeddingModel
+from crewai import LLM

-llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+embedding_llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+crewai_llm_model_cache: Dict[Tuple[str, float], LLM] = {}
 llm_metrics_handler = LLMMetricsHandler()

@@ -89,11 +91,8 @@ def get_embedding_model_and_class(tenant_id, catalog_id, full_embedding_name):
     return embedding_model, embedding_model_class


-def get_llm(full_model_name, temperature):
-    if not full_model_name:
-        full_model_name = 'openai.gpt-4o'  # Default to gpt-4o for now, as this is the original model developed against
-
-    llm = llm_model_cache.get((full_model_name, temperature))
+def get_embedding_llm(full_model_name='mistral.mistral-small-latest', temperature=0.3):
+    llm = embedding_llm_model_cache.get((full_model_name, temperature))
     if not llm:
         llm_provider, llm_model_name = full_model_name.split('.')
         if llm_provider == "openai":
@@ -110,8 +109,30 @@ def get_llm(full_model_name, temperature):
                 temperature=temperature,
                 callbacks=[llm_metrics_handler]
             )
-        llm_model_cache[(full_model_name, temperature)] = llm
+        embedding_llm_model_cache[(full_model_name, temperature)] = llm
     return llm


+def get_crewai_llm(full_model_name='mistral.mistral-large-latest', temperature=0.3):
+    llm = crewai_llm_model_cache.get((full_model_name, temperature))
+    if not llm:
+        llm_provider, llm_model_name = full_model_name.split('.')
+        crew_full_model_name = f"{llm_provider}/{llm_model_name}"
+        api_key = None
+        if llm_provider == "openai":
+            api_key = current_app.config['OPENAI_API_KEY']
+        elif llm_provider == "mistral":
+            api_key = current_app.config['MISTRAL_API_KEY']
+
+        llm = LLM(
+            model=crew_full_model_name,
+            temperature=temperature,
+            api_key=api_key
+        )
+        crewai_llm_model_cache[(full_model_name, temperature)] = llm
+
+    return llm
+
+
 class ModelVariables:
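A minimal usage sketch of the two new helpers, assuming a Flask app or worker context where current_app and the configured API keys are available; the values shown are the defaults from the diff:

    # LangChain chat model for document-processing chains, cached per (model, temperature)
    llm = get_embedding_llm()  # defaults to 'mistral.mistral-small-latest', temperature 0.3

    # CrewAI LLM handle for agent execution, also cached per (model, temperature)
    crew_llm = get_crewai_llm('mistral.mistral-large-latest', 0.3)
    assert get_crewai_llm('mistral.mistral-large-latest', 0.3) is crew_llm  # cache hit returns the same object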
@@ -10,8 +10,11 @@ backstory: >
   a very good communicator, and adapt to the style used by the human asking for information (e.g. formal or informal).
   You always stay correct and polite, whatever happens. And you ensure no discriminating language is used.
   You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
-  language the context provided to you is in.
+  language the context provided to you is in. You are participating in a conversation, not writing e.g. an email. Do not
+  include a salutation or closing greeting in your answer.
   {custom_backstory}
+full_model_name: "mistral.mistral-large-latest"
+temperature: 0.3
 metadata:
   author: "Josako"
   date_added: "2025-01-08"
@@ -13,6 +13,11 @@ backstory: >
   answers, and a selection of additional questions to be asked in a conversation. Although your backoffice team might
   want to ask a myriad of questions, you understand that doesn't fit with the way humans communicate. You know how to
   combine multiple related questions, and understand how to interweave the questions in the answers when related.
+  You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
+  language the context provided to you is in. Also, ensure that questions asked do not contradict with the answers
+  given, or aren't obsolete given the answer provided.
+  You are participating in a conversation, not writing e.g. an email. Do not include a salutation or closing greeting
+  in your answer.
   {custom_backstory}
 metadata:
   author: "Josako"
@@ -7,6 +7,8 @@ task_description: >
   in no specific order, so don't just pick the first ones, but the ones most appropriate in your opinion!
   Questions are to be asked when your team proposes questions. You ensure both answers and additional questions are
   bundled into 1 clear communication back to the user. Use {language} for your consolidated communication.
+  Be sure to format your answer in markdown when appropriate. Ensure enumerations or bulleted lists are formatted as
+  lists in markdown.
   {custom_description}

   Anwers:
@@ -15,7 +17,7 @@ task_description: >
   Additional Questions:
   %%%{additional_questions}%%%
 expected_output: >
-  One consolidated communication towards the end user with both answers and maximum {nr_of_questions} questions.
+  One consolidated communication towards the end user.
   {custom_expected_output}
 metadata:
   author: "Josako"
@@ -713,6 +713,7 @@ def create_default_rag_library():
         description='Default RAG Retriever',
         catalog_id=cat.id,
         type="STANDARD_RAG",
+        type_version="1.0",
         configuration={
             "es_k": "8",
             "es_similarity_threshold": 0.3
@@ -727,7 +728,7 @@ def create_default_rag_library():
     spec = Specialist(
         name='Default RAG Specialist',
         description='Default RAG Specialist',
-        type='STANDARD_RAG',
+        type='STANDARD_RAG_SPECIALIST',
         configuration={"temperature": "0.3", "specialist_context": "To be specified"}
     )
     set_logging_information(spec, timestamp)
@@ -26,6 +26,10 @@ class StandardRAGRetriever(BaseRetriever):
         retriever = Retriever.query.get_or_404(retriever_id)
         self.catalog_id = retriever.catalog_id
         self.tenant_id = tenant_id
+        catalog = Catalog.query.get_or_404(self.catalog_id)
+        self.embedding_model, self.embedding_model_class = get_embedding_model_and_class(self.tenant_id,
+                                                                                         self.catalog_id,
+                                                                                         catalog.embedding_model)
         self.similarity_threshold = retriever.configuration.get('es_similarity_threshold', 0.3)
         self.k = retriever.configuration.get('es_k', 8)
         self.tuning = retriever.tuning
@@ -77,10 +81,10 @@ class StandardRAGRetriever(BaseRetriever):
         query = arguments.query

         # Get query embedding
-        query_embedding = self._get_query_embedding(query)
+        query_embedding = self.embedding_model.embed_query(query)

         # Get the appropriate embedding database model
-        db_class = self.model_variables.embedding_model_class
+        db_class = self.embedding_model_class

         # Get current date for validity checks
         current_date = dt.now(tz=tz.utc).date()
@@ -159,12 +163,6 @@ class StandardRAGRetriever(BaseRetriever):
             current_app.logger.error(f'Unexpected error in RAG retrieval: {e}')
             raise

-    def _get_query_embedding(self, query: str):
-        """Get embedding for the query text"""
-        catalog = Catalog.query.get_or_404(self.catalog_id)
-        embedding_model, embedding_model_class = get_embedding_model_and_class(self.tenant_id, self.catalog_id,
-                                                                               catalog.embedding_model)
-

 # Register the retriever type
 RetrieverRegistry.register("STANDARD_RAG", StandardRAGRetriever)
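Taken together, the retriever hunks above move embedding-model resolution into __init__ and drop the per-query helper; the resulting flow, condensed from the lines shown (the Elasticsearch similarity query itself sits outside these hunks):

    # __init__: resolve the embedding model once from the catalog configuration
    catalog = Catalog.query.get_or_404(self.catalog_id)
    self.embedding_model, self.embedding_model_class = get_embedding_model_and_class(
        self.tenant_id, self.catalog_id, catalog.embedding_model)

    # _get_relevant_documents: embed the query directly, no _get_query_embedding round trip
    query_embedding = self.embedding_model.embed_query(query)
    db_class = self.embedding_model_class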
@@ -6,7 +6,7 @@ from flask import current_app

 from common.models.interaction import Specialist
 from common.utils.business_event_context import current_event
-from common.utils.model_utils import get_model_variables
+from common.utils.model_utils import get_model_variables, get_crewai_llm
 from eveai_chat_workers.retrievers.retriever_typing import RetrieverArguments
 from eveai_chat_workers.specialists.crewai_base_classes import EveAICrewAIAgent, EveAICrewAITask
 from crewai.tools import BaseTool
@@ -78,6 +78,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
             f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}"
             for interaction in self._cached_session.interactions
         ])
+        return formatted_history

     def _add_task_agent(self, task_name: str, agent_name: str):
         self._task_agents[task_name.lower()] = agent_name
@@ -119,15 +120,21 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         agent_role = agent_config.get('role', '').replace('{custom_role}', agent.role or '')
         agent_goal = agent_config.get('goal', '').replace('{custom_goal}', agent.goal or '')
         agent_backstory = agent_config.get('backstory', '').replace('{custom_backstory}', agent.backstory or '')
-        new_agent = EveAICrewAIAgent(
-            self,
-            agent.type.lower(),
-            role=agent_role,
-            goal=agent_goal,
-            backstory=agent_backstory,
-            verbose=agent.tuning,
-        )
+        agent_full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
+        agent_temperature = agent_config.get('temperature', 0.3)
+        llm = get_crewai_llm(agent_full_model_name, agent_temperature)
+        if not llm:
+            current_app.logger.error(f"No LLM found for {agent_full_model_name}")
+            raise Exception(f"No LLM found for {agent_full_model_name}")
+        agent_kwargs = {
+            "role": agent_role,
+            "goal": agent_goal,
+            "backstory": agent_backstory,
+            "verbose": agent.tuning,
+            "llm": llm,
+        }
         agent_name = agent.type.lower()
+        new_agent = EveAICrewAIAgent(self, agent_name, **agent_kwargs)
         self.log_tuning(f"CrewAI Agent {agent_name} initialized", agent_config)
         self._agents[agent_name] = new_agent

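Read together with the agent YAML hunk earlier, per-agent model selection now flows from configuration into CrewAI roughly as follows; agent_config stands for the parsed YAML, and the fallback values are the defaults visible in the diff:

    full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
    temperature = agent_config.get('temperature', 0.3)
    llm = get_crewai_llm(full_model_name, temperature)  # cached crewai.LLM with the matching API key
    # the resulting llm is handed to EveAICrewAIAgent via the agent_kwargs dict shown above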
@@ -180,7 +187,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         if name.startswith('latest_'):
             element = name[len('latest_'):]
             if self._cached_session.interactions:
-                return self._cached_session.interactions[-1].get(element, '')
+                return self._cached_session.interactions[-1].specialist_results.get(element, '')
             else:
                 return {}

@@ -3,7 +3,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import db, minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -81,7 +81,7 @@ class HTMLProcessor(BaseProcessor):
     def _generate_markdown_from_html(self, html_content):
         self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')

-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template("html_parse")
         parse_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()
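The processor hunks only swap where the LLM comes from; the surrounding LCEL chain stays as it was. A sketch of that pattern, assuming the usual composition with StrOutputParser (the chain-building lines themselves sit outside the hunk, and the prompt input key is illustrative):

    llm = get_embedding_llm()  # Mistral Small by default, instead of the tenant's model_variables LLM
    template = self.model_variables.get_template("html_parse")
    parse_prompt = ChatPromptTemplate.from_template(template)
    setup = RunnablePassthrough()

    chain = setup | parse_prompt | llm | StrOutputParser()
    markdown = chain.invoke({"html_content": html_content})  # input key is an assumption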
@@ -8,7 +8,7 @@ import re
 from langchain_core.runnables import RunnablePassthrough

 from common.extensions import minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -210,7 +210,7 @@ class PDFProcessor(BaseProcessor):
         return text_splitter.split_text(content)

     def _process_chunks_with_llm(self, chunks):
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('pdf_parse')
         pdf_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()
@@ -4,7 +4,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough

-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event

@@ -46,7 +46,7 @@ class TranscriptionBaseProcessor(BaseProcessor):

     def _process_chunks(self, chunks):
         self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('transcript')
         language_template = create_language_template(template, self.document_version.language)
         transcript_prompt = ChatPromptTemplate.from_template(language_template)
@@ -17,7 +17,8 @@ from common.models.document import DocumentVersion, Embedding, Document, Process
 from common.models.user import Tenant
 from common.utils.celery_utils import current_celery
 from common.utils.database import Database
-from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class
+from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
+    get_embedding_llm

 from common.utils.business_event import BusinessEvent
 from common.utils.business_event_context import current_event
@@ -209,7 +210,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):

 def summarize_chunk(tenant, model_variables, document_version, chunk):
     current_event.log("Starting Summarizing Chunk")
-    llm = model_variables.get_llm()
+    llm = get_embedding_llm()
     template = model_variables.get_template("summary")
     language_template = create_language_template(template, document_version.language)
     summary_prompt = ChatPromptTemplate.from_template(language_template)
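With this last hunk, HTML parsing, PDF chunk processing, transcription and chunk summarisation all default to the same Mistral model via get_embedding_llm(). If one of these paths ever needs a different model, the helper accepts an explicit override using the provider.model naming shown earlier; a hypothetical call:

    llm = get_embedding_llm('openai.gpt-4o', temperature=0.0)  # hypothetical override; the default remains Mistral Small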