diff --git a/common/utils/model_utils.py b/common/utils/model_utils.py
index 9277819..a68418a 100644
--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -17,8 +17,10 @@
 from config.model_config import MODEL_CONFIG
 from common.extensions import template_manager
 from common.models.document import EmbeddingMistral
 from common.utils.eveai_exceptions import EveAITenantNotFound, EveAIInvalidEmbeddingModel
+from crewai import LLM

-llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+embedding_llm_model_cache: Dict[Tuple[str, float], BaseChatModel] = {}
+crewai_llm_model_cache: Dict[Tuple[str, float], LLM] = {}

 llm_metrics_handler = LLMMetricsHandler()
@@ -89,11 +91,8 @@ def get_embedding_model_and_class(tenant_id, catalog_id, full_embedding_name):
     return embedding_model, embedding_model_class


-def get_llm(full_model_name, temperature):
-    if not full_model_name:
-        full_model_name = 'openai.gpt-4o'  # Default to gpt-4o for now, as this is the original model developed against
-
-    llm = llm_model_cache.get((full_model_name, temperature))
+def get_embedding_llm(full_model_name='mistral.mistral-small-latest', temperature=0.3):
+    llm = embedding_llm_model_cache.get((full_model_name, temperature))
     if not llm:
         llm_provider, llm_model_name = full_model_name.split('.')
         if llm_provider == "openai":
@@ -110,8 +109,30 @@
             temperature=temperature,
             callbacks=[llm_metrics_handler]
         )
+        embedding_llm_model_cache[(full_model_name, temperature)] = llm

-        llm_model_cache[(full_model_name, temperature)] = llm
+    return llm
+
+
+def get_crewai_llm(full_model_name='mistral.mistral-large-latest', temperature=0.3):
+    llm = crewai_llm_model_cache.get((full_model_name, temperature))
+    if not llm:
+        llm_provider, llm_model_name = full_model_name.split('.')
+        crew_full_model_name = f"{llm_provider}/{llm_model_name}"
+        api_key = None
+        if llm_provider == "openai":
+            api_key = current_app.config['OPENAI_API_KEY']
+        elif llm_provider == "mistral":
+            api_key = current_app.config['MISTRAL_API_KEY']
+
+        llm = LLM(
+            model=crew_full_model_name,
+            temperature=temperature,
+            api_key=api_key
+        )
+        crewai_llm_model_cache[(full_model_name, temperature)] = llm
+
+    return llm


 class ModelVariables:
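
A minimal usage sketch of the two new helpers (illustrative only, not part of the patch; it assumes an active Flask application context with MISTRAL_API_KEY/OPENAI_API_KEY configured, and build_llms itself is a hypothetical wrapper):

```python
from common.utils.model_utils import get_embedding_llm, get_crewai_llm


def build_llms():
    # LangChain chat model used by the document-processing workers; defaults to
    # mistral.mistral-small-latest at temperature 0.3.
    chat_llm = get_embedding_llm()

    # CrewAI LLM wrapper used by agents; get_crewai_llm converts the internal
    # 'provider.model' name into CrewAI's 'provider/model' form and picks the
    # matching API key from the Flask config.
    agent_llm = get_crewai_llm('mistral.mistral-large-latest', temperature=0.3)

    # Both helpers cache per (full_model_name, temperature), so repeated calls
    # with the same arguments return the same instance.
    assert get_crewai_llm('mistral.mistral-large-latest', temperature=0.3) is agent_llm
    return chat_llm, agent_llm
```

Because the temperature is part of the cache key, two agents that share a model but run at different temperatures get distinct LLM instances.
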
diff --git a/config/agents/RAG_AGENT/1.0.0.yaml b/config/agents/RAG_AGENT/1.0.0.yaml
index 892408a..37591c8 100644
--- a/config/agents/RAG_AGENT/1.0.0.yaml
+++ b/config/agents/RAG_AGENT/1.0.0.yaml
@@ -10,8 +10,11 @@ backstory: >
   a very good communicator, and adapt to the style used by the human asking for information (e.g. formal or informal).
   You always stay correct and polite, whatever happens. And you ensure no discriminating language is used.
   You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
-  language the context provided to you is in.
+  language the context provided to you is in. You are participating in a conversation, not writing e.g. an email. Do not
+  include a salutation or closing greeting in your answer.
   {custom_backstory}
+full_model_name: "mistral.mistral-large-latest"
+temperature: 0.3
 metadata:
   author: "Josako"
   date_added: "2025-01-08"
diff --git a/config/agents/RAG_COMMUNICATION_AGENT/1.0.0.yaml b/config/agents/RAG_COMMUNICATION_AGENT/1.0.0.yaml
index a385b3a..a0c8c82 100644
--- a/config/agents/RAG_COMMUNICATION_AGENT/1.0.0.yaml
+++ b/config/agents/RAG_COMMUNICATION_AGENT/1.0.0.yaml
@@ -13,6 +13,11 @@ backstory: >
   answers, and a selection of additional questions to be asked in a conversation.
   Although your backoffice team might want to ask a myriad of questions, you understand that doesn't fit with the way
   humans communicate. You know how to combine multiple related questions, and understand how to interweave the
   questions in the answers when related.
+  You are perfectly multilingual in all known languages, and do your best to answer questions in {language}, whatever
+  language the context provided to you is in. Also, ensure that the questions asked do not contradict the answers
+  given and are not made obsolete by the answers provided.
+  You are participating in a conversation, not writing e.g. an email. Do not include a salutation or closing greeting
+  in your answer.
   {custom_backstory}
 metadata:
   author: "Josako"
diff --git a/config/tasks/RAG_CONSOLIDATION_TASK/1.0.0.yaml b/config/tasks/RAG_CONSOLIDATION_TASK/1.0.0.yaml
index f67aab1..d536b2b 100644
--- a/config/tasks/RAG_CONSOLIDATION_TASK/1.0.0.yaml
+++ b/config/tasks/RAG_CONSOLIDATION_TASK/1.0.0.yaml
@@ -7,6 +7,8 @@ task_description: >
   in no specific order, so don't just pick the first ones, but the ones most appropriate in your opinion! Questions are
   to be asked when your team proposes questions. You ensure both answers and additional questions are bundled into 1
   clear communication back to the user. Use {language} for your consolidated communication.
+  Be sure to format your answer in markdown when appropriate. Ensure enumerations or bulleted lists are formatted as
+  lists in markdown.
   {custom_description}

   Anwers:
@@ -15,7 +17,7 @@ task_description: >
   Additional Questions:
   %%%{additional_questions}%%%
 expected_output: >
-  One consolidated communication towards the end user with both answers and maximum {nr_of_questions} questions.
+  One consolidated communication towards the end user.
   {custom_expected_output}
 metadata:
   author: "Josako"
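
The agent YAML files can now pin a model per agent. A sketch of how such a file might be read in isolation (the loader below is hypothetical; the fallbacks mirror the defaults the specialist executor applies when the keys are absent):

```python
import yaml


def resolve_agent_model(config_path: str) -> tuple[str, float]:
    """Return (full_model_name, temperature) for an agent configuration file."""
    with open(config_path, encoding="utf-8") as fh:
        agent_config = yaml.safe_load(fh)
    # Same defaults as the executor: mistral-large-latest at temperature 0.3.
    full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
    temperature = agent_config.get('temperature', 0.3)
    return full_model_name, temperature
```

With the files above, config/agents/RAG_AGENT/1.0.0.yaml resolves to ('mistral.mistral-large-latest', 0.3) explicitly, while RAG_COMMUNICATION_AGENT falls back to the same defaults because it does not set the keys.
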
diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py
index 88edfdd..f1a23b8 100644
--- a/eveai_app/views/document_views.py
+++ b/eveai_app/views/document_views.py
@@ -713,6 +713,7 @@ def create_default_rag_library():
         description='Default RAG Retriever',
         catalog_id=cat.id,
         type="STANDARD_RAG",
+        type_version="1.0",
         configuration={
             "es_k": "8",
             "es_similarity_threshold": 0.3
@@ -727,7 +728,7 @@ def create_default_rag_library():
     spec = Specialist(
         name='Default RAG Specialist',
         description='Default RAG Specialist',
-        type='STANDARD_RAG',
+        type='STANDARD_RAG_SPECIALIST',
         configuration={"temperature": "0.3", "specialist_context": "To be specified"}
     )
     set_logging_information(spec, timestamp)
diff --git a/eveai_chat_workers/retrievers/standard_rag.py b/eveai_chat_workers/retrievers/standard_rag.py
index 6fc4319..4c1b087 100644
--- a/eveai_chat_workers/retrievers/standard_rag.py
+++ b/eveai_chat_workers/retrievers/standard_rag.py
@@ -26,6 +26,10 @@ class StandardRAGRetriever(BaseRetriever):
         retriever = Retriever.query.get_or_404(retriever_id)
         self.catalog_id = retriever.catalog_id
         self.tenant_id = tenant_id
+        catalog = Catalog.query.get_or_404(self.catalog_id)
+        self.embedding_model, self.embedding_model_class = get_embedding_model_and_class(self.tenant_id,
+                                                                                         self.catalog_id,
+                                                                                         catalog.embedding_model)
         self.similarity_threshold = retriever.configuration.get('es_similarity_threshold', 0.3)
         self.k = retriever.configuration.get('es_k', 8)
         self.tuning = retriever.tuning
@@ -77,10 +81,10 @@ class StandardRAGRetriever(BaseRetriever):
         query = arguments.query

         # Get query embedding
-        query_embedding = self._get_query_embedding(query)
+        query_embedding = self.embedding_model.embed_query(query)

         # Get the appropriate embedding database model
-        db_class = self.model_variables.embedding_model_class
+        db_class = self.embedding_model_class

         # Get current date for validity checks
         current_date = dt.now(tz=tz.utc).date()
@@ -159,12 +163,6 @@ class StandardRAGRetriever(BaseRetriever):
             current_app.logger.error(f'Unexpected error in RAG retrieval: {e}')
             raise

-    def _get_query_embedding(self, query: str):
-        """Get embedding for the query text"""
-        catalog = Catalog.query.get_or_404(self.catalog_id)
-        embedding_model, embedding_model_class = get_embedding_model_and_class(self.tenant_id, self.catalog_id,
-                                                                               catalog.embedding_model)
-

 # Register the retriever type
 RetrieverRegistry.register("STANDARD_RAG", StandardRAGRetriever)
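
StandardRAGRetriever now resolves the embedding model and its ORM class once in __init__ and embeds queries directly; the removed _get_query_embedding helper re-queried the Catalog on every call and never returned a value. A condensed sketch of the resulting per-query path (illustrative; names follow the diff, and the actual similarity search is elided):

```python
from datetime import datetime as dt, timezone as tz


def embed_for_query(retriever, query: str):
    # retriever.embedding_model / retriever.embedding_model_class were resolved in
    # __init__ via get_embedding_model_and_class(tenant_id, catalog_id, catalog.embedding_model).
    query_embedding = retriever.embedding_model.embed_query(query)  # LangChain embeddings API
    db_class = retriever.embedding_model_class                      # catalog-specific embedding table
    current_date = dt.now(tz=tz.utc).date()                         # used for validity filtering
    return query_embedding, db_class, current_date
```
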
diff --git a/eveai_chat_workers/specialists/crewai_base_specialist.py b/eveai_chat_workers/specialists/crewai_base_specialist.py
index 5c6adfd..b28e8b7 100644
--- a/eveai_chat_workers/specialists/crewai_base_specialist.py
+++ b/eveai_chat_workers/specialists/crewai_base_specialist.py
@@ -6,7 +6,7 @@
 from flask import current_app
 from common.models.interaction import Specialist
 from common.utils.business_event_context import current_event
-from common.utils.model_utils import get_model_variables
+from common.utils.model_utils import get_model_variables, get_crewai_llm
 from eveai_chat_workers.retrievers.retriever_typing import RetrieverArguments
 from eveai_chat_workers.specialists.crewai_base_classes import EveAICrewAIAgent, EveAICrewAITask
 from crewai.tools import BaseTool
@@ -78,6 +78,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
             f"AI:\n{interaction.specialist_results.get('rag_output').get('answer', '')}"
             for interaction in self._cached_session.interactions
         ])
+        return formatted_history

     def _add_task_agent(self, task_name: str, agent_name: str):
         self._task_agents[task_name.lower()] = agent_name
@@ -119,15 +120,21 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
             agent_role = agent_config.get('role', '').replace('{custom_role}', agent.role or '')
             agent_goal = agent_config.get('goal', '').replace('{custom_goal}', agent.goal or '')
             agent_backstory = agent_config.get('backstory', '').replace('{custom_backstory}', agent.backstory or '')
-            new_agent = EveAICrewAIAgent(
-                self,
-                agent.type.lower(),
-                role=agent_role,
-                goal=agent_goal,
-                backstory=agent_backstory,
-                verbose=agent.tuning,
-            )
+            agent_full_model_name = agent_config.get('full_model_name', 'mistral.mistral-large-latest')
+            agent_temperature = agent_config.get('temperature', 0.3)
+            llm = get_crewai_llm(agent_full_model_name, agent_temperature)
+            if not llm:
+                current_app.logger.error(f"No LLM found for {agent_full_model_name}")
+                raise Exception(f"No LLM found for {agent_full_model_name}")
+            agent_kwargs = {
+                "role": agent_role,
+                "goal": agent_goal,
+                "backstory": agent_backstory,
+                "verbose": agent.tuning,
+                "llm": llm,
+            }
             agent_name = agent.type.lower()
+            new_agent = EveAICrewAIAgent(self, agent_name, **agent_kwargs)
             self.log_tuning(f"CrewAI Agent {agent_name} initialized", agent_config)
             self._agents[agent_name] = new_agent
@@ -180,7 +187,7 @@ class CrewAIBaseSpecialistExecutor(BaseSpecialistExecutor):
         if name.startswith('latest_'):
             element = name[len('latest_'):]
             if self._cached_session.interactions:
-                return self._cached_session.interactions[-1].get(element, '')
+                return self._cached_session.interactions[-1].specialist_results.get(element, '')
             else:
                 return {}
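
A condensed view of how the per-agent settings flow from the YAML into agent construction (sketch only; build_crewai_agent is a hypothetical extraction of the agent-initialization loop, which additionally merges the {custom_role}/{custom_goal}/{custom_backstory} placeholders):

```python
def build_crewai_agent(executor, agent, agent_config):
    # Per-agent model selection, falling back to the executor-wide defaults.
    llm = get_crewai_llm(
        agent_config.get('full_model_name', 'mistral.mistral-large-latest'),
        agent_config.get('temperature', 0.3),
    )
    agent_kwargs = {
        "role": agent_config.get('role', ''),
        "goal": agent_config.get('goal', ''),
        "backstory": agent_config.get('backstory', ''),
        "verbose": agent.tuning,
        "llm": llm,  # each agent can now run on its own model and temperature
    }
    return EveAICrewAIAgent(executor, agent.type.lower(), **agent_kwargs)
```
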
diff --git a/eveai_workers/processors/html_processor.py b/eveai_workers/processors/html_processor.py
index 75e46fe..4c71fad 100644
--- a/eveai_workers/processors/html_processor.py
+++ b/eveai_workers/processors/html_processor.py
@@ -3,7 +3,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import db, minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -81,7 +81,7 @@ class HTMLProcessor(BaseProcessor):

     def _generate_markdown_from_html(self, html_content):
         self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template("html_parse")
         parse_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()
diff --git a/eveai_workers/processors/pdf_processor.py b/eveai_workers/processors/pdf_processor.py
index c4dcbe5..b5e1a39 100644
--- a/eveai_workers/processors/pdf_processor.py
+++ b/eveai_workers/processors/pdf_processor.py
@@ -8,7 +8,7 @@
 import re
 from langchain_core.runnables import RunnablePassthrough
 from common.extensions import minio_client
-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
 from .processor_registry import ProcessorRegistry
@@ -210,7 +210,7 @@
         return text_splitter.split_text(content)

     def _process_chunks_with_llm(self, chunks):
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('pdf_parse')
         pdf_prompt = ChatPromptTemplate.from_template(template)
         setup = RunnablePassthrough()
diff --git a/eveai_workers/processors/transcription_processor.py b/eveai_workers/processors/transcription_processor.py
index 3e3959a..5f8d461 100644
--- a/eveai_workers/processors/transcription_processor.py
+++ b/eveai_workers/processors/transcription_processor.py
@@ -4,7 +4,7 @@
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnablePassthrough

-from common.utils.model_utils import create_language_template
+from common.utils.model_utils import create_language_template, get_embedding_llm
 from .base_processor import BaseProcessor
 from common.utils.business_event_context import current_event
@@ -46,7 +46,7 @@ class TranscriptionBaseProcessor(BaseProcessor):

     def _process_chunks(self, chunks):
         self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
-        llm = self.model_variables.get_llm()
+        llm = get_embedding_llm()
         template = self.model_variables.get_template('transcript')
         language_template = create_language_template(template, self.document_version.language)
         transcript_prompt = ChatPromptTemplate.from_template(language_template)
diff --git a/eveai_workers/tasks.py b/eveai_workers/tasks.py
index 5f3c043..84e3e04 100644
--- a/eveai_workers/tasks.py
+++ b/eveai_workers/tasks.py
@@ -17,7 +17,8 @@
 from common.models.document import DocumentVersion, Embedding, Document, Process
 from common.models.user import Tenant
 from common.utils.celery_utils import current_celery
 from common.utils.database import Database
-from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class
+from common.utils.model_utils import create_language_template, get_model_variables, get_embedding_model_and_class, \
+    get_embedding_llm
 from common.utils.business_event import BusinessEvent
 from common.utils.business_event_context import current_event
@@ -209,7 +210,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):

 def summarize_chunk(tenant, model_variables, document_version, chunk):
     current_event.log("Starting Summarizing Chunk")
-    llm = model_variables.get_llm()
+    llm = get_embedding_llm()
     template = model_variables.get_template("summary")
     language_template = create_language_template(template, document_version.language)
     summary_prompt = ChatPromptTemplate.from_template(language_template)
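
All four worker call sites (HTML, PDF, transcription, and chunk summarization) now share the same cached LangChain model through get_embedding_llm(). A sketch of the chain shape they build around it (illustrative; the real template text comes from model_variables.get_template(...) and create_language_template):

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

from common.utils.model_utils import get_embedding_llm


def build_worker_chain(template: str):
    llm = get_embedding_llm()  # cached; mistral.mistral-small-latest by default
    prompt = ChatPromptTemplate.from_template(template)
    setup = RunnablePassthrough()
    # LCEL pipeline: pass the input through, render the prompt, call the chat
    # model, and collapse the response to a plain string.
    return setup | prompt | llm | StrOutputParser()
```
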