- Introduction of dynamic Retrievers & Specialists

- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists
- Start adaptation of chat client
This commit is contained in:
Josako
2024-11-15 10:00:53 +01:00
parent 55a8a95f79
commit 1807435339
101 changed files with 4181 additions and 1764 deletions

View File

@@ -1,24 +1,23 @@
from datetime import datetime as dt, timezone as tz
from typing import Dict, Any, Optional
from flask import current_app
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from sqlalchemy.exc import SQLAlchemyError
# OpenAI imports
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.exceptions import LangChainException
from common.utils.config_field_types import TaggingFields
from common.utils.database import Database
from common.models.document import Embedding
from common.models.document import Embedding, Catalog
from common.models.user import Tenant
from common.models.interaction import ChatSession, Interaction, InteractionEmbedding
from common.extensions import db
from common.models.interaction import ChatSession, Interaction, InteractionEmbedding, Specialist, SpecialistRetriever
from common.extensions import db, cache_manager
from common.utils.celery_utils import current_celery
from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
from common.langchain.retrievers.eveai_default_rag_retriever import EveAIDefaultRagRetriever
from common.langchain.retrievers.eveai_history_retriever import EveAIHistoryRetriever
from common.utils.business_event import BusinessEvent
from common.utils.business_event_context import current_event
from config.specialist_types import SPECIALIST_TYPES
from eveai_chat_workers.chat_session_cache import get_chat_history
from eveai_chat_workers.specialists.registry import SpecialistRegistry
from config.retriever_types import RETRIEVER_TYPES
from eveai_chat_workers.specialists.specialist_typing import SpecialistArguments
# Healthcheck task
@@ -27,41 +26,207 @@ def ping():
return 'pong'
def detail_question(question, language, model_variables, session_id):
    """
    Run the history prompt chain for a question and return its string output.

    The chain feeds the question, together with the output of an
    ``EveAIHistoryRetriever`` built for ``session_id``, into a prompt created
    from ``model_variables['history_template']`` (localised for ``language``
    and with ``{tenant_context}`` replaced by ``model_variables['rag_context']``),
    then through ``model_variables['llm']`` and a ``StrOutputParser``.

    Args:
        question: Question passed to the chain
        language: Language used to build the language-specific template
        model_variables: Mapping providing 'llm', 'history_template' and 'rag_context'
        session_id: Chat session ID handed to the history retriever

    Returns:
        The parsed output of the chain

    Raises:
        LangChainException: Logged and re-raised when invoking the chain fails
    """
    log = current_app.logger
    log.debug(f'Detail question: {question}')
    log.debug(f'model_variables: {model_variables}')
    log.debug(f'session_id: {session_id}')

    history_retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
    chat_model = model_variables['llm']
    base_template = model_variables['history_template']

    # Localise the template first, then inject the tenant context
    localized_template = create_language_template(base_template, language)
    prompt_text = replace_variable_in_template(
        localized_template,
        "{tenant_context}",
        model_variables['rag_context'],
    )
    prompt = ChatPromptTemplate.from_template(prompt_text)

    # The retriever and the raw question are supplied to the prompt side by side
    chain_inputs = RunnableParallel({"history": history_retriever, "question": RunnablePassthrough()})
    parser = StrOutputParser()
    pipeline = chain_inputs | prompt | chat_model | parser

    try:
        detailed = pipeline.invoke(question)
    except LangChainException as exc:
        log.error(f'Error detailing question: {exc}')
        raise
    return detailed
class ArgumentPreparationError(Exception):
    """Raised when specialist or retriever arguments cannot be prepared or validated."""
@current_celery.task(name='ask_question', queue='llm_interactions')
def ask_question(tenant_id, question, language, session_id, user_timezone, room):
"""returns result structured as follows:
result = {
'answer': 'Your answer here',
'citations': ['http://example.com/citation1', 'http://example.com/citation2'],
'algorithm': 'algorithm_name',
'interaction_id': 'interaction_id_value'
}
def validate_specialist_arguments(specialist_type: str, arguments: Dict[str, Any]) -> None:
"""
with BusinessEvent("Ask Question", tenant_id=tenant_id, chat_session_id=session_id):
current_app.logger.info(f'ask_question: Received question for tenant {tenant_id}: {question}. Processing...')
Validate specialist-specific arguments
Args:
specialist_type: Type of specialist
arguments: Arguments to validate (excluding retriever-specific arguments)
Raises:
ArgumentPreparationError: If validation fails
"""
specialist_config = SPECIALIST_TYPES.get(specialist_type)
if not specialist_config:
raise ArgumentPreparationError(f"Unknown specialist type: {specialist_type}")
required_args = specialist_config.get('arguments', {})
# Check for required arguments
for arg_name, arg_config in required_args.items():
if arg_config.get('required', False) and arg_name not in arguments:
raise ArgumentPreparationError(f"Missing required argument '{arg_name}' for specialist")
if arg_name in arguments:
# Type checking
expected_type = arg_config.get('type')
if expected_type == 'str' and not isinstance(arguments[arg_name], str):
raise ArgumentPreparationError(f"Argument '{arg_name}' must be a string")
elif expected_type == 'int' and not isinstance(arguments[arg_name], int):
raise ArgumentPreparationError(f"Argument '{arg_name}' must be an integer")
def validate_retriever_arguments(retriever_type: str, arguments: Dict[str, Any],
                                 catalog_config: Optional[Dict[str, Any]] = None) -> None:
    """
    Check a retriever's arguments against its type definition.

    Every argument flagged as required in ``RETRIEVER_TYPES[retriever_type]``
    must be present. When a catalog configuration containing 'tagging_fields'
    is supplied and the arguments contain 'metadata_filters', those filters
    are validated against the tagging fields as well.

    Args:
        retriever_type: Type of retriever
        arguments: Arguments to validate
        catalog_config: Optional catalog configuration for metadata validation

    Raises:
        ArgumentPreparationError: If the type is unknown, a required argument
            is missing, or the metadata filters are reported as invalid
    """
    type_definition = RETRIEVER_TYPES.get(retriever_type)
    if not type_definition:
        raise ArgumentPreparationError(f"Unknown retriever type: {retriever_type}")

    # Required-argument check for the standard retriever arguments
    for name, spec in type_definition.get('arguments', {}).items():
        if spec.get('required', False) and name not in arguments:
            raise ArgumentPreparationError(f"Missing required argument '{name}' for retriever")

    # Metadata filters can only be checked when a catalog configuration is available
    if not catalog_config or 'metadata_filters' not in arguments:
        return
    if 'tagging_fields' not in catalog_config:
        return

    field_definitions = TaggingFields.from_dict(catalog_config['tagging_fields'])
    problems = field_definitions.validate_argument_values(arguments['metadata_filters'])
    if problems:
        raise ArgumentPreparationError(f"Invalid metadata filters: {problems}")
def is_retriever_id(key: str) -> bool:
    """
    Check if a key represents a valid retriever ID.

    Valid formats: positive integers written with ASCII digits only,
    including leading zeros (e.g. "7", "007"). Anything else - signs,
    surrounding whitespace, underscore separators, non-ASCII digits,
    the value zero, empty strings or non-string values - is rejected.

    Args:
        key: String to check

    Returns:
        bool: True if the key represents a valid retriever ID
    """
    # Non-string input can never be an ID; report False instead of raising
    if not isinstance(key, str):
        return False

    # int() would also accept " 12 ", "+5", "1_000" and non-ASCII digits,
    # none of which are plain IDs, so require ASCII digits only
    if not (key.isascii() and key.isdigit()):
        return False

    # Positive means at least one non-zero digit ("0" and "000" are rejected);
    # checked on the text so arbitrarily long digit strings cannot raise
    return key.lstrip('0') != ''
def prepare_arguments(specialist: Any, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare complete argument dictionary for specialist execution with inheritance

    String keys accepted by ``is_retriever_id`` are treated as
    retriever-specific argument sets; every other key is a specialist
    argument. Each retriever linked to the specialist starts from a copy of
    the specialist arguments, is overridden by its own argument set (if any),
    and always receives a 'type' entry holding the retriever type.

    Retriever keys are normalised to their canonical decimal form, so "007"
    addresses the retriever with id 7. If several keys normalise to the same
    id, the one iterated last wins.

    Args:
        specialist: Specialist model instance
        arguments: Dictionary containing:
            - Specialist arguments
            - Retriever-specific arguments keyed by retriever ID

    Returns:
        Dict containing the specialist arguments plus a 'retriever_arguments'
        entry mapping each retriever ID (as string) to its prepared arguments

    Raises:
        ArgumentPreparationError: If argument preparation or validation fails
    """
    try:
        # Separate specialist arguments from retriever arguments
        retriever_args = {}
        specialist_args = {}
        for key, value in arguments.items():
            if isinstance(key, str) and is_retriever_id(key):  # Retriever ID
                # Lookups below use str(retriever.id); normalise so keys with
                # leading zeros still match instead of being silently dropped
                retriever_args[str(int(key))] = value
            else:
                specialist_args[key] = value

        # Validate specialist arguments
        validate_specialist_arguments(specialist.type, specialist_args)

        # Get all retrievers associated with this specialist
        specialist_retrievers = (
            SpecialistRetriever.query
            .filter_by(specialist_id=specialist.id)
            .all()
        )

        # Process each retriever
        prepared_retriever_args = {}
        for spec_retriever in specialist_retrievers:
            retriever = spec_retriever.retriever
            retriever_id = str(retriever.id)

            # Get catalog configuration if it exists; a failed lookup is
            # logged and leaves catalog_config as None
            catalog_config = None
            if retriever.catalog_id:
                try:
                    catalog = Catalog.query.get(retriever.catalog_id)
                    if catalog:
                        catalog_config = catalog.configuration
                except SQLAlchemyError:
                    current_app.logger.warning(
                        f"Could not fetch catalog {retriever.catalog_id} for retriever {retriever_id}"
                    )

            # Start with specialist arguments (inheritance)
            inherited_args = specialist_args.copy()

            # Override with retriever-specific arguments if provided
            if retriever_id in retriever_args:
                inherited_args.update(retriever_args[retriever_id])

            # Always include the retriever type
            inherited_args['type'] = retriever.type

            # Validate the combined arguments
            validate_retriever_arguments(
                retriever.type,
                inherited_args,
                catalog_config
            )

            prepared_retriever_args[retriever_id] = inherited_args

        # Construct final argument structure
        return {
            **specialist_args,
            'retriever_arguments': prepared_retriever_args
        }

    except ArgumentPreparationError as e:
        # Already the documented exception type: log it and re-raise unchanged
        # instead of wrapping it in a second ArgumentPreparationError
        current_app.logger.error(f'Error during argument preparation: {e}')
        raise
    except SQLAlchemyError as e:
        current_app.logger.error(f'Database error during argument preparation: {e}')
        raise ArgumentPreparationError(f"Database error: {str(e)}") from e
    except Exception as e:
        current_app.logger.error(f'Error during argument preparation: {e}')
        raise ArgumentPreparationError(str(e)) from e
@current_celery.task(name='execute_specialist', queue='llm_interactions')
def execute_specialist(tenant_id: int, specialist_id: int, arguments: Dict[str, Any],
session_id: str, user_timezone: str, room: str) -> dict:
"""
Execute a specialist with given arguments
Args:
tenant_id: ID of the tenant
specialist_id: ID of the specialist to use
arguments: Dictionary containing all required arguments for specialist and retrievers
session_id: Chat session ID
user_timezone: User's timezone
room: Socket.IO room for the response
Returns:
dict: {
'result': Dict - Specialist execution result
'interaction_id': int - Created interaction ID
'room': str - Socket.IO room
}
"""
with BusinessEvent("Execute Specialist", tenant_id=tenant_id, chat_session_id=session_id) as event:
current_app.logger.info(
f'execute_specialist: Processing request for tenant {tenant_id} using specialist {specialist_id}')
try:
# Retrieve the tenant
@@ -69,208 +234,85 @@ def ask_question(tenant_id, question, language, session_id, user_timezone, room)
if not tenant:
raise Exception(f'Tenant {tenant_id} not found.')
# Ensure we are working in the correct database schema
# Switch to correct database schema
Database(tenant_id).switch_schema()
# Ensure we have a session to store history
chat_session = ChatSession.query.filter_by(session_id=session_id).first()
if not chat_session:
# Ensure we have a session
cached_session = cache_manager.chat_session_cache.get_cached_session(
session_id,
create_params={'timezone': user_timezone}
)
# Get specialist from database
specialist = Specialist.query.get_or_404(specialist_id)
# Prepare complete arguments
try:
raw_arguments = prepare_arguments(specialist, arguments)
# Convert the prepared arguments into a SpecialistArguments instance
complete_arguments = SpecialistArguments.create(
type_name=specialist.type,
specialist_args={k: v for k, v in raw_arguments.items() if k != 'retriever_arguments'},
retriever_args=raw_arguments.get('retriever_arguments', {})
)
except ValueError as e:
current_app.logger.error(f'execute_specialist: Error preparing arguments: {e}')
raise
# Create new interaction record
new_interaction = Interaction()
new_interaction.chat_session_id = cached_session.id
new_interaction.timezone = user_timezone
new_interaction.question_at = dt.now(tz.utc)
new_interaction.specialist_id = specialist.id
new_interaction.specialist_arguments = complete_arguments.model_dump(mode='json')
try:
db.session.add(new_interaction)
db.session.commit()
event.update_attribute('interaction_id', new_interaction.id)
except SQLAlchemyError as e:
current_app.logger.error(f'execute_specialist: Error creating interaction: {e}')
raise
with current_event.create_span("Specialist invocation"):
# Initialize specialist instance
specialist_class = SpecialistRegistry.get_specialist_class(specialist.type)
specialist_instance = specialist_class(
tenant_id=tenant_id,
specialist_id=specialist_id,
session_id=session_id,
)
# Execute specialist
result = specialist_instance.execute(complete_arguments)
# Update interaction record
new_interaction.specialist_results = result.model_dump(mode='json') # Store complete result
new_interaction.answer_at = dt.now(tz.utc)
try:
chat_session = ChatSession()
chat_session.session_id = session_id
chat_session.session_start = dt.now(tz.utc)
chat_session.timezone = user_timezone
db.session.add(chat_session)
db.session.add(new_interaction)
db.session.commit()
except SQLAlchemyError as e:
current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
current_app.logger.error(f'execute_specialist: Error updating interaction: {e}')
raise
with current_event.create_span("RAG Answer"):
result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
result['interaction_id'] = interaction.id
result['room'] = room # Include the room in the result
# Now that we have a complete interaction with an answer, add it to the cache
cache_manager.chat_session_cache.add_completed_interaction(session_id, new_interaction)
if result['insufficient_info']:
if 'LLM' in tenant.fallback_algorithms:
with current_event.create_span("Fallback Algorithm LLM"):
result, interaction = answer_using_llm(question, language, tenant, chat_session)
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['LLM']['name']
result['interaction_id'] = interaction.id
result['room'] = room # Include the room in the result
return result
except Exception as e:
current_app.logger.error(f'ask_question: Error processing question: {e}')
raise
def answer_using_tenant_rag(question, language, tenant, chat_session):
new_interaction = Interaction()
new_interaction.question = question
new_interaction.language = language
new_interaction.timezone = chat_session.timezone
new_interaction.appreciation = None
new_interaction.chat_session_id = chat_session.id
new_interaction.question_at = dt.now(tz.utc)
new_interaction.algorithm_used = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
# Select variables to work with depending on tenant model
model_variables = select_model_variables(tenant)
tenant_info = tenant.to_dict()
# Langchain debugging if required
# set_debug(True)
with current_event.create_span("Detail Question"):
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
new_interaction.detailed_question = detailed_question
new_interaction.detailed_question_at = dt.now(tz.utc)
with current_event.create_span("Generate Answer using RAG"):
retriever = EveAIDefaultRagRetriever(1, model_variables, tenant_info)
llm = model_variables['llm']
template = model_variables['rag_template']
language_template = create_language_template(template, language)
full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
rag_prompt = ChatPromptTemplate.from_template(full_template)
setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
new_interaction_embeddings = []
if not model_variables['cited_answer_cls']: # The model doesn't support structured feedback
output_parser = StrOutputParser()
chain = setup_and_retrieval | rag_prompt | llm | output_parser
# Invoke the chain with the actual question
answer = chain.invoke(detailed_question)
new_interaction.answer = answer
result = {
'answer': answer,
'citations': [],
'insufficient_info': False
# Prepare response
response = {
'result': result.model_dump(),
'interaction_id': new_interaction.id,
'room': room
}
else: # The model supports structured feedback
structured_llm = llm.with_structured_output(model_variables['cited_answer_cls'])
return response
chain = setup_and_retrieval | rag_prompt | structured_llm
result = chain.invoke(detailed_question).dict()
current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
new_interaction.answer = result['answer']
# Filter out the existing Embedding IDs
given_embedding_ids = [int(emb_id) for emb_id in result['citations']]
embeddings = (
db.session.query(Embedding)
.filter(Embedding.id.in_(given_embedding_ids))
.all()
)
existing_embedding_ids = [emb.id for emb in embeddings]
urls = list(set(emb.document_version.url for emb in embeddings))
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
current_app.rag_tuning_logger.debug(f'{urls}')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
for emb_id in existing_embedding_ids:
new_interaction_embedding = InteractionEmbedding(embedding_id=emb_id)
new_interaction_embedding.interaction = new_interaction
new_interaction_embeddings.append(new_interaction_embedding)
result['citations'] = urls
# Disable langchain debugging if set above.
# set_debug(False)
new_interaction.answer_at = dt.now(tz.utc)
chat_session.session_end = dt.now(tz.utc)
try:
db.session.add(chat_session)
db.session.add(new_interaction)
db.session.add_all(new_interaction_embeddings)
db.session.commit()
return result, new_interaction
except SQLAlchemyError as e:
current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
raise
def answer_using_llm(question, language, tenant, chat_session):
new_interaction = Interaction()
new_interaction.question = question
new_interaction.language = language
new_interaction.timezone = chat_session.timezone
new_interaction.appreciation = None
new_interaction.chat_session_id = chat_session.id
new_interaction.question_at = dt.now(tz.utc)
new_interaction.algorithm_used = current_app.config['INTERACTION_ALGORITHMS']['LLM']['name']
# Select variables to work with depending on tenant model
model_variables = select_model_variables(tenant)
tenant_info = tenant.to_dict()
# Langchain debugging if required
# set_debug(True)
with current_event.create_span("Detail Question"):
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
new_interaction.detailed_question = detailed_question
new_interaction.detailed_question_at = dt.now(tz.utc)
with current_event.create_span("Detail Answer using LLM"):
retriever = EveAIDefaultRagRetriever(1, model_variables, tenant_info)
llm = model_variables['llm_no_rag']
template = model_variables['encyclopedia_template']
language_template = create_language_template(template, language)
rag_prompt = ChatPromptTemplate.from_template(language_template)
setup = RunnablePassthrough()
output_parser = StrOutputParser()
new_interaction_embeddings = []
chain = setup | rag_prompt | llm | output_parser
input_question = {"question": detailed_question}
# Invoke the chain with the actual question
answer = chain.invoke(input_question)
new_interaction.answer = answer
result = {
'answer': answer,
'citations': [],
'insufficient_info': False
}
# Disable langchain debugging if set above.
# set_debug(False)
new_interaction.answer_at = dt.now(tz.utc)
chat_session.session_end = dt.now(tz.utc)
try:
db.session.add(chat_session)
db.session.add(new_interaction)
db.session.commit()
return result, new_interaction
except SQLAlchemyError as e:
current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
except Exception as e:
current_app.logger.error(f'execute_specialist: Error executing specialist: {e}')
raise