Business event tracing completed for both eveai_workers tasks and eveai_chat_workers tasks
This commit is contained in:
@@ -24,6 +24,8 @@ from common.utils.celery_utils import current_celery
|
||||
from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
|
||||
from common.langchain.eveai_retriever import EveAIRetriever
|
||||
from common.langchain.eveai_history_retriever import EveAIHistoryRetriever
|
||||
from common.utils.business_event import BusinessEvent
|
||||
from common.utils.business_event_context import current_event
|
||||
|
||||
|
||||
# Healthcheck task
|
||||
@@ -65,53 +67,56 @@ def ask_question(tenant_id, question, language, session_id, user_timezone, room)
|
||||
'interaction_id': 'interaction_id_value'
|
||||
}
|
||||
"""
|
||||
current_app.logger.info(f'ask_question: Received question for tenant {tenant_id}: {question}. Processing...')
|
||||
with BusinessEvent("Ask Question", tenant_id=tenant_id, session_id=session_id):
|
||||
current_app.logger.info(f'ask_question: Received question for tenant {tenant_id}: {question}. Processing...')
|
||||
|
||||
try:
|
||||
# Retrieve the tenant
|
||||
tenant = Tenant.query.get(tenant_id)
|
||||
if not tenant:
|
||||
raise Exception(f'Tenant {tenant_id} not found.')
|
||||
try:
|
||||
# Retrieve the tenant
|
||||
tenant = Tenant.query.get(tenant_id)
|
||||
if not tenant:
|
||||
raise Exception(f'Tenant {tenant_id} not found.')
|
||||
|
||||
# Ensure we are working in the correct database schema
|
||||
Database(tenant_id).switch_schema()
|
||||
# Ensure we are working in the correct database schema
|
||||
Database(tenant_id).switch_schema()
|
||||
|
||||
# Ensure we have a session to store history
|
||||
chat_session = ChatSession.query.filter_by(session_id=session_id).first()
|
||||
if not chat_session:
|
||||
try:
|
||||
chat_session = ChatSession()
|
||||
chat_session.session_id = session_id
|
||||
chat_session.session_start = dt.now(tz.utc)
|
||||
chat_session.timezone = user_timezone
|
||||
db.session.add(chat_session)
|
||||
db.session.commit()
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
|
||||
raise
|
||||
# Ensure we have a session to store history
|
||||
chat_session = ChatSession.query.filter_by(session_id=session_id).first()
|
||||
if not chat_session:
|
||||
try:
|
||||
chat_session = ChatSession()
|
||||
chat_session.session_id = session_id
|
||||
chat_session.session_start = dt.now(tz.utc)
|
||||
chat_session.timezone = user_timezone
|
||||
db.session.add(chat_session)
|
||||
db.session.commit()
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
|
||||
raise
|
||||
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Received question for tenant {tenant_id}:\n{question}. Processing...')
|
||||
current_app.rag_tuning_logger.debug(f'Tenant Information: \n{tenant.to_dict()}')
|
||||
current_app.rag_tuning_logger.debug(f'===================================================================')
|
||||
current_app.rag_tuning_logger.debug(f'===================================================================')
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Received question for tenant {tenant_id}:\n{question}. Processing...')
|
||||
current_app.rag_tuning_logger.debug(f'Tenant Information: \n{tenant.to_dict()}')
|
||||
current_app.rag_tuning_logger.debug(f'===================================================================')
|
||||
current_app.rag_tuning_logger.debug(f'===================================================================')
|
||||
|
||||
result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
|
||||
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
|
||||
result['interaction_id'] = interaction.id
|
||||
result['room'] = room # Include the room in the result
|
||||
|
||||
if result['insufficient_info']:
|
||||
if 'LLM' in tenant.fallback_algorithms:
|
||||
result, interaction = answer_using_llm(question, language, tenant, chat_session)
|
||||
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['LLM']['name']
|
||||
with current_event.create_span("RAG Answer"):
|
||||
result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
|
||||
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
|
||||
result['interaction_id'] = interaction.id
|
||||
result['room'] = room # Include the room in the result
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'ask_question: Error processing question: {e}')
|
||||
raise
|
||||
if result['insufficient_info']:
|
||||
if 'LLM' in tenant.fallback_algorithms:
|
||||
with current_event.create_span("Fallback Algorithm LLM"):
|
||||
result, interaction = answer_using_llm(question, language, tenant, chat_session)
|
||||
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['LLM']['name']
|
||||
result['interaction_id'] = interaction.id
|
||||
result['room'] = room # Include the room in the result
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'ask_question: Error processing question: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def answer_using_tenant_rag(question, language, tenant, chat_session):
|
||||
@@ -131,92 +136,94 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
|
||||
# Langchain debugging if required
|
||||
# set_debug(True)
|
||||
|
||||
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
|
||||
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
new_interaction.detailed_question = detailed_question
|
||||
new_interaction.detailed_question_at = dt.now(tz.utc)
|
||||
|
||||
retriever = EveAIRetriever(model_variables, tenant_info)
|
||||
llm = model_variables['llm']
|
||||
template = model_variables['rag_template']
|
||||
language_template = create_language_template(template, language)
|
||||
full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
|
||||
rag_prompt = ChatPromptTemplate.from_template(full_template)
|
||||
setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
|
||||
new_interaction_embeddings = []
|
||||
if not model_variables['cited_answer_cls']: # The model doesn't support structured feedback
|
||||
output_parser = StrOutputParser()
|
||||
|
||||
chain = setup_and_retrieval | rag_prompt | llm | output_parser
|
||||
|
||||
# Invoke the chain with the actual question
|
||||
answer = chain.invoke(detailed_question)
|
||||
new_interaction.answer = answer
|
||||
result = {
|
||||
'answer': answer,
|
||||
'citations': [],
|
||||
'insufficient_info': False
|
||||
}
|
||||
|
||||
else: # The model supports structured feedback
|
||||
structured_llm = llm.with_structured_output(model_variables['cited_answer_cls'])
|
||||
|
||||
chain = setup_and_retrieval | rag_prompt | structured_llm
|
||||
|
||||
result = chain.invoke(detailed_question).dict()
|
||||
current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
|
||||
current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
|
||||
current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
|
||||
with current_event.create_span("Detail Question"):
|
||||
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
|
||||
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
|
||||
current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
new_interaction.answer = result['answer']
|
||||
new_interaction.detailed_question = detailed_question
|
||||
new_interaction.detailed_question_at = dt.now(tz.utc)
|
||||
|
||||
# Filter out the existing Embedding IDs
|
||||
given_embedding_ids = [int(emb_id) for emb_id in result['citations']]
|
||||
embeddings = (
|
||||
db.session.query(Embedding)
|
||||
.filter(Embedding.id.in_(given_embedding_ids))
|
||||
.all()
|
||||
)
|
||||
existing_embedding_ids = [emb.id for emb in embeddings]
|
||||
urls = list(set(emb.document_version.url for emb in embeddings))
|
||||
with current_event.create_span("Generate Answer using RAG"):
|
||||
retriever = EveAIRetriever(model_variables, tenant_info)
|
||||
llm = model_variables['llm']
|
||||
template = model_variables['rag_template']
|
||||
language_template = create_language_template(template, language)
|
||||
full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
|
||||
rag_prompt = ChatPromptTemplate.from_template(full_template)
|
||||
setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
|
||||
current_app.rag_tuning_logger.debug(f'{urls}')
|
||||
current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
|
||||
for emb_id in existing_embedding_ids:
|
||||
new_interaction_embedding = InteractionEmbedding(embedding_id=emb_id)
|
||||
new_interaction_embedding.interaction = new_interaction
|
||||
new_interaction_embeddings.append(new_interaction_embedding)
|
||||
new_interaction_embeddings = []
|
||||
if not model_variables['cited_answer_cls']: # The model doesn't support structured feedback
|
||||
output_parser = StrOutputParser()
|
||||
|
||||
result['citations'] = urls
|
||||
chain = setup_and_retrieval | rag_prompt | llm | output_parser
|
||||
|
||||
# Disable langchain debugging if set above.
|
||||
# set_debug(False)
|
||||
# Invoke the chain with the actual question
|
||||
answer = chain.invoke(detailed_question)
|
||||
new_interaction.answer = answer
|
||||
result = {
|
||||
'answer': answer,
|
||||
'citations': [],
|
||||
'insufficient_info': False
|
||||
}
|
||||
|
||||
new_interaction.answer_at = dt.now(tz.utc)
|
||||
chat_session.session_end = dt.now(tz.utc)
|
||||
else: # The model supports structured feedback
|
||||
structured_llm = llm.with_structured_output(model_variables['cited_answer_cls'])
|
||||
|
||||
try:
|
||||
db.session.add(chat_session)
|
||||
db.session.add(new_interaction)
|
||||
db.session.add_all(new_interaction_embeddings)
|
||||
db.session.commit()
|
||||
return result, new_interaction
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
|
||||
raise
|
||||
chain = setup_and_retrieval | rag_prompt | structured_llm
|
||||
|
||||
result = chain.invoke(detailed_question).dict()
|
||||
current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
|
||||
current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
|
||||
current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
new_interaction.answer = result['answer']
|
||||
|
||||
# Filter out the existing Embedding IDs
|
||||
given_embedding_ids = [int(emb_id) for emb_id in result['citations']]
|
||||
embeddings = (
|
||||
db.session.query(Embedding)
|
||||
.filter(Embedding.id.in_(given_embedding_ids))
|
||||
.all()
|
||||
)
|
||||
existing_embedding_ids = [emb.id for emb in embeddings]
|
||||
urls = list(set(emb.document_version.url for emb in embeddings))
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
|
||||
current_app.rag_tuning_logger.debug(f'{urls}')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
|
||||
for emb_id in existing_embedding_ids:
|
||||
new_interaction_embedding = InteractionEmbedding(embedding_id=emb_id)
|
||||
new_interaction_embedding.interaction = new_interaction
|
||||
new_interaction_embeddings.append(new_interaction_embedding)
|
||||
|
||||
result['citations'] = urls
|
||||
|
||||
# Disable langchain debugging if set above.
|
||||
# set_debug(False)
|
||||
|
||||
new_interaction.answer_at = dt.now(tz.utc)
|
||||
chat_session.session_end = dt.now(tz.utc)
|
||||
|
||||
try:
|
||||
db.session.add(chat_session)
|
||||
db.session.add(new_interaction)
|
||||
db.session.add_all(new_interaction_embeddings)
|
||||
db.session.commit()
|
||||
return result, new_interaction
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def answer_using_llm(question, language, tenant, chat_session):
|
||||
@@ -236,47 +243,49 @@ def answer_using_llm(question, language, tenant, chat_session):
|
||||
# Langchain debugging if required
|
||||
# set_debug(True)
|
||||
|
||||
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
|
||||
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
|
||||
new_interaction.detailed_question = detailed_question
|
||||
new_interaction.detailed_question_at = dt.now(tz.utc)
|
||||
with current_event.create_span("Detail Question"):
|
||||
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
|
||||
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
|
||||
new_interaction.detailed_question = detailed_question
|
||||
new_interaction.detailed_question_at = dt.now(tz.utc)
|
||||
|
||||
retriever = EveAIRetriever(model_variables, tenant_info)
|
||||
llm = model_variables['llm_no_rag']
|
||||
template = model_variables['encyclopedia_template']
|
||||
language_template = create_language_template(template, language)
|
||||
rag_prompt = ChatPromptTemplate.from_template(language_template)
|
||||
setup = RunnablePassthrough()
|
||||
output_parser = StrOutputParser()
|
||||
with current_event.create_span("Detail Answer using LLM"):
|
||||
retriever = EveAIRetriever(model_variables, tenant_info)
|
||||
llm = model_variables['llm_no_rag']
|
||||
template = model_variables['encyclopedia_template']
|
||||
language_template = create_language_template(template, language)
|
||||
rag_prompt = ChatPromptTemplate.from_template(language_template)
|
||||
setup = RunnablePassthrough()
|
||||
output_parser = StrOutputParser()
|
||||
|
||||
new_interaction_embeddings = []
|
||||
new_interaction_embeddings = []
|
||||
|
||||
chain = setup | rag_prompt | llm | output_parser
|
||||
input_question = {"question": detailed_question}
|
||||
chain = setup | rag_prompt | llm | output_parser
|
||||
input_question = {"question": detailed_question}
|
||||
|
||||
# Invoke the chain with the actual question
|
||||
answer = chain.invoke(input_question)
|
||||
new_interaction.answer = answer
|
||||
result = {
|
||||
'answer': answer,
|
||||
'citations': [],
|
||||
'insufficient_info': False
|
||||
}
|
||||
# Invoke the chain with the actual question
|
||||
answer = chain.invoke(input_question)
|
||||
new_interaction.answer = answer
|
||||
result = {
|
||||
'answer': answer,
|
||||
'citations': [],
|
||||
'insufficient_info': False
|
||||
}
|
||||
|
||||
# Disable langchain debugging if set above.
|
||||
# set_debug(False)
|
||||
# Disable langchain debugging if set above.
|
||||
# set_debug(False)
|
||||
|
||||
new_interaction.answer_at = dt.now(tz.utc)
|
||||
chat_session.session_end = dt.now(tz.utc)
|
||||
new_interaction.answer_at = dt.now(tz.utc)
|
||||
chat_session.session_end = dt.now(tz.utc)
|
||||
|
||||
try:
|
||||
db.session.add(chat_session)
|
||||
db.session.add(new_interaction)
|
||||
db.session.commit()
|
||||
return result, new_interaction
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
|
||||
raise
|
||||
try:
|
||||
db.session.add(chat_session)
|
||||
db.session.add(new_interaction)
|
||||
db.session.commit()
|
||||
return result, new_interaction
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def tasks_ping():
|
||||
|
||||
Reference in New Issue
Block a user