- Introduction of dynamic Retrievers & Specialists

- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists
- Start adaptation of chat client
2024-11-15 10:00:53 +01:00
parent 55a8a95f79
commit 1807435339
101 changed files with 4181 additions and 1764 deletions
--- a/eveai_chat_workers/tasks.py
+++ b/eveai_chat_workers/tasks.py
@@ -1,24 +1,23 @@
 from datetime import datetime as dt, timezone as tz
+from typing import Dict, Any, Optional
+
 from flask import current_app
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from sqlalchemy.exc import SQLAlchemyError

-# OpenAI imports
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.exceptions import LangChainException
-
+from common.utils.config_field_types import TaggingFields
 from common.utils.database import Database
-from common.models.document import Embedding
+from common.models.document import Embedding, Catalog
 from common.models.user import Tenant
-from common.models.interaction import ChatSession, Interaction, InteractionEmbedding
-from common.extensions import db
+from common.models.interaction import ChatSession, Interaction, InteractionEmbedding, Specialist, SpecialistRetriever
+from common.extensions import db, cache_manager
 from common.utils.celery_utils import current_celery
-from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
-from common.langchain.retrievers.eveai_default_rag_retriever import EveAIDefaultRagRetriever
-from common.langchain.retrievers.eveai_history_retriever import EveAIHistoryRetriever
 from common.utils.business_event import BusinessEvent
 from common.utils.business_event_context import current_event
+from config.specialist_types import SPECIALIST_TYPES
+from eveai_chat_workers.chat_session_cache import get_chat_history
+from eveai_chat_workers.specialists.registry import SpecialistRegistry
+from config.retriever_types import RETRIEVER_TYPES
+from eveai_chat_workers.specialists.specialist_typing import SpecialistArguments


 # Healthcheck task
@@ -27,41 +26,207 @@ def ping():
    return 'pong'


-def detail_question(question, language, model_variables, session_id):
-    current_app.logger.debug(f'Detail question: {question}')
-    current_app.logger.debug(f'model_variables: {model_variables}')
-    current_app.logger.debug(f'session_id: {session_id}')
-    retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
-    llm = model_variables['llm']
-    template = model_variables['history_template']
-    language_template = create_language_template(template, language)
-    full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
-    history_prompt = ChatPromptTemplate.from_template(full_template)
-    setup_and_retrieval = RunnableParallel({"history": retriever, "question": RunnablePassthrough()})
-    output_parser = StrOutputParser()
-
-    chain = setup_and_retrieval | history_prompt | llm | output_parser
-
-    try:
-        answer = chain.invoke(question)
-        return answer
-    except LangChainException as e:
-        current_app.logger.error(f'Error detailing question: {e}')
-        raise
+class ArgumentPreparationError(Exception):
+    """Raised when specialist or retriever arguments are missing, invalid or cannot be prepared"""
+    pass


-@current_celery.task(name='ask_question', queue='llm_interactions')
-def ask_question(tenant_id, question, language, session_id, user_timezone, room):
-    """returns result structured as follows:
-    result = {
-    'answer': 'Your answer here',
-    'citations': ['http://example.com/citation1', 'http://example.com/citation2'],
-    'algorithm': 'algorithm_name',
-    'interaction_id': 'interaction_id_value'
-    }
+def validate_specialist_arguments(specialist_type: str, arguments: Dict[str, Any]) -> None:
    """
-    with BusinessEvent("Ask Question", tenant_id=tenant_id, chat_session_id=session_id):
-        current_app.logger.info(f'ask_question: Received question for tenant {tenant_id}: {question}. Processing...')
+    Validate specialist arguments against the SPECIALIST_TYPES entry for the given type
+
+    Args:
+        specialist_type: Type of specialist; must be a key of SPECIALIST_TYPES
+        arguments: Arguments to validate (excluding retriever-specific arguments)
+
+    Raises:
+        ArgumentPreparationError: Unknown type, missing required argument, or str/int type mismatch
+    """
+    specialist_config = SPECIALIST_TYPES.get(specialist_type)
+    if not specialist_config:
+        raise ArgumentPreparationError(f"Unknown specialist type: {specialist_type}")
+
+    required_args = specialist_config.get('arguments', {})
+
+    # Each declared argument: enforce presence when 'required', then check its type if supplied
+    for arg_name, arg_config in required_args.items():
+        if arg_config.get('required', False) and arg_name not in arguments:
+            raise ArgumentPreparationError(f"Missing required argument '{arg_name}' for specialist")
+
+        if arg_name in arguments:
+            # Only 'str' and 'int' are enforced; note bool passes the 'int' check (bool subclasses int)
+            expected_type = arg_config.get('type')
+            if expected_type == 'str' and not isinstance(arguments[arg_name], str):
+                raise ArgumentPreparationError(f"Argument '{arg_name}' must be a string")
+            elif expected_type == 'int' and not isinstance(arguments[arg_name], int):
+                raise ArgumentPreparationError(f"Argument '{arg_name}' must be an integer")
+
+
+def validate_retriever_arguments(retriever_type: str, arguments: Dict[str, Any],
+                                 catalog_config: Optional[Dict[str, Any]] = None) -> None:
+    """
+    Validate retriever arguments against the RETRIEVER_TYPES entry for the given type
+
+    Args:
+        retriever_type: Type of retriever; must be a key of RETRIEVER_TYPES
+        arguments: Arguments to validate
+        catalog_config: Optional catalog configuration for metadata validation
+
+    Raises:
+        ArgumentPreparationError: Unknown type, missing required argument, or invalid metadata filters
+    """
+    retriever_config = RETRIEVER_TYPES.get(retriever_type)
+    if not retriever_config:
+        raise ArgumentPreparationError(f"Unknown retriever type: {retriever_type}")
+
+    # Presence check only: unlike specialist arguments, declared types are not verified here
+    required_args = retriever_config.get('arguments', {})
+    for arg_name, arg_config in required_args.items():
+        if arg_config.get('required', False) and arg_name not in arguments:
+            raise ArgumentPreparationError(f"Missing required argument '{arg_name}' for retriever")
+
+    # Metadata filters are checked only when the catalog config defines 'tagging_fields'
+    if catalog_config and 'metadata_filters' in arguments:
+        if 'tagging_fields' in catalog_config:
+            tagging_fields = TaggingFields.from_dict(catalog_config['tagging_fields'])
+            errors = tagging_fields.validate_argument_values(arguments['metadata_filters'])
+            if errors:
+                raise ArgumentPreparationError(f"Invalid metadata filters: {errors}")
+
+
+def is_retriever_id(key: str) -> bool:
+    """
+    Check if a key represents a valid retriever ID.
+    Valid formats: any string that int() parses to a value > 0 (e.g. "7" or "007")
+
+    Args:
+        key: String to check; must be a str (int(None) would raise an uncaught TypeError)
+
+    Returns:
+        bool: True if the key represents a valid retriever ID
+    """
+    try:
+        # int() accepts leading zeros, and also surrounding whitespace, a '+' sign and '_' separators
+        value = int(key)
+        # Zero and negative numbers are not retriever IDs
+        return value > 0
+    except ValueError:
+        return False
+
+
+def prepare_arguments(specialist: Any, arguments: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Prepare complete argument dictionary for specialist execution with inheritance
+
+    Args:
+        specialist: Specialist model instance (its `type` and `id` are read)
+        arguments: Dictionary containing:
+            - Specialist arguments
+            - Retriever-specific arguments keyed by retriever ID (e.g. '7' or '007')
+
+    Returns:
+        Specialist arguments plus 'retriever_arguments': {retriever id: merged arguments incl. 'type'}
+
+    Raises:
+        ArgumentPreparationError: If argument preparation or validation fails (any error is wrapped)
+    """
+    try:
+        # Separate specialist arguments from retriever arguments
+        retriever_args = {}
+        specialist_args = {}
+
+        for key, value in arguments.items():
+            if isinstance(key, str) and is_retriever_id(key):  # Retriever ID
+                retriever_args[str(int(key))] = value  # canonical form, so '007' matches str(retriever.id)
+            else:
+                specialist_args[key] = value
+
+        # Validate specialist arguments
+        validate_specialist_arguments(specialist.type, specialist_args)
+
+        # Get all retrievers associated with this specialist
+        specialist_retrievers = (
+            SpecialistRetriever.query
+            .filter_by(specialist_id=specialist.id)
+            .all()
+        )
+
+        # Process each retriever
+        prepared_retriever_args = {}
+        for spec_retriever in specialist_retrievers:
+            retriever = spec_retriever.retriever
+            retriever_id = str(retriever.id)
+
+            # Get catalog configuration if it exists
+            catalog_config = None
+            if retriever.catalog_id:
+                try:
+                    catalog = Catalog.query.get(retriever.catalog_id)
+                    if catalog:
+                        catalog_config = catalog.configuration
+                except SQLAlchemyError:
+                    current_app.logger.warning(
+                        f"Could not fetch catalog {retriever.catalog_id} for retriever {retriever_id}"
+                    )
+
+            # Start with specialist arguments (inheritance)
+            inherited_args = specialist_args.copy()
+
+            # Override with retriever-specific arguments if provided
+            if retriever_id in retriever_args:
+                inherited_args.update(retriever_args[retriever_id])
+
+            # Always include the retriever type
+            inherited_args['type'] = retriever.type
+
+            # Validate the combined arguments
+            validate_retriever_arguments(
+                retriever.type,
+                inherited_args,
+                catalog_config
+            )
+
+            prepared_retriever_args[retriever_id] = inherited_args
+
+        # Construct final argument structure
+        final_arguments = {
+            **specialist_args,
+            'retriever_arguments': prepared_retriever_args
+        }
+        return final_arguments
+
+    except SQLAlchemyError as e:
+        current_app.logger.error(f'Database error during argument preparation: {e}')
+        raise ArgumentPreparationError(f"Database error: {str(e)}") from e
+    except Exception as e:
+        current_app.logger.error(f'Error during argument preparation: {e}')
+        raise ArgumentPreparationError(str(e)) from e
+
+
+@current_celery.task(name='execute_specialist', queue='llm_interactions')
+def execute_specialist(tenant_id: int, specialist_id: int, arguments: Dict[str, Any],
+                       session_id: str, user_timezone: str, room: str) -> dict:
+    """
+    Execute a specialist with given arguments
+
+    Args:
+        tenant_id: ID of the tenant
+        specialist_id: ID of the specialist to use
+        arguments: Dictionary containing all required arguments for specialist and retrievers
+        session_id: Chat session ID
+        user_timezone: User's timezone
+        room: Socket.IO room for the response
+
+    Returns:
+        dict: {
+            'result': Dict - Specialist execution result
+            'interaction_id': int - Created interaction ID
+            'room': str - Socket.IO room
+        }
+    """
+    with BusinessEvent("Execute Specialist", tenant_id=tenant_id, chat_session_id=session_id) as event:
+        current_app.logger.info(
+            f'execute_specialist: Processing request for tenant {tenant_id} using specialist {specialist_id}')

        try:
            # Retrieve the tenant
@@ -69,208 +234,85 @@ def ask_question(tenant_id, question, language, session_id, user_timezone, room)
            if not tenant:
                raise Exception(f'Tenant {tenant_id} not found.')

-            # Ensure we are working in the correct database schema
+            # Switch to correct database schema
            Database(tenant_id).switch_schema()

-            # Ensure we have a session to story history
-            chat_session = ChatSession.query.filter_by(session_id=session_id).first()
-            if not chat_session:
+            # Ensure we have a session
+            cached_session = cache_manager.chat_session_cache.get_cached_session(
+                session_id,
+                create_params={'timezone': user_timezone}
+            )
+
+            # Get specialist from database
+            specialist = Specialist.query.get_or_404(specialist_id)
+
+            # Prepare complete arguments
+            try:
+                raw_arguments = prepare_arguments(specialist, arguments)
+                # Convert the prepared arguments into a SpecialistArguments instance
+                complete_arguments = SpecialistArguments.create(
+                    type_name=specialist.type,
+                    specialist_args={k: v for k, v in raw_arguments.items() if k != 'retriever_arguments'},
+                    retriever_args=raw_arguments.get('retriever_arguments', {})
+                )
+            except ValueError as e:
+                current_app.logger.error(f'execute_specialist: Error preparing arguments: {e}')
+                raise
+
+            # Create new interaction record
+            new_interaction = Interaction()
+            new_interaction.chat_session_id = cached_session.id
+            new_interaction.timezone = user_timezone
+            new_interaction.question_at = dt.now(tz.utc)
+            new_interaction.specialist_id = specialist.id
+            new_interaction.specialist_arguments = complete_arguments.model_dump(mode='json')
+
+            try:
+                db.session.add(new_interaction)
+                db.session.commit()
+                event.update_attribute('interaction_id', new_interaction.id)
+
+            except SQLAlchemyError as e:
+                current_app.logger.error(f'execute_specialist: Error creating interaction: {e}')
+                raise
+
+            with current_event.create_span("Specialist invocation"):
+                # Initialize specialist instance
+                specialist_class = SpecialistRegistry.get_specialist_class(specialist.type)
+                specialist_instance = specialist_class(
+                    tenant_id=tenant_id,
+                    specialist_id=specialist_id,
+                    session_id=session_id,
+                )
+
+                # Execute specialist
+                result = specialist_instance.execute(complete_arguments)
+
+                # Update interaction record
+                new_interaction.specialist_results = result.model_dump(mode='json')  # Store complete result
+                new_interaction.answer_at = dt.now(tz.utc)
+
                try:
-                    chat_session = ChatSession()
-                    chat_session.session_id = session_id
-                    chat_session.session_start = dt.now(tz.utc)
-                    chat_session.timezone = user_timezone
-                    db.session.add(chat_session)
+                    db.session.add(new_interaction)
                    db.session.commit()
                except SQLAlchemyError as e:
-                    current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
+                    current_app.logger.error(f'execute_specialist: Error updating interaction: {e}')
                    raise

-            with current_event.create_span("RAG Answer"):
-                result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
-                result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
-                result['interaction_id'] = interaction.id
-                result['room'] = room  # Include the room in the result
+                # Now that we have a complete interaction with an answer, add it to the cache
+                cache_manager.chat_session_cache.add_completed_interaction(session_id, new_interaction)

-            if result['insufficient_info']:
-                if 'LLM' in tenant.fallback_algorithms:
-                    with current_event.create_span("Fallback Algorithm LLM"):
-                        result, interaction = answer_using_llm(question, language, tenant, chat_session)
-                        result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['LLM']['name']
-                        result['interaction_id'] = interaction.id
-                        result['room'] = room  # Include the room in the result
-
-            return result
-        except Exception as e:
-            current_app.logger.error(f'ask_question: Error processing question: {e}')
-            raise
-
-
-def answer_using_tenant_rag(question, language, tenant, chat_session):
-    new_interaction = Interaction()
-    new_interaction.question = question
-    new_interaction.language = language
-    new_interaction.timezone = chat_session.timezone
-    new_interaction.appreciation = None
-    new_interaction.chat_session_id = chat_session.id
-    new_interaction.question_at = dt.now(tz.utc)
-    new_interaction.algorithm_used = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
-
-    # Select variables to work with depending on tenant model
-    model_variables = select_model_variables(tenant)
-    tenant_info = tenant.to_dict()
-
-    # Langchain debugging if required
-    # set_debug(True)
-
-    with current_event.create_span("Detail Question"):
-        detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
-        if model_variables['rag_tuning']:
-            current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
-            current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
-        new_interaction.detailed_question = detailed_question
-        new_interaction.detailed_question_at = dt.now(tz.utc)
-
-    with current_event.create_span("Generate Answer using RAG"):
-        retriever = EveAIDefaultRagRetriever(1, model_variables, tenant_info)
-        llm = model_variables['llm']
-        template = model_variables['rag_template']
-        language_template = create_language_template(template, language)
-        full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
-        rag_prompt = ChatPromptTemplate.from_template(full_template)
-        setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
-        if model_variables['rag_tuning']:
-            current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
-            current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
-
-        new_interaction_embeddings = []
-        if not model_variables['cited_answer_cls']:  # The model doesn't support structured feedback
-            output_parser = StrOutputParser()
-
-            chain = setup_and_retrieval | rag_prompt | llm | output_parser
-
-            # Invoke the chain with the actual question
-            answer = chain.invoke(detailed_question)
-            new_interaction.answer = answer
-            result = {
-                'answer': answer,
-                'citations': [],
-                'insufficient_info': False
+            # Prepare response
+            response = {
+                'result': result.model_dump(),
+                'interaction_id': new_interaction.id,
+                'room': room
            }

-        else:  # The model supports structured feedback
-            structured_llm = llm.with_structured_output(model_variables['cited_answer_cls'])
+            return response

-            chain = setup_and_retrieval | rag_prompt | structured_llm
-
-            result = chain.invoke(detailed_question).dict()
-            current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
-            current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
-            current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
-            if model_variables['rag_tuning']:
-                current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
-                current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
-                current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
-                current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
-            new_interaction.answer = result['answer']
-
-            # Filter out the existing Embedding IDs
-            given_embedding_ids = [int(emb_id) for emb_id in result['citations']]
-            embeddings = (
-                db.session.query(Embedding)
-                .filter(Embedding.id.in_(given_embedding_ids))
-                .all()
-            )
-            existing_embedding_ids = [emb.id for emb in embeddings]
-            urls = list(set(emb.document_version.url for emb in embeddings))
-            if model_variables['rag_tuning']:
-                current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
-                current_app.rag_tuning_logger.debug(f'{urls}')
-                current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
-
-            for emb_id in existing_embedding_ids:
-                new_interaction_embedding = InteractionEmbedding(embedding_id=emb_id)
-                new_interaction_embedding.interaction = new_interaction
-                new_interaction_embeddings.append(new_interaction_embedding)
-
-            result['citations'] = urls
-
-        # Disable langchain debugging if set above.
-        # set_debug(False)
-
-        new_interaction.answer_at = dt.now(tz.utc)
-        chat_session.session_end = dt.now(tz.utc)
-
-        try:
-            db.session.add(chat_session)
-            db.session.add(new_interaction)
-            db.session.add_all(new_interaction_embeddings)
-            db.session.commit()
-            return result, new_interaction
-        except SQLAlchemyError as e:
-            current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
-            raise
-
-
-def answer_using_llm(question, language, tenant, chat_session):
-    new_interaction = Interaction()
-    new_interaction.question = question
-    new_interaction.language = language
-    new_interaction.timezone = chat_session.timezone
-    new_interaction.appreciation = None
-    new_interaction.chat_session_id = chat_session.id
-    new_interaction.question_at = dt.now(tz.utc)
-    new_interaction.algorithm_used = current_app.config['INTERACTION_ALGORITHMS']['LLM']['name']
-
-    # Select variables to work with depending on tenant model
-    model_variables = select_model_variables(tenant)
-    tenant_info = tenant.to_dict()
-
-    # Langchain debugging if required
-    # set_debug(True)
-
-    with current_event.create_span("Detail Question"):
-        detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
-        current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
-        new_interaction.detailed_question = detailed_question
-        new_interaction.detailed_question_at = dt.now(tz.utc)
-
-    with current_event.create_span("Detail Answer using LLM"):
-        retriever = EveAIDefaultRagRetriever(1, model_variables, tenant_info)
-        llm = model_variables['llm_no_rag']
-        template = model_variables['encyclopedia_template']
-        language_template = create_language_template(template, language)
-        rag_prompt = ChatPromptTemplate.from_template(language_template)
-        setup = RunnablePassthrough()
-        output_parser = StrOutputParser()
-
-        new_interaction_embeddings = []
-
-        chain = setup | rag_prompt | llm | output_parser
-        input_question = {"question": detailed_question}
-
-        # Invoke the chain with the actual question
-        answer = chain.invoke(input_question)
-        new_interaction.answer = answer
-        result = {
-            'answer': answer,
-            'citations': [],
-            'insufficient_info': False
-        }
-
-        # Disable langchain debugging if set above.
-        # set_debug(False)
-
-        new_interaction.answer_at = dt.now(tz.utc)
-        chat_session.session_end = dt.now(tz.utc)
-
-        try:
-            db.session.add(chat_session)
-            db.session.add(new_interaction)
-            db.session.commit()
-            return result, new_interaction
-        except SQLAlchemyError as e:
-            current_app.logger.error(f'ask_question: Error saving interaction to database: {e}')
+        except Exception as e:
+            current_app.logger.error(f'execute_specialist: Error executing specialist: {e}')
            raise