- Introduction of dynamic Retrievers & Specialists

- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists
- Start adaptation of chat client
2024-11-15 10:00:53 +01:00
parent 55a8a95f79
commit 1807435339
101 changed files with 4181 additions and 1764 deletions
--- a/config/catalog_types.py
+++ b/config/catalog_types.py
@@ -3,7 +3,8 @@ CATALOG_TYPES = {
    "STANDARD_CATALOG": {
        "name": "Standard Catalog",
        "Description": "A Catalog with information in Evie's Library, to be considered as a whole",
-        "configuration": {}
+        "configuration": {},
+        "document_version_configurations": []
    },
    "DOSSIER": {
        "name": "Dossier Catalog",
@@ -21,31 +22,6 @@ CATALOG_TYPES = {
                                - min_value/max_value: range limits (for numeric types only)""",
                "required": True,
                "default": {},
-                "field_properties": {
-                    "type": {
-                        "allowed_values": ["string", "integer", "float", "date", "enum"],
-                        "required": True
-                    },
-                    "required": {
-                        "type": "boolean",
-                        "default": False
-                    },
-                    "description": {
-                        "type": "string"
-                    },
-                    "allowed_values": {
-                        "type": "list",
-                        "description": "For enum type fields only"
-                    },
-                    "min_value": {
-                        "type": "number",
-                        "description": "For numeric fields only"
-                    },
-                    "max_value": {
-                        "type": "number",
-                        "description": "For numeric fields only"
-                    }
-                }
            }
        },
        "document_version_configurations": ["tagging_fields"]
--- a/config/config.py
+++ b/config/config.py
@@ -68,9 +68,6 @@ class Config(object):

    ANTHROPIC_LLM_VERSIONS = {'claude-3-5-sonnet': 'claude-3-5-sonnet-20240620', }

-    # Load prompt templates dynamically
-    PROMPT_TEMPLATES = {model: load_prompt_templates(model) for model in SUPPORTED_LLMS}
-
    # Annotation text chunk length
    ANNOTATION_TEXT_CHUNK_LENGTH = {
        'openai.gpt-4o': 10000,
@@ -87,9 +84,6 @@ class Config(object):
    # Anthropic API Keys
    ANTHROPIC_API_KEY = environ.get('ANTHROPIC_API_KEY')

-    # Portkey API Keys
-    PORTKEY_API_KEY = environ.get('PORTKEY_API_KEY')
-
    # Celery settings
    CELERY_TASK_SERIALIZER = 'json'
    CELERY_RESULT_SERIALIZER = 'json'
@@ -181,13 +175,21 @@ class DevConfig(Config):
    # file upload settings
    # UPLOAD_FOLDER = '/app/tenant_files'

+    # Redis Settings
+    REDIS_URL = 'redis'
+    REDIS_PORT = '6379'
+    REDIS_BASE_URI = f'redis://{REDIS_URL}:{REDIS_PORT}'
+
    # Celery settings
    # eveai_app Redis Settings
-    CELERY_BROKER_URL = 'redis://redis:6379/0'
-    CELERY_RESULT_BACKEND = 'redis://redis:6379/0'
+    CELERY_BROKER_URL = f'{REDIS_BASE_URI}/0'
+    CELERY_RESULT_BACKEND = f'{REDIS_BASE_URI}/0'
    # eveai_chat Redis Settings
-    CELERY_BROKER_URL_CHAT = 'redis://redis:6379/3'
-    CELERY_RESULT_BACKEND_CHAT = 'redis://redis:6379/3'
+    CELERY_BROKER_URL_CHAT = f'{REDIS_BASE_URI}/3'
+    CELERY_RESULT_BACKEND_CHAT = f'{REDIS_BASE_URI}/3'
+    # eveai_chat_workers cache Redis Settings
+    CHAT_WORKER_CACHE_URL = f'{REDIS_BASE_URI}/4'
+

    # Unstructured settings
    # UNSTRUCTURED_API_KEY = 'pDgCrXumYhM3CNvjvwV8msMldXC3uw'
@@ -195,7 +197,7 @@ class DevConfig(Config):
    # UNSTRUCTURED_FULL_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io/general/v0/general'

    # SocketIO settings
-    SOCKETIO_MESSAGE_QUEUE = 'redis://redis:6379/1'
+    SOCKETIO_MESSAGE_QUEUE = f'{REDIS_BASE_URI}/1'
    SOCKETIO_CORS_ALLOWED_ORIGINS = '*'
    SOCKETIO_LOGGER = True
    SOCKETIO_ENGINEIO_LOGGER = True
@@ -211,7 +213,7 @@ class DevConfig(Config):
    GC_CRYPTO_KEY = 'envelope-encryption-key'

    # Session settings
-    SESSION_REDIS = redis.from_url('redis://redis:6379/2')
+    SESSION_REDIS = redis.from_url(f'{REDIS_BASE_URI}/2')

    # PATH settings
    ffmpeg_path = '/usr/bin/ffmpeg'
@@ -278,6 +280,8 @@ class ProdConfig(Config):
    # eveai_chat Redis Settings
    CELERY_BROKER_URL_CHAT = f'{REDIS_BASE_URI}/3'
    CELERY_RESULT_BACKEND_CHAT = f'{REDIS_BASE_URI}/3'
+    # eveai_chat_workers cache Redis Settings
+    CHAT_WORKER_CACHE_URL = f'{REDIS_BASE_URI}/4'

    # Session settings
    SESSION_REDIS = redis.from_url(f'{REDIS_BASE_URI}/2')
--- a/config/logging_config.py
+++ b/config/logging_config.py
@@ -1,4 +1,8 @@
+import json
 import os
+from datetime import datetime as dt, timezone as tz
+
+from flask import current_app
 from graypy import GELFUDPHandler
 import logging
 import logging.config
@@ -9,24 +13,173 @@ GRAYLOG_PORT = int(os.environ.get('GRAYLOG_PORT', 12201))
 env = os.environ.get('FLASK_ENV', 'development')


-class CustomLogRecord(logging.LogRecord):
+class TuningLogRecord(logging.LogRecord):
+    """Extended LogRecord that handles both tuning and business event logging.
+
+    On top of the standard LogRecord attributes, every instance carries a
+    ``component`` attribute (taken from the ``COMPONENT_NAME`` environment
+    variable, defaulting to ``'eveai_app'``) and a set of private ``_tuning_*``
+    fields that are exposed through the properties and helpers below.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        # Initialize extra fields after parent initialization
+        self._extra_fields = {}
+        # Backing fields for the is_tuning_log / tuning_type properties.
+        self._is_tuning_log = False
+        self._tuning_type = None
+        # Backing fields written by set_tuning_data() and read by get_tuning_data().
+        self._tuning_tenant_id = None
+        self._tuning_catalog_id = None
+        self._tuning_specialist_id = None
+        self._tuning_retriever_id = None
+        self._tuning_processor_id = None
         self.component = os.environ.get('COMPONENT_NAME', 'eveai_app')

-    def __setattr__(self, name, value):
-        if name not in {'event_type', 'tenant_id', 'trace_id', 'span_id', 'span_name', 'parent_span_id',
-                        'document_version_id', 'chat_session_id', 'interaction_id', 'environment'}:
-            super().__setattr__(name, value)
+    def getMessage(self):
+        """
+        Override getMessage to handle both string and dict messages.
+
+        Returns ``self.msg``, %-formatted with ``self.args`` when args are
+        present. The message is not converted with ``str()`` here, so a
+        non-string ``msg`` (e.g. a dict) logged without args is returned as-is.
+        """
+        msg = self.msg
+        if self.args:
+            # NOTE(review): `%` on a non-string msg (e.g. a dict) raises TypeError;
+            # presumably dict messages are always logged without args - confirm.
+            msg = msg % self.args
+        return msg
+
+    @property
+    def is_tuning_log(self):
+        """Whether this record was flagged as a tuning log (False by default)."""
+        return self._is_tuning_log
+
+    @is_tuning_log.setter
+    def is_tuning_log(self, value):
+        # Writes the backing field directly via object.__setattr__.
+        object.__setattr__(self, '_is_tuning_log', value)
+
+    @property
+    def tuning_type(self):
+        """The tuning type attached to this record (None by default)."""
+        return self._tuning_type
+
+    @tuning_type.setter
+    def tuning_type(self, value):
+        # Writes the backing field directly via object.__setattr__.
+        object.__setattr__(self, '_tuning_type', value)
+
+    def get_tuning_data(self):
+        """Get all tuning-related data if this is a tuning log.
+
+        Returns an empty dict when the record is not flagged as a tuning log;
+        otherwise a dict with the flag, the tuning type and the five
+        ``tuning_*_id`` values held in the private backing fields.
+        """
+        if not self._is_tuning_log:
+            return {}
+
+        return {
+            'is_tuning_log': self._is_tuning_log,
+            'tuning_type': self._tuning_type,
+            'tuning_tenant_id': self._tuning_tenant_id,
+            'tuning_catalog_id': self._tuning_catalog_id,
+            'tuning_specialist_id': self._tuning_specialist_id,
+            'tuning_retriever_id': self._tuning_retriever_id,
+            'tuning_processor_id': self._tuning_processor_id,
+        }
+
+    def set_tuning_data(self, tenant_id=None, catalog_id=None, specialist_id=None,
+                        retriever_id=None, processor_id=None):
+        """Set tuning-specific data.
+
+        Every backing field is overwritten on each call; arguments that are
+        omitted reset the corresponding field to None.
+        """
+        # NOTE(review): these values land in the private `_tuning_*` fields, while
+        # the formatters in this module read `record.tenant_id`, `record.catalog_id`,
+        # etc. - confirm whether callers are expected to set both.
+        object.__setattr__(self, '_tuning_tenant_id', tenant_id)
+        object.__setattr__(self, '_tuning_catalog_id', catalog_id)
+        object.__setattr__(self, '_tuning_specialist_id', specialist_id)
+        object.__setattr__(self, '_tuning_retriever_id', retriever_id)
+        object.__setattr__(self, '_tuning_processor_id', processor_id)


-def custom_log_record_factory(*args, **kwargs):
-    record = CustomLogRecord(*args, **kwargs)
-    return record
+class TuningFormatter(logging.Formatter):
+    """Universal formatter for all tuning logs.
+
+    Records that are not flagged with ``is_tuning_log`` are rendered with the
+    base format only. Flagged records additionally get a
+    ``[TUNING <type>] [<identifiers>]`` line and, when ``tuning_data`` is set,
+    a pretty-printed JSON ``Data:`` block.
+    """
+
+    # (label, record attribute) pairs rendered in the identifier list, in order.
+    # Specialist and Retriever are included so that records produced for those
+    # components do not end up with an empty identifier list.
+    _IDENTIFIER_FIELDS = (
+        ('Tenant', 'tenant_id'),
+        ('Catalog', 'catalog_id'),
+        ('Specialist', 'specialist_id'),
+        ('Retriever', 'retriever_id'),
+        ('Processor', 'processor_id'),
+    )
+
+    def __init__(self, fmt=None, datefmt=None):
+        """Create the formatter, falling back to a default format / date format.
+
+        Args:
+            fmt: %-style format string; a default layout is used when falsy.
+            datefmt: strftime pattern for ``%(asctime)s``; defaults to
+                ``'%Y-%m-%d %H:%M:%S'`` when falsy.
+        """
+        # NOTE(review): logging.Formatter renders asctime with time.localtime
+        # unless `converter` is overridden, so a datefmt that appends a literal
+        # "UTC" is only accurate when the process timezone is UTC - confirm.
+        super().__init__(fmt or '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
+                         datefmt or '%Y-%m-%d %H:%M:%S')
+
+    def format(self, record):
+        """Render ``record``, appending tuning context for tuning logs.
+
+        Args:
+            record: the log record to render.
+
+        Returns:
+            The formatted string. If the tuning data cannot be serialised, the
+            message (including the tuning header line) is returned with an
+            ``(Error formatting tuning data: ...)`` suffix instead of the data.
+        """
+        # First format with the default formatter to handle basic fields
+        formatted_msg = super().format(record)
+
+        if not getattr(record, 'is_tuning_log', False):
+            return formatted_msg
+
+        # Only identifiers that are present and truthy are shown.
+        identifiers = []
+        for label, attr in self._IDENTIFIER_FIELDS:
+            value = getattr(record, attr, None)
+            if value:
+                identifiers.append(f"{label}: {value}")
+
+        tuning_type = getattr(record, 'tuning_type', None)
+        formatted_msg = (
+            f"{formatted_msg}\n"
+            f"[TUNING {tuning_type}] [{' | '.join(identifiers)}]"
+        )
+
+        tuning_data = getattr(record, 'tuning_data', None)
+        if tuning_data:
+            try:
+                # default=str keeps values that are not JSON-native (datetime,
+                # UUID, Decimal, ...) in the log instead of dropping the whole
+                # data block on the first such value.
+                formatted_msg += f"\nData: {json.dumps(tuning_data, indent=2, default=str)}"
+            except (TypeError, ValueError) as e:
+                # Still possible for e.g. circular references or unsupported keys.
+                return f"{formatted_msg} (Error formatting tuning data: {str(e)})"
+
+        return formatted_msg
+
+
+class GraylogFormatter(logging.Formatter):
+    """Maintains existing Graylog formatting while adding tuning fields.
+
+    For records flagged with ``is_tuning_log`` a ``tuning_fields`` dict is
+    attached to the record before the base formatter renders it.
+    """
+
+    # Identifier attributes copied from the record into ``tuning_fields``.
+    _TUNING_ID_FIELDS = ('tenant_id', 'catalog_id', 'specialist_id',
+                         'retriever_id', 'processor_id')
+
+    def format(self, record):
+        """Attach ``tuning_fields`` to tuning records, then format as usual.
+
+        Args:
+            record: the log record to render.
+
+        Returns:
+            The string produced by ``logging.Formatter.format``.
+        """
+        if getattr(record, 'is_tuning_log', False):
+            # Read every field defensively: a record can be flagged as a tuning
+            # log without carrying all identifiers, and a missing attribute
+            # must not make the handler fail while emitting.
+            tuning_fields = {
+                'is_tuning_log': True,
+                'tuning_type': getattr(record, 'tuning_type', None),
+            }
+            for field_name in self._TUNING_ID_FIELDS:
+                tuning_fields[field_name] = getattr(record, field_name, None)
+            record.tuning_fields = tuning_fields
+        return super().format(record)
+
+
+class TuningLogger:
+    """Helper class to manage tuning logs with consistent structure.
+
+    Wraps a named logger together with the identifiers (tenant, catalog,
+    specialist, retriever, processor) that are stamped on every tuning record
+    it emits.
+    """
+
+    def __init__(self, logger_name, tenant_id=None, catalog_id=None, specialist_id=None, retriever_id=None, processor_id=None):
+        """Bind the helper to ``logger_name`` and remember the identifiers.
+
+        Args:
+            logger_name: name passed to ``logging.getLogger``.
+            tenant_id, catalog_id, specialist_id, retriever_id, processor_id:
+                optional identifiers copied onto each emitted record.
+        """
+        self.logger = logging.getLogger(logger_name)
+        self.tenant_id = tenant_id
+        self.catalog_id = catalog_id
+        self.specialist_id = specialist_id
+        self.retriever_id = retriever_id
+        self.processor_id = processor_id
+
+    def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DEBUG):
+        """Log a tuning event with structured data.
+
+        Args:
+            tuning_type: label stored on the record as ``tuning_type``.
+            message: the log message.
+            data: optional payload stored on the record as ``tuning_data``
+                (only attached when truthy).
+            level: logging level of the record; DEBUG by default.
+
+        Any exception raised while building or dispatching the record is
+        reported through the ``eveai_workers`` logger instead of propagating.
+        """
+        try:
+            # Build the record through the logger so the installed record
+            # factory is used. A record created with logging.LogRecord directly
+            # lacks the attributes the factory adds (e.g. `component`), which
+            # makes every formatter whose format string references
+            # %(component)s fail when the record is emitted.
+            record = self.logger.makeRecord(
+                self.logger.name, level, '', 0, message, (), None
+            )
+
+            # Add tuning-specific attributes
+            record.is_tuning_log = True
+            record.tuning_type = tuning_type
+            record.tenant_id = self.tenant_id
+            record.catalog_id = self.catalog_id
+            record.specialist_id = self.specialist_id
+            record.retriever_id = self.retriever_id
+            record.processor_id = self.processor_id
+
+            if data:
+                record.tuning_data = data
+
+            # Hand the record to the logger's filters and handlers.
+            self.logger.handle(record)
+
+        except Exception as e:
+            fallback_logger = logging.getLogger('eveai_workers')
+            fallback_logger.exception(f"Failed to log tuning message: {str(e)}")


 # Set the custom log record factory
-logging.setLogRecordFactory(custom_log_record_factory)
+logging.setLogRecordFactory(TuningLogRecord)


 LOGGING = {
@@ -38,7 +191,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_app.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_workers': {
@@ -46,7 +199,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_workers.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_chat': {
@@ -54,7 +207,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_chat.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_chat_workers': {
@@ -62,7 +215,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_chat_workers.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_api': {
@@ -70,7 +223,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_api.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_beat': {
@@ -78,7 +231,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_beat.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_entitlements': {
@@ -86,7 +239,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_entitlements.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_sqlalchemy': {
@@ -94,7 +247,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/sqlalchemy.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_mailman': {
@@ -102,7 +255,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/mailman.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_security': {
@@ -110,7 +263,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/security.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_rag_tuning': {
@@ -118,7 +271,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/rag_tuning.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_embed_tuning': {
@@ -126,7 +279,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/embed_tuning.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_business_events': {
@@ -134,7 +287,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/business_events.log',
            'maxBytes': 1024 * 1024 * 1,  # 1MB
-            'backupCount': 10,
+            'backupCount': 2,
            'formatter': 'standard',
        },
        'console': {
@@ -142,25 +295,38 @@ LOGGING = {
            'level': 'DEBUG',
            'formatter': 'standard',
        },
+        'tuning_file': {
+            'level': 'DEBUG',
+            'class': 'logging.handlers.RotatingFileHandler',
+            'filename': 'logs/tuning.log',
+            'maxBytes': 1024 * 1024 * 3,  # 3MB
+            'backupCount': 3,
+            'formatter': 'tuning',
+        },
        'graylog': {
            'level': 'DEBUG',
            'class': 'graypy.GELFUDPHandler',
            'host': GRAYLOG_HOST,
            'port': GRAYLOG_PORT,
-            'debugging_fields': True,  # Set to True if you want to include debugging fields
-            'extra_fields': True,  # Set to True if you want to include extra fields
+            'debugging_fields': True,
+            'formatter': 'graylog'
        },
    },
    'formatters': {
        'standard': {
-            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
-                      '[Thread: %(threadName)s]: %(message)s'
+            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d]: %(message)s',
+            'datefmt': '%Y-%m-%d %H:%M:%S'
        },
        'graylog': {
            'format': '[%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
                      '[Thread: %(threadName)s]: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S',
+            '()': GraylogFormatter
        },
+        'tuning': {
+            '()': TuningFormatter,
+            'datefmt': '%Y-%m-%d %H:%M:%S UTC'
+        }
    },
    'loggers': {
        'eveai_app': {  # logger for the eveai_app
@@ -213,21 +379,17 @@ LOGGING = {
            'level': 'DEBUG',
            'propagate': False
        },
-        'rag_tuning': {  # logger for the rag_tuning
-            'handlers': ['file_rag_tuning', 'graylog', ] if env == 'production' else ['file_rag_tuning', ],
-            'level': 'DEBUG',
-            'propagate': False
-        },
-        'embed_tuning': {  # logger for the embed_tuning
-            'handlers': ['file_embed_tuning', 'graylog', ] if env == 'production' else ['file_embed_tuning', ],
-            'level': 'DEBUG',
-            'propagate': False
-        },
        'business_events': {
            'handlers': ['file_business_events', 'graylog'],
            'level': 'DEBUG',
            'propagate': False
        },
+        # Single tuning logger
+        'tuning': {
+            'handlers': ['tuning_file', 'graylog'] if env == 'production' else ['tuning_file'],
+            'level': 'DEBUG',
+            'propagate': False,
+        },
        '': {  # root logger
            'handlers': ['console'],
            'level': 'WARNING',  # Set higher level for root to minimize noise
--- a/config/processor_types.py
+++ b/config/processor_types.py
@@ -0,0 +1,56 @@
+# Processor Types
+PROCESSOR_TYPES = {
+    "HTML_PROCESSOR": {
+        "name": "HTML Processor",
+        "file_types": "html",
+        "Description": "A processor for HTML files",
+        "configuration": {
+            "html_tags": {
+                "name": "HTML Tags",
+                "type": "string",
+                "description": "A comma-separated list of HTML tags",
+                "required": True,
+                "default": "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td"
+            },
+            "html_end_tags": {
+                "name": "HTML End Tags",
+                "type": "string",
+                "description": "A comma-separated list of HTML end tags (where can the chunk end)",
+                "required": True,
+                "default": "p, li, table"
+            },
+            "html_included_elements": {
+                "name": "HTML Included Elements",
+                "type": "string",
+                "description": "A comma-separated list of elements to be included",
+                "required": True,
+                "default": "article, main"
+            },
+            "html_excluded_elements": {
+                "name": "HTML Excluded Elements",
+                "type": "string",
+                "description": "A comma-separated list of elements to be excluded",
+                "required": False,
+                "default": "header, footer, nav, script"
+            },
+            "html_excluded_classes": {
+                "name": "HTML Excluded Classes",
+                "type": "string",
+                "description": "A comma-separated list of classes to be excluded",
+                "required": False,
+            },
+        },
+    },
+    "PDF_PROCESSOR": {
+        "name": "PDF Processor",
+        "file_types": "pdf",
+        "Description": "A Processor for PDF files",
+        "configuration": {}
+    },
+    "AUDIO_PROCESSOR": {
+        "name": "AUDIO Processor",
+        "file_types": "mp3, mp4, ogg",
+        "Description": "A Processor for audio files",
+        "configuration": {}
+    },
+}
--- a/config/prompts/anthropic/claude-3-5-sonnet.yaml
+++ b/config/prompts/anthropic/claude-3-5-sonnet.yaml
@@ -1,88 +0,0 @@
-html_parse: |
-  You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
-  
-  # Best practices are:
-  - Respect wordings and language(s) used in the HTML.
-  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
-  - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.  
-  - Be careful of encoding of the text. Everything needs to be human readable.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
-
-  HTML is between triple backticks.
-
-  ```{html}```  
-
-pdf_parse: |
-  You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
-
-  # Best practices are:
-  - Respect wordings and language(s) used in the PDF.
-  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
-  - When headings are numbered, show the numbering and define the header level. 
-  - A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
-  - Paragraphs are to be stripped of newlines so they become easily readable.
-  - Be careful of encoding of the text. Everything needs to be human readable.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
-
-  PDF content is between triple backticks.
-
-  ```{pdf_content}```
-
-summary: |
-  Write a concise summary of the text in {language}. The text is delimited between triple backticks.
-  ```{text}```
-
-rag: |
-  Answer the question based on the following context, delimited between triple backticks. 
-  {tenant_context}
-  Use the following {language} in your communication, and cite the sources used.
-  If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
-  Context:
-  ```{context}```
-  Question:
-  {question}
-
-history: |
-  You are a helpful assistant that details a question based on a previous context,
-  in such a way that the question is understandable without the previous context. 
-  The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
-  The history is delimited between triple backticks.
-  You answer by stating the question in {language}.
-  History:
-  ```{history}```
-  Question to be detailed:
-  {question}
-
-encyclopedia: |
-  You have a lot of background knowledge, and as such you are some kind of 
-  'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question. 
-  If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
-  Question:
-  {question}
-
-transcript: |
-  """You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. Your task is to process and improve the given transcript, not to summarize it.
-
-  IMPORTANT INSTRUCTIONS:
-  1. DO NOT summarize the transcript and don't make your own interpretations. Return the FULL, COMPLETE transcript with improvements.
-  2. Improve any errors in the transcript based on context.
-  3. Respect the original wording and language(s) used in the transcription. Main Language used is {language}.
-  4. Divide the transcript into paragraphs for better readability. Each paragraph ONLY contains ORIGINAL TEXT.
-  5. Group related paragraphs into logical sections.
-  6. Add appropriate headers (using markdown syntax) to each section in {language}.
-  7. We do not need an overall title. Just add logical headers
-  8. Ensure that the entire transcript is included in your response, from start to finish.
-  
-  REMEMBER: 
-  - Your output should be the complete transcript in markdown format, NOT A SUMMARY OR ANALYSIS. 
-  - Include EVERYTHING from the original transcript, just organized and formatted better.
-  - Just return the markdown version of the transcript, without any other text such as an introduction or a summary.
-  
-  Here is the transcript to process (between triple backticks):
-  
-  ```{transcript}```
-  
-  Process this transcript according to the instructions above and return the full, formatted markdown version.
-  """
--- a/config/prompts/openai/gpt-4o-mini.yaml
+++ b/config/prompts/openai/gpt-4o-mini.yaml
@@ -1,79 +0,0 @@
-html_parse: |
-  You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
-  
-  # Best practices are:
-  - Respect wordings and language(s) used in the HTML.
-  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
-  - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.  
-  - Be careful of encoding of the text. Everything needs to be human readable.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
-
-  HTML is between triple backquotes.
-
-  ```{html}```  
-
-pdf_parse: |
-  You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
-
-  # Best practices are:
-  - Respect wordings and language(s) used in the PDF.
-  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
-  - When headings are numbered, show the numbering and define the header level. 
-  - A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
-  - Paragraphs are to be stripped of newlines so they become easily readable.
-  - Be careful of encoding of the text. Everything needs to be human readable.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
-
-  PDF content is between triple backquotes.
-
-  ```{pdf_content}```
-
-summary: |
-  Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
-  ```{text}```
-
-rag: |
-  Answer the question based on the following context, delimited between triple backquotes. 
-  {tenant_context}
-  Use the following {language} in your communication, and cite the sources used.
-  If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
-  Context:
-  ```{context}```
-  Question:
-  {question}
-
-history: |
-  You are a helpful assistant that details a question based on a previous context,
-  in such a way that the question is understandable without the previous context. 
-  The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
-  The history is delimited between triple backquotes.
-  You answer by stating the question in {language}.
-  History:
-  ```{history}```
-  Question to be detailed:
-  {question}
-
-encyclopedia: |
-  You have a lot of background knowledge, and as such you are some kind of 
-  'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question. 
-  If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
-  Question:
-  {question}
-
-transcript: |
-  You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
-
-  # Best practices and steps are:
-  - Respect wordings and language(s) used in the transcription. Main language is {language}.
-  - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.    
-  - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
-  - annotate the text to identify these logical parts using headings in {language}.
-  - improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
-
-  The transcript is between triple backquotes.
-
-  ```{transcript}```
--- a/config/prompts/openai/gpt-4o.yaml
+++ b/config/prompts/openai/gpt-4o.yaml
@@ -1,84 +0,0 @@
-html_parse: |
-  You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
-  
-  # Best practices are:
-  - Respect wordings and language(s) used in the HTML.
-  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
-  - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.  
-  - Be careful of encoding of the text. Everything needs to be human readable.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
-
-  HTML is between triple backquotes.
-
-  ```{html}```  
-
-pdf_parse: |
-  You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
-  The content you get is already processed (some markdown already generated), but needs to be corrected. For large files, you may receive only portions of the full file. Consider this when processing the content.
-
-  # Best practices are:
-  - Respect wordings and language(s) used in the provided content.
-  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
-  - When headings are numbered, show the numbering and define the header level. You may have to correct current header levels, as preprocessing is known to make errors.
-  - A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
-  - Paragraphs are to be stripped of newlines so they become easily readable.
-  - Be careful of encoding of the text. Everything needs to be human readable.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
-
-  PDF content is between triple backquotes.
-
-  ```{pdf_content}```
-
-summary: |
-  Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
-  ```{text}```
-
-rag: |
-  Answer the question based on the following context, delimited between triple backquotes. 
-  {tenant_context}
-  Use the following {language} in your communication, and cite the sources used.
-  If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
-  Context:
-  ```{context}```
-  Question:
-  {question}
-
-history: |
-  You are a helpful assistant that details a question based on a previous context,
-  in such a way that the question is understandable without the previous context. 
-  The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
-  The history is delimited between triple backquotes.
-  You answer by stating the question in {language}.
-  History:
-  ```{history}```
-  Question to be detailed:
-  {question}
-
-encyclopedia: |
-  You have a lot of background knowledge, and as such you are some kind of 
-  'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question. 
-  If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
-  Question:
-  {question}
-
-transcript: |
-  You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
-  You may receive information in different chunks. If you're not receiving the first chunk, you'll get the last part of the previous chunk, including it's title in between triple $. Consider this last part and the title as the start of the new chunk.
-
-
-  # Best practices and steps are:
-  - Respect wordings and language(s) used in the transcription. Main language is {language}.
-  - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.    
-  - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part. Don't make logical parts too small. They should contain at least 7 or 8 sentences.
-  - annotate the text to identify these logical parts using headings in {language}.
-  - improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
-
-  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
-
-  The transcript is between triple backquotes.
-
-  $$${previous_part}$$$
-
-  ```{transcript}```
--- a/config/prompts/openai/gpt-4o/encyclopedia/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/encyclopedia/1.0.0.yaml
@@ -0,0 +1,12 @@
+version: "1.0.0"
+content: |
+  You have a lot of background knowledge, and as such you are some kind of 
+  'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question. 
+  If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
+  Question:
+  {question}
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "A background information retriever for Evie"
+  changes: "Initial version migrated from flat file structure"
--- a/config/prompts/openai/gpt-4o/history/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/history/1.0.0.yaml
@@ -0,0 +1,16 @@
+version: "1.0.0"
+content: |
+  You are a helpful assistant that details a question based on a previous context,
+  in such a way that the question is understandable without the previous context. 
+  The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
+  The history is delimited between triple backquotes.
+  You answer by stating the question in {language}.
+  History:
+  ```{history}```
+  Question to be detailed:
+  {question}
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "Prompt to further detail a question based on the previous conversation"
+  changes: "Initial version migrated from flat file structure"
--- a/config/prompts/openai/gpt-4o/html_parse/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/html_parse/1.0.0.yaml
@@ -0,0 +1,20 @@
+version: "1.0.0"
+content: |
+  You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+  
+  # Best practices are:
+  - Respect wordings and language(s) used in the HTML.
+  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+  - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.  
+  - Be careful of encoding of the text. Everything needs to be human readable.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
+
+  HTML is between triple backquotes.
+
+  ```{html}```
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "An aid in transforming HTML-based inputs to markdown"
+  changes: "Initial version migrated from flat file structure"
--- a/config/prompts/openai/gpt-4o/pdf_parse/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/pdf_parse/1.0.0.yaml
@@ -0,0 +1,23 @@
+version: "1.0.0"
+content: |
+  You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+  The content you get is already processed (some markdown already generated), but needs to be corrected. For large files, you may receive only portions of the full file. Consider this when processing the content.
+
+  # Best practices are:
+  - Respect wordings and language(s) used in the provided content.
+  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+  - When headings are numbered, show the numbering and define the header level. You may have to correct current header levels, as preprocessing is known to make errors.
+  - A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
+  - Paragraphs are to be stripped of newlines so they become easily readable.
+  - Be careful of encoding of the text. Everything needs to be human readable.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
+
+  PDF content is between triple backquotes.
+
+  ```{pdf_content}```
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "An assistant to parse PDF-content into markdown"
+  changes: "Initial version migrated from flat file structure"
--- a/config/prompts/openai/gpt-4o/rag/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/rag/1.0.0.yaml
@@ -0,0 +1,15 @@
+version: "1.0.0"
+content: |
+  Answer the question based on the following context, delimited between triple backquotes. 
+  {tenant_context}
+  Use the following {language} in your communication, and cite the sources used at the end of the full conversation.
+  If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
+  Context:
+  ```{context}```
+  Question:
+  {question}
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "The Main RAG retriever"
+  changes: "Initial version migrated from flat file structure"
--- a/config/prompts/openai/gpt-4o/summary/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/summary/1.0.0.yaml
@@ -0,0 +1,9 @@
+version: "1.0.0"
+content: |
+  Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
+  ```{text}```
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "An assistant to create a summary when multiple chunks are required for 1 file"
+  changes: "Initial version migrated from flat file structure"
--- a/config/prompts/openai/gpt-4o/transcript/1.0.0.yaml
+++ b/config/prompts/openai/gpt-4o/transcript/1.0.0.yaml
@@ -0,0 +1,25 @@
+version: "1.0.0"
+content: |
+  You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
+  You may receive information in different chunks. If you're not receiving the first chunk, you'll get the last part of the previous chunk, including its title in between triple $. Consider this last part and the title as the start of the new chunk.
+
+
+  # Best practices and steps are:
+  - Respect wordings and language(s) used in the transcription. Main language is {language}.
+  - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.    
+  - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part. Don't make logical parts too small. They should contain at least 7 or 8 sentences.
+  - annotate the text to identify these logical parts using headings in {language}.
+  - improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
+
+  The transcript is between triple backquotes.
+
+  $$${previous_part}$$$
+
+  ```{transcript}```
+metadata:
+  author: "Josako"
+  date_added: "2024-11-10"
+  description: "An assistant to transform a transcript to markdown."
+  changes: "Initial version migrated from flat file structure"
--- a/config/retriever_types.py
+++ b/config/retriever_types.py
@@ -18,6 +18,14 @@ RETRIEVER_TYPES = {
                "required": True,
                "default": 0.3,
            },
+        },
+        "arguments": {
+            "query": {
+                "name": "query",
+                "type": "str",
+                "description": "Query to retrieve embeddings",
+                "required": True,
+            },
        }
    }
 }
--- a/config/specialist_types.py
+++ b/config/specialist_types.py
@@ -10,6 +10,13 @@ SPECIALIST_TYPES = {
                "description": "The context to be used by the specialist.",
                "required": False,
            },
+            "temperature": {
+                "name": "Temperature",
+                "type": "number",
+                "description": "The inference temperature to be used by the specialist.",
+                "required": False,
+                "default": 0.3
+            }
        },
        "arguments": {
            "language": {
@@ -18,6 +25,38 @@ SPECIALIST_TYPES = {
                "description": "Language code to be used for receiving questions and giving answers",
                "required": True,
            },
+            "query": {
+                "name": "query",
+                "type": "str",
+                "description": "Query to answer",
+                "required": True,
+            }
+        },
+        "results": {
+            "detailed_query": {
+                "name": "detailed_query",
+                "type": "str",
+                "description": "The query detailed with the Chat Session History.",
+                "required": True,
+            },
+            "answer": {
+                "name": "answer",
+                "type": "str",
+                "description": "Answer to the query",
+                "required": True,
+            },
+            "citations": {
+                "name": "citations",
+                "type": "List[str]",
+                "description": "List of citations",
+                "required": False,
+            },
+            "insufficient_info": {
+                "name": "insufficient_info",
+                "type": "bool",
+                "description": "Whether or not there is insufficient information to answer the query",
+                "required": True,
+            },
        }
    }
-}
+}