- Introduction of dynamic Retrievers & Specialists

- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists
- Start adaptation of chat client
Josako
2024-11-15 10:00:53 +01:00
parent 55a8a95f79
commit 1807435339
101 changed files with 4181 additions and 1764 deletions


@@ -1,4 +1,8 @@
import json
import os
from datetime import datetime as dt, timezone as tz
from flask import current_app
from graypy import GELFUDPHandler
import logging
import logging.config
@@ -9,24 +13,173 @@ GRAYLOG_PORT = int(os.environ.get('GRAYLOG_PORT', 12201))
env = os.environ.get('FLASK_ENV', 'development')
class CustomLogRecord(logging.LogRecord):
class TuningLogRecord(logging.LogRecord):
"""Extended LogRecord that handles both tuning and business event logging"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Initialize extra fields after parent initialization
self._extra_fields = {}
self._is_tuning_log = False
self._tuning_type = None
self._tuning_tenant_id = None
self._tuning_catalog_id = None
self._tuning_specialist_id = None
self._tuning_retriever_id = None
self._tuning_processor_id = None
self.component = os.environ.get('COMPONENT_NAME', 'eveai_app')
    def __setattr__(self, name, value):
        # Business-event fields are expected to arrive via the logging `extra` mechanism,
        # which writes straight into the record's __dict__ and therefore bypasses this
        # method; plain attribute assignment of those names is ignored here.
        if name not in {'event_type', 'tenant_id', 'trace_id', 'span_id', 'span_name', 'parent_span_id',
                        'document_version_id', 'chat_session_id', 'interaction_id', 'environment'}:
            super().__setattr__(name, value)
    def getMessage(self):
        """
        Override getMessage to handle both string and dict messages
        """
        msg = self.msg
        if self.args:
            msg = msg % self.args
        return msg
    @property
    def is_tuning_log(self):
        return self._is_tuning_log
    @is_tuning_log.setter
    def is_tuning_log(self, value):
        object.__setattr__(self, '_is_tuning_log', value)
    @property
    def tuning_type(self):
        return self._tuning_type
    @tuning_type.setter
    def tuning_type(self, value):
        object.__setattr__(self, '_tuning_type', value)
    def get_tuning_data(self):
        """Get all tuning-related data if this is a tuning log"""
        if not self._is_tuning_log:
            return {}
        return {
            'is_tuning_log': self._is_tuning_log,
            'tuning_type': self._tuning_type,
            'tuning_tenant_id': self._tuning_tenant_id,
            'tuning_catalog_id': self._tuning_catalog_id,
            'tuning_specialist_id': self._tuning_specialist_id,
            'tuning_retriever_id': self._tuning_retriever_id,
            'tuning_processor_id': self._tuning_processor_id,
        }
    def set_tuning_data(self, tenant_id=None, catalog_id=None, specialist_id=None,
                        retriever_id=None, processor_id=None):
        """Set tuning-specific data"""
        object.__setattr__(self, '_tuning_tenant_id', tenant_id)
        object.__setattr__(self, '_tuning_catalog_id', catalog_id)
        object.__setattr__(self, '_tuning_specialist_id', specialist_id)
        object.__setattr__(self, '_tuning_retriever_id', retriever_id)
        object.__setattr__(self, '_tuning_processor_id', processor_id)
def custom_log_record_factory(*args, **kwargs):
    record = CustomLogRecord(*args, **kwargs)
    return record
class TuningFormatter(logging.Formatter):
"""Universal formatter for all tuning logs"""
def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt or '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
datefmt or '%Y-%m-%d %H:%M:%S')
def format(self, record):
# First format with the default formatter to handle basic fields
formatted_msg = super().format(record)
# If this is a tuning log, add the additional context
if getattr(record, 'is_tuning_log', False):
try:
identifiers = []
if hasattr(record, 'tenant_id') and record.tenant_id:
identifiers.append(f"Tenant: {record.tenant_id}")
if hasattr(record, 'catalog_id') and record.catalog_id:
identifiers.append(f"Catalog: {record.catalog_id}")
if hasattr(record, 'processor_id') and record.processor_id:
identifiers.append(f"Processor: {record.processor_id}")
formatted_msg = (
f"{formatted_msg}\n"
f"[TUNING {record.tuning_type}] [{' | '.join(identifiers)}]"
)
if hasattr(record, 'tuning_data') and record.tuning_data:
formatted_msg += f"\nData: {json.dumps(record.tuning_data, indent=2)}"
except Exception as e:
return f"{formatted_msg} (Error formatting tuning data: {str(e)})"
return formatted_msg
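
As a quick illustration of what this formatter produces, the sketch below builds a record by hand and formats it; the logger name, identifiers, and message are hypothetical, and TuningFormatter is assumed to be importable from this module.

import logging

formatter = TuningFormatter()
record = logging.LogRecord(name='tuning', level=logging.DEBUG, pathname='', lineno=0,
                           msg='Retrieved 12 chunks', args=(), exc_info=None)
record.is_tuning_log = True        # mark the record as a tuning log
record.tuning_type = 'RAG'         # hypothetical tuning category
record.tenant_id = 42              # hypothetical identifiers
record.catalog_id = 7
record.tuning_data = {'query': 'pricing policy'}
print(formatter.format(record))
# Expected shape of the output (timestamp varies):
# 2024-11-15 10:00:53 [DEBUG] tuning: Retrieved 12 chunks
# [TUNING RAG] [Tenant: 42 | Catalog: 7]
# Data: {
#   "query": "pricing policy"
# }
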
class GraylogFormatter(logging.Formatter):
"""Maintains existing Graylog formatting while adding tuning fields"""
def format(self, record):
if getattr(record, 'is_tuning_log', False):
# Add tuning-specific fields to Graylog
record.tuning_fields = {
'is_tuning_log': True,
'tuning_type': record.tuning_type,
'tenant_id': record.tenant_id,
'catalog_id': record.catalog_id,
'specialist_id': record.specialist_id,
'retriever_id': record.retriever_id,
'processor_id': record.processor_id,
}
return super().format(record)
class TuningLogger:
"""Helper class to manage tuning logs with consistent structure"""
def __init__(self, logger_name, tenant_id=None, catalog_id=None, specialist_id=None, retriever_id=None, processor_id=None):
self.logger = logging.getLogger(logger_name)
self.tenant_id = tenant_id
self.catalog_id = catalog_id
self.specialist_id = specialist_id
self.retriever_id = retriever_id
self.processor_id = processor_id
def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DEBUG):
"""Log a tuning event with structured data"""
try:
# Create a standard LogRecord for tuning
record = logging.LogRecord(
name=self.logger.name,
level=level,
pathname='',
lineno=0,
msg=message,
args=(),
exc_info=None
)
# Add tuning-specific attributes
record.is_tuning_log = True
record.tuning_type = tuning_type
record.tenant_id = self.tenant_id
record.catalog_id = self.catalog_id
record.specialist_id = self.specialist_id
record.retriever_id = self.retriever_id
record.processor_id = self.processor_id
if data:
record.tuning_data = data
# Process the record
self.logger.handle(record)
except Exception as e:
fallback_logger = logging.getLogger('eveai_workers')
fallback_logger.exception(f"Failed to log tuning message: {str(e)}")
# Set the custom log record factory
logging.setLogRecordFactory(custom_log_record_factory)
logging.setLogRecordFactory(TuningLogRecord)
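
A minimal usage sketch of the new helper (the identifiers and message below are made up; the 'tuning' logger it targets is configured further down in this file):

tuning_logger = TuningLogger('tuning', tenant_id=42, catalog_id=7, retriever_id=3)
tuning_logger.log_tuning(
    tuning_type='RAG',
    message='Re-ranked retrieval results',
    data={'candidates': 25, 'kept': 5},
)
# The record is handled by the 'tuning' logger, so it lands in logs/tuning.log
# (plus Graylog in production), rendered by TuningFormatter.
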
LOGGING = {
@@ -38,7 +191,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_app.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_workers': {
@@ -46,7 +199,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_workers.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_chat': {
@@ -54,7 +207,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_chat.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_chat_workers': {
@@ -62,7 +215,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_chat_workers.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_api': {
@@ -70,7 +223,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_api.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_beat': {
@@ -78,7 +231,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_beat.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_entitlements': {
@@ -86,7 +239,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/eveai_entitlements.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_sqlalchemy': {
@@ -94,7 +247,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/sqlalchemy.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_mailman': {
@@ -102,7 +255,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/mailman.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_security': {
@@ -110,7 +263,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/security.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_rag_tuning': {
@@ -118,7 +271,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/rag_tuning.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_embed_tuning': {
@@ -126,7 +279,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/embed_tuning.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'file_business_events': {
@@ -134,7 +287,7 @@ LOGGING = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/business_events.log',
            'maxBytes': 1024 * 1024 * 1, # 1MB
            'backupCount': 10,
            'backupCount': 2,
            'formatter': 'standard',
        },
        'console': {
@@ -142,25 +295,38 @@ LOGGING = {
            'level': 'DEBUG',
            'formatter': 'standard',
        },
        'tuning_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': 'logs/tuning.log',
            'maxBytes': 1024 * 1024 * 3, # 3MB
            'backupCount': 3,
            'formatter': 'tuning',
        },
        'graylog': {
            'level': 'DEBUG',
            'class': 'graypy.GELFUDPHandler',
            'host': GRAYLOG_HOST,
            'port': GRAYLOG_PORT,
            'debugging_fields': True, # Set to True if you want to include debugging fields
            'extra_fields': True, # Set to True if you want to include extra fields
            'debugging_fields': True,
            'formatter': 'graylog'
        },
    },
    'formatters': {
        'standard': {
            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
                      '[Thread: %(threadName)s]: %(message)s'
            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d]: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S'
        },
        'graylog': {
            'format': '[%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
                      '[Thread: %(threadName)s]: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S',
            '()': GraylogFormatter
        },
        'tuning': {
            '()': TuningFormatter,
            'datefmt': '%Y-%m-%d %H:%M:%S UTC'
        }
    },
    'loggers': {
        'eveai_app': {  # logger for the eveai_app
@@ -213,21 +379,17 @@ LOGGING = {
            'level': 'DEBUG',
            'propagate': False
        },
        'rag_tuning': {  # logger for the rag_tuning
            'handlers': ['file_rag_tuning', 'graylog', ] if env == 'production' else ['file_rag_tuning', ],
            'level': 'DEBUG',
            'propagate': False
        },
        'embed_tuning': {  # logger for the embed_tuning
            'handlers': ['file_embed_tuning', 'graylog', ] if env == 'production' else ['file_embed_tuning', ],
            'level': 'DEBUG',
            'propagate': False
        },
        'business_events': {
            'handlers': ['file_business_events', 'graylog'],
            'level': 'DEBUG',
            'propagate': False
        },
        # Single tuning logger
        'tuning': {
            'handlers': ['tuning_file', 'graylog'] if env == 'production' else ['tuning_file'],
            'level': 'DEBUG',
            'propagate': False,
        },
        '': {  # root logger
            'handlers': ['console'],
            'level': 'WARNING',  # Set higher level for root to minimize noise