- Traicie Selection Specialist Round Trip - Session improvements + debugging enabled - Tone of Voice & Language Level definitions introduced
486 lines
19 KiB
Python
486 lines
19 KiB
Python
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime as dt, timezone as tz
|
|
|
|
from flask import current_app
|
|
import logging
|
|
import logging.config
|
|
|
|
# Deployment environment selector; defaults to 'development' when FLASK_ENV is unset.
env = os.environ.get('FLASK_ENV', 'development')
|
|
|
|
|
|
def pad_string(s, target_length=100, pad_char='-'):
    """Right-pad a string with a space plus repeated pad characters.

    Strings already at or beyond ``target_length`` are returned unchanged.
    Otherwise the result is exactly ``target_length`` characters long: the
    original string, one separating space, then as many ``pad_char``
    characters as needed to fill the remainder.

    Args:
        s: The string to pad.
        target_length: Desired total length of the result.
        pad_char: Character repeated to fill the remainder.

    Returns:
        The padded string.
    """
    if len(s) >= target_length:
        return s

    # One column is consumed by the separating space.
    filler = pad_char * (target_length - len(s) - 1)
    return f"{s} {filler}"
|
|
|
|
|
|
class TuningLogRecord(logging.LogRecord):
    """Extended LogRecord that handles both tuning and business event logging.

    Adds private tuning-context fields (tenant/catalog/specialist/retriever/
    processor/session identifiers) on top of the standard LogRecord, plus a
    ``component`` attribute read from the COMPONENT_NAME environment variable
    (defaulting to ``'eveai_app'``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Tuning context starts empty; it is filled in later via the property
        # setters and set_tuning_data().
        self._extra_fields = {}
        self._is_tuning_log = False
        self._tuning_type = None
        self._tuning_tenant_id = None
        self._tuning_catalog_id = None
        self._tuning_specialist_id = None
        self._tuning_retriever_id = None
        self._tuning_processor_id = None
        self._session_id = None
        self.component = os.environ.get('COMPONENT_NAME', 'eveai_app')

    def getMessage(self):
        """Return the record message, %-formatting it only when args exist.

        NOTE: unlike the base implementation, a non-string ``msg`` (e.g. a
        dict) is returned as-is rather than coerced to ``str``.
        """
        message = self.msg
        if self.args:
            message = message % self.args
        return message

    @property
    def is_tuning_log(self):
        # True when this record carries tuning context.
        return self._is_tuning_log

    @is_tuning_log.setter
    def is_tuning_log(self, value):
        # object.__setattr__ mirrors the original defensive assignment style.
        object.__setattr__(self, '_is_tuning_log', value)

    @property
    def tuning_type(self):
        return self._tuning_type

    @tuning_type.setter
    def tuning_type(self, value):
        object.__setattr__(self, '_tuning_type', value)

    def get_tuning_data(self):
        """Return all tuning-related data, or {} for non-tuning records."""
        if not self._is_tuning_log:
            return {}

        return {
            'is_tuning_log': self._is_tuning_log,
            'tuning_type': self._tuning_type,
            'tuning_tenant_id': self._tuning_tenant_id,
            'tuning_catalog_id': self._tuning_catalog_id,
            'tuning_specialist_id': self._tuning_specialist_id,
            'tuning_retriever_id': self._tuning_retriever_id,
            'tuning_processor_id': self._tuning_processor_id,
            'session_id': self._session_id,
        }

    def set_tuning_data(self, tenant_id=None, catalog_id=None, specialist_id=None,
                        retriever_id=None, processor_id=None, session_id=None):
        """Store the tuning identifiers (does not flip is_tuning_log)."""
        assignments = (
            ('_tuning_tenant_id', tenant_id),
            ('_tuning_catalog_id', catalog_id),
            ('_tuning_specialist_id', specialist_id),
            ('_tuning_retriever_id', retriever_id),
            ('_tuning_processor_id', processor_id),
            ('_session_id', session_id),
        )
        for attr, value in assignments:
            object.__setattr__(self, attr, value)
|
|
|
|
|
|
class TuningFormatter(logging.Formatter):
    """Universal formatter for all tuning logs.

    Records are first formatted by the base Formatter.  Records flagged with
    ``is_tuning_log`` additionally get a ``[TUNING <type>]`` context line
    built from whichever identifiers are present on the record, plus the
    optional ``tuning_data`` payload rendered as indented JSON.  Any failure
    while building the extra context is reported inline rather than raised.
    """

    # (record attribute, display label) pairs, checked in this fixed order.
    _ID_FIELDS = (
        ('tenant_id', 'Tenant'),
        ('catalog_id', 'Catalog'),
        ('processor_id', 'Processor'),
        ('specialist_id', 'Specialist'),
        ('retriever_id', 'Retriever'),
        ('session_id', 'Session'),
    )

    def __init__(self, fmt=None, datefmt=None):
        super().__init__(fmt or '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
                         datefmt or '%Y-%m-%d %H:%M:%S')

    def format(self, record):
        # Base formatting handles asctime/levelname/name/message.
        output = super().format(record)

        # Non-tuning records need no extra context.
        if not getattr(record, 'is_tuning_log', False):
            return output

        try:
            identifiers = []
            for attr, label in self._ID_FIELDS:
                value = getattr(record, attr, None)
                if value:
                    identifiers.append(f"{label}: {value}")

            output = (
                f"{output}\n"
                f"[TUNING {record.tuning_type}] [{' | '.join(identifiers)}]"
            )

            payload = getattr(record, 'tuning_data', None)
            if payload:
                output += f"\nData: {json.dumps(payload, indent=2)}"

        except Exception as e:
            return f"{output} (Error formatting tuning data: {str(e)})"

        return output
|
|
|
|
|
|
|
|
class TuningLogger:
    """Helper class to manage tuning logs with consistent structure.

    Wraps a named standard logger and stamps every emitted record with the
    tuning context (tenant/catalog/specialist/retriever/processor/session)
    captured at construction time.
    """

    def __init__(self, logger_name, tenant_id=None, catalog_id=None, specialist_id=None, retriever_id=None,
                 processor_id=None, session_id=None, log_file=None):
        """
        Initialize a tuning logger.

        Args:
            logger_name: Base name for the logger.
            tenant_id: Optional tenant ID for context.
            catalog_id: Optional catalog ID for context.
            specialist_id: Optional specialist ID for context.
            retriever_id: Optional retriever ID for context.
            processor_id: Optional processor ID for context.
            session_id: Optional session ID for context.
            log_file: Optional custom log file name (ignored - all logs go to tuning.log)
        """
        # Resolve through the logging registry so the same named logger (and
        # its handlers) is shared by every instance using this name.
        self.logger = logging.getLogger(logger_name)
        self.tenant_id = tenant_id
        self.catalog_id = catalog_id
        self.specialist_id = specialist_id
        self.retriever_id = retriever_id
        self.processor_id = processor_id
        self.session_id = session_id

    def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DEBUG):
        """Log a tuning event with structured data.

        Args:
            tuning_type: Category tag shown in the [TUNING <type>] context line.
            message: Human-readable message; padded to 100 chars with dashes.
            data: Optional structured payload attached as ``record.tuning_data``.
            level: Logging level for the record (default DEBUG).
        """
        try:
            # Build a plain LogRecord directly for this tuning event.
            record = logging.LogRecord(
                name=self.logger.name,
                level=level,
                pathname='',
                lineno=0,
                msg=pad_string(message, 100, '-'),
                args=(),
                exc_info=None,
            )

            # Stamp the tuning context onto the record.
            context = {
                'is_tuning_log': True,
                'tuning_type': tuning_type,
                'tenant_id': self.tenant_id,
                'catalog_id': self.catalog_id,
                'specialist_id': self.specialist_id,
                'retriever_id': self.retriever_id,
                'processor_id': self.processor_id,
                'session_id': self.session_id,
            }
            for attr, value in context.items():
                setattr(record, attr, value)

            # Attach the payload only when it is non-empty.
            if data:
                record.tuning_data = data

            # Hand the record straight to the logger's handler chain.
            self.logger.handle(record)

        except Exception as e:
            # Best-effort: tuning logging must never break the caller.
            print(f"Failed to log tuning message: {str(e)}")
|
|
|
|
|
|
# Install TuningLogRecord as the process-wide log record factory so every
# record created through the logging machinery carries the tuning fields.
logging.setLogRecordFactory(TuningLogRecord)
|
|
|
|
def configure_logging():
    """Configure logging based on environment.

    When running in Kubernetes (detected via KUBERNETES_SERVICE_HOST), directs
    logs to stdout in JSON format; otherwise uses the file-based handlers from
    the module-level LOGGING config for development/testing.  Falls back to
    ``logging.basicConfig`` if configuration fails for any reason.
    """
    try:
        import copy

        # Absolute path to the logs directory (sibling of this package's parent).
        base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        logs_dir = os.path.join(base_dir, 'logs')

        # Make sure the logs directory exists with usable permissions.
        if not os.path.exists(logs_dir):
            try:
                os.makedirs(logs_dir, exist_ok=True)
                print(f"Logs directory aangemaakt op: {logs_dir}")
            except (IOError, PermissionError) as e:
                print(f"WAARSCHUWING: Kan logs directory niet aanmaken: {e}")
                print("Logs worden mogelijk niet correct geschreven!")

        # Check if running in Kubernetes.
        in_kubernetes = os.environ.get('KUBERNETES_SERVICE_HOST') is not None

        # JSON logging needs the python-json-logger package; fall back to the
        # standard format when it is missing.
        if in_kubernetes:
            try:
                import pythonjsonlogger.jsonlogger
                has_json_logger = True
            except ImportError:
                print("WAARSCHUWING: python-json-logger pakket is niet geïnstalleerd.")
                print("Voer 'pip install python-json-logger>=2.0.7' uit om JSON logging in te schakelen.")
                print("Terugvallen op standaard logging formaat.")
                has_json_logger = False
                in_kubernetes = False  # Fall back to standard logging
        else:
            has_json_logger = False

        # Deep-copy so the per-call tweaks below never mutate the module-level
        # LOGGING constant: a shallow dict() copy shares the nested handler and
        # logger dicts, so earlier calls would leak into later ones.
        logging_config = copy.deepcopy(LOGGING)

        # Downgrade the json_console handler to the standard formatter when
        # pythonjsonlogger is unavailable.
        if not has_json_logger and 'json_console' in logging_config['handlers']:
            logging_config['handlers']['json_console']['formatter'] = 'standard'

        # In Kubernetes, redirect every named logger to JSON console output.
        # This preserves the same logger names but changes where/how they log.
        if in_kubernetes:
            for logger_name in logging_config['loggers']:
                if logger_name:  # Skip the root logger
                    logging_config['loggers'][logger_name]['handlers'] = ['json_console']

        # Warn when the logs directory exists but is not writable.
        if os.path.exists(logs_dir) and not os.access(logs_dir, os.W_OK):
            print(f"WAARSCHUWING: Logs directory bestaat maar is niet schrijfbaar: {logs_dir}")
            print("Logs worden mogelijk niet correct geschreven!")

        logging.config.dictConfig(logging_config)
        logging.info(f"Logging configured. Environment: {'Kubernetes' if in_kubernetes else 'Development/Testing'}")
        logging.info(f"Logs directory: {logs_dir}")
    except Exception as e:
        print(f"Error configuring logging: {str(e)}")
        print("Gedetailleerde foutinformatie:")
        import traceback
        traceback.print_exc()
        # Fall back to basic configuration
        logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
# Shared location for all file-based log output: <package parent>/logs.
_LOGS_DIR = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs')


def _rotating_file_handler(filename, level='DEBUG', max_bytes=1024 * 1024 * 1,
                           backup_count=2, formatter='standard'):
    """Build a RotatingFileHandler config entry for a file under _LOGS_DIR."""
    return {
        'level': level,
        'class': 'logging.handlers.RotatingFileHandler',
        'filename': os.path.join(_LOGS_DIR, filename),
        'maxBytes': max_bytes,
        'backupCount': backup_count,
        'formatter': formatter,
    }


def _dedicated_logger(handler, level='DEBUG'):
    """Build a logger config entry that writes to one handler, no propagation."""
    return {
        'handlers': [handler],
        'level': level,
        'propagate': False,
    }


# dictConfig-style logging configuration: one rotating file per component,
# a plain console handler for development, a JSON stdout handler for
# Kubernetes, and a dedicated tuning channel using TuningFormatter.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'handlers': {
        'file_app': _rotating_file_handler('eveai_app.log'),
        'file_workers': _rotating_file_handler('eveai_workers.log'),
        'file_chat_client': _rotating_file_handler('eveai_chat_client.log'),
        'file_chat_workers': _rotating_file_handler('eveai_chat_workers.log'),
        'file_api': _rotating_file_handler('eveai_api.log'),
        'file_beat': _rotating_file_handler('eveai_beat.log'),
        'file_entitlements': _rotating_file_handler('eveai_entitlements.log'),
        'file_sqlalchemy': _rotating_file_handler('sqlalchemy.log'),
        'file_security': _rotating_file_handler('security.log'),
        'file_rag_tuning': _rotating_file_handler('rag_tuning.log'),
        'file_embed_tuning': _rotating_file_handler('embed_tuning.log'),
        'file_business_events': _rotating_file_handler('business_events.log', level='INFO'),
        'console': {
            'class': 'logging.StreamHandler',
            'level': 'DEBUG',
            'formatter': 'standard',
        },
        'json_console': {
            'class': 'logging.StreamHandler',
            'level': 'INFO',
            'formatter': 'json',
            'stream': 'ext://sys.stdout',
        },
        # The tuning channel gets a larger rolling window (3 x 3MB).
        'tuning_file': _rotating_file_handler('tuning.log', max_bytes=1024 * 1024 * 3,
                                              backup_count=3, formatter='tuning'),
    },
    'formatters': {
        'standard': {
            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d]: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S'
        },
        'tuning': {
            '()': TuningFormatter,
            'datefmt': '%Y-%m-%d %H:%M:%S UTC'
        },
        'json': {
            'format': '%(message)s',
            # Use the real JSON formatter only when python-json-logger was
            # importable at module load time; otherwise fall back to stdlib.
            'class': 'logging.Formatter' if not 'pythonjsonlogger' in sys.modules else 'pythonjsonlogger.jsonlogger.JsonFormatter',
            'json_default': lambda obj: str(obj) if isinstance(obj, (dt, Exception)) else None,
            'json_ensure_ascii': False,
            'rename_fields': {
                'asctime': 'timestamp',
                'levelname': 'severity'
            },
            'timestamp': True,
            'datefmt': '%Y-%m-%dT%H:%M:%S.%fZ'
        }
    },
    'loggers': {
        'eveai_app': _dedicated_logger('file_app'),
        'eveai_workers': _dedicated_logger('file_workers'),
        'eveai_chat_client': _dedicated_logger('file_chat_client'),
        'eveai_chat_workers': _dedicated_logger('file_chat_workers'),
        'eveai_api': _dedicated_logger('file_api'),
        'eveai_beat': _dedicated_logger('file_beat'),
        'eveai_entitlements': _dedicated_logger('file_entitlements'),
        'sqlalchemy.engine': _dedicated_logger('file_sqlalchemy'),
        'security': _dedicated_logger('file_security'),
        'business_events': _dedicated_logger('file_business_events'),
        # Single tuning logger
        'tuning': _dedicated_logger('tuning_file'),
        '': {  # root logger
            'handlers': ['console'] if os.environ.get('KUBERNETES_SERVICE_HOST') is None else ['json_console'],
            'level': 'WARNING',  # Set higher level for root to minimize noise
            'propagate': False
        },
    }
}