# eveAI/config/logging_config.py


import copy
import json
import logging
import logging.config
import os
import sys
import time
from datetime import datetime as dt, timezone as tz

from flask import current_app

# Environment name taken from the FLASK_ENV variable; falls back to 'development' when unset.
env = os.environ.get('FLASK_ENV', 'development')


def pad_string(s, target_length=100, pad_char='-'):
    """
    Extend a string to a fixed width with a single space followed by filler.

    Args:
        s: The original string
        target_length: Total length the result should reach
        pad_char: Character repeated to fill the remaining width

    Returns:
        ``s`` unchanged when it is already at least ``target_length`` long;
        otherwise ``s``, one space, and enough ``pad_char`` repetitions to
        reach ``target_length``.
    """
    shortfall = target_length - len(s)
    if shortfall <= 0:
        return s

    # One column of the shortfall is used by the separating space.
    filler = pad_char * (shortfall - 1)
    return s + " " + filler


class TuningLogRecord(logging.LogRecord):
    """Extended LogRecord that handles both tuning and business event logging.

    On top of the standard LogRecord attributes every instance carries:

    * ``component`` - value of the COMPONENT_NAME environment variable,
      defaulting to 'eveai_app';
    * private tuning fields, exposed through the ``is_tuning_log`` and
      ``tuning_type`` properties and through ``get_tuning_data()`` /
      ``set_tuning_data()``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Tuning state starts out empty and is filled in after construction.
        self._extra_fields = {}  # not read anywhere inside this class
        self._is_tuning_log = False
        self._tuning_type = None
        self._tuning_tenant_id = None
        self._tuning_catalog_id = None
        self._tuning_specialist_id = None
        self._tuning_retriever_id = None
        self._tuning_processor_id = None
        self._session_id = None
        self.component = os.environ.get('COMPONENT_NAME', 'eveai_app')

    def getMessage(self):
        """
        Return the message for this record, merging in ``args`` when present.

        Without args the message object is returned untouched, so a dict (or
        any other non-string) message is passed through as-is. With args the
        message is converted with ``str()`` before %-formatting, as the
        standard ``LogRecord.getMessage`` does, so a non-string message such
        as an exception instance no longer raises TypeError.
        """
        msg = self.msg
        if self.args:
            msg = str(msg) % self.args
        return msg

    @property
    def is_tuning_log(self):
        """Whether this record has been flagged as a tuning log."""
        return self._is_tuning_log

    @is_tuning_log.setter
    def is_tuning_log(self, value):
        self._is_tuning_log = value

    @property
    def tuning_type(self):
        """The tuning type assigned to this record, or None."""
        return self._tuning_type

    @tuning_type.setter
    def tuning_type(self, value):
        self._tuning_type = value

    def get_tuning_data(self):
        """Return all tuning fields as a dict, or {} if this is not a tuning log."""
        if not self._is_tuning_log:
            return {}

        return {
            'is_tuning_log': self._is_tuning_log,
            'tuning_type': self._tuning_type,
            'tuning_tenant_id': self._tuning_tenant_id,
            'tuning_catalog_id': self._tuning_catalog_id,
            'tuning_specialist_id': self._tuning_specialist_id,
            'tuning_retriever_id': self._tuning_retriever_id,
            'tuning_processor_id': self._tuning_processor_id,
            'session_id': self._session_id,
        }

    def set_tuning_data(self, tenant_id=None, catalog_id=None, specialist_id=None,
                        retriever_id=None, processor_id=None, session_id=None,):
        """
        Store the tuning identifiers on the record.

        Every identifier is overwritten, including with None when omitted.
        The ``is_tuning_log`` flag is not changed by this method; it has to be
        set separately for ``get_tuning_data()`` to return these values.
        """
        self._tuning_tenant_id = tenant_id
        self._tuning_catalog_id = catalog_id
        self._tuning_specialist_id = specialist_id
        self._tuning_retriever_id = retriever_id
        self._tuning_processor_id = processor_id
        self._session_id = session_id


class TuningFormatter(logging.Formatter):
    """Universal formatter for all tuning logs.

    Ordinary records are rendered with the base format only. Records flagged
    with ``is_tuning_log`` get two extra parts appended: a
    ``[TUNING <type>] [<identifiers>]`` line and, when ``tuning_data`` is
    present and truthy, a pretty-printed JSON dump of that data.
    """

    # Render %(asctime)s in UTC. The 'tuning' formatter entry in LOGGING puts a
    # literal "UTC" suffix in its datefmt, which the default local-time
    # converter would make incorrect.
    converter = time.gmtime

    # (label, record attribute) pairs, in the order they appear in the output.
    _IDENTIFIER_FIELDS = (
        ('Tenant', 'tenant_id'),
        ('Catalog', 'catalog_id'),
        ('Processor', 'processor_id'),
        ('Specialist', 'specialist_id'),
        ('Retriever', 'retriever_id'),
        ('Session', 'session_id'),
    )

    def __init__(self, fmt=None, datefmt=None):
        """
        Args:
            fmt: Format string; defaults to
                '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
            datefmt: Date format; defaults to '%Y-%m-%d %H:%M:%S'
        """
        super().__init__(fmt or '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
                         datefmt or '%Y-%m-%d %H:%M:%S')

    def format(self, record):
        """
        Format ``record``, appending tuning context for tuning logs.

        Returns:
            The formatted text. If building the tuning part fails, whatever
            was formatted so far is returned with the error description
            appended instead of raising.
        """
        # First format with the default formatter to handle basic fields
        formatted_msg = super().format(record)

        if not getattr(record, 'is_tuning_log', False):
            return formatted_msg

        try:
            # Only identifiers that are present and truthy are listed.
            identifiers = []
            for label, attr in self._IDENTIFIER_FIELDS:
                value = getattr(record, attr, None)
                if value:
                    identifiers.append(f"{label}: {value}")

            formatted_msg = (
                f"{formatted_msg}\n"
                f"[TUNING {record.tuning_type}] [{' | '.join(identifiers)}]"
            )

            tuning_data = getattr(record, 'tuning_data', None)
            if tuning_data:
                # default=str is a deliberate fallback: this is diagnostic
                # output, and a value that is not JSON-native (datetime, set,
                # custom object) should be shown rather than cause the whole
                # payload to be dropped.
                formatted_msg += f"\nData: {json.dumps(tuning_data, indent=2, default=str)}"

        except Exception as e:
            return f"{formatted_msg} (Error formatting tuning data: {str(e)})"

        return formatted_msg


class TuningLogger:
    """Helper class to manage tuning logs with consistent structure"""

    def __init__(self, logger_name, tenant_id=None, catalog_id=None, specialist_id=None, retriever_id=None,
                 processor_id=None, session_id=None, log_file=None):
        """
        Initialize a tuning logger

        Args:
            logger_name: Name of the logger the tuning records are handed to
            tenant_id: Optional tenant ID for context
            catalog_id: Optional catalog ID for context
            specialist_id: Optional specialist ID for context
            retriever_id: Optional retriever ID for context
            processor_id: Optional processor ID for context
            session_id: Optional session ID for context
            log_file: Accepted for backward compatibility; not used by this class
        """
        self.logger = logging.getLogger(logger_name)
        self.tenant_id = tenant_id
        self.catalog_id = catalog_id
        self.specialist_id = specialist_id
        self.retriever_id = retriever_id
        self.processor_id = processor_id
        self.session_id = session_id

    def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DEBUG):
        """
        Log a tuning event with structured data.

        Args:
            tuning_type: Label stored on the record as ``tuning_type``
            message: Text of the event; padded to 100 characters with '-'
            data: Optional payload, attached as ``tuning_data`` when truthy
            level: Logging level of the record (default DEBUG)

        Logging is best-effort: any exception is caught and reported with
        ``print`` instead of being propagated to the caller.
        """
        try:
            # The record is built directly rather than through a logger call,
            # so it is handed to the logger with logger.handle() below.
            record = logging.LogRecord(
                name=self.logger.name,
                level=level,
                pathname='',
                lineno=0,
                msg=pad_string(message, 100, '-'),
                args=(),
                exc_info=None
            )

            # A directly constructed LogRecord bypasses the record factory and
            # therefore has no `component` attribute. The 'standard' formatter
            # references %(component)s, so without it any handler using that
            # formatter cannot format this record.
            record.component = os.environ.get('COMPONENT_NAME', 'eveai_app')

            # Add tuning-specific attributes
            record.is_tuning_log = True
            record.tuning_type = tuning_type
            record.tenant_id = self.tenant_id
            record.catalog_id = self.catalog_id
            record.specialist_id = self.specialist_id
            record.retriever_id = self.retriever_id
            record.processor_id = self.processor_id
            record.session_id = self.session_id

            if data:
                record.tuning_data = data

            # Process the record
            self.logger.handle(record)

        except Exception as e:
            print(f"Failed to log tuning message: {str(e)}")


# Install TuningLogRecord as the LogRecord factory at import time, so records created
# through the factory carry the `component` attribute that the 'standard' formatter
# in LOGGING references via %(component)s.
logging.setLogRecordFactory(TuningLogRecord)

def configure_logging():
    """Configure logging based on environment

    Outside Kubernetes the loggers keep the file handlers declared in LOGGING.
    When KUBERNETES_SERVICE_HOST is set and python-json-logger can be imported,
    every named logger is redirected to the 'json_console' handler (stdout)
    and the 'json' formatter is switched to JsonFormatter. When the package is
    missing, a warning is printed, the 'json_console' handler falls back to
    the 'standard' formatter and the loggers keep their file handlers.

    All adjustments are made on a deep copy, so the module-level LOGGING dict
    is left untouched and repeated calls start from the same configuration.

    Any exception raised while configuring is printed together with its
    traceback, after which logging falls back to ``basicConfig(level=INFO)``.
    """
    try:
        # Absolute path of the logs directory: <parent of this file's directory>/logs
        base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        logs_dir = os.path.join(base_dir, 'logs')

        # Make sure the logs directory exists; a failure is reported but not fatal here
        if not os.path.exists(logs_dir):
            try:
                os.makedirs(logs_dir, exist_ok=True)
                print(f"Logs directory aangemaakt op: {logs_dir}")
            except (IOError, PermissionError) as e:
                print(f"WAARSCHUWING: Kan logs directory niet aanmaken: {e}")
                print("Logs worden mogelijk niet correct geschreven!")

        # Check if running in Kubernetes
        in_kubernetes = os.environ.get('KUBERNETES_SERVICE_HOST') is not None

        # JSON output is only possible in Kubernetes with python-json-logger installed
        has_json_logger = False
        if in_kubernetes:
            try:
                import pythonjsonlogger.jsonlogger  # noqa: F401 - availability probe
                has_json_logger = True
            except ImportError:
                print("WAARSCHUWING: python-json-logger pakket is niet geïnstalleerd.")
                print("Voer 'pip install python-json-logger>=2.0.7' uit om JSON logging in te schakelen.")
                print("Terugvallen op standaard logging formaat.")
                in_kubernetes = False  # Fall back to standard logging

        # Work on a deep copy: dict(LOGGING) would share the nested handler,
        # formatter and logger dicts, so the edits below would permanently
        # alter the module-level LOGGING.
        logging_config = copy.deepcopy(LOGGING)

        if has_json_logger:
            # LOGGING picks the 'json' formatter class when this module is
            # imported, i.e. before the import above has happened, so it may
            # still name logging.Formatter. Select the JSON formatter now that
            # the package is known to be available.
            json_formatter = logging_config.get('formatters', {}).get('json')
            if json_formatter is not None:
                json_formatter['class'] = 'pythonjsonlogger.jsonlogger.JsonFormatter'
        elif 'json_console' in logging_config['handlers']:
            # Without the JSON package the json_console handler uses the standard formatter
            logging_config['handlers']['json_console']['formatter'] = 'standard'

        # In Kubernetes, conditionally modify specific loggers to use JSON console output
        # This preserves the same logger names but changes where/how they log
        if in_kubernetes:
            for logger_name, logger_settings in logging_config['loggers'].items():
                if logger_name:  # Skip the root logger
                    logger_settings['handlers'] = ['json_console']

        # Warn when the logs directory exists but cannot be written to
        if os.path.exists(logs_dir) and not os.access(logs_dir, os.W_OK):
            print(f"WAARSCHUWING: Logs directory bestaat maar is niet schrijfbaar: {logs_dir}")
            print("Logs worden mogelijk niet correct geschreven!")

        logging.config.dictConfig(logging_config)
        logging.info(f"Logging configured. Environment: {'Kubernetes' if in_kubernetes else 'Development/Testing'}")
        logging.info(f"Logs directory: {logs_dir}")
    except Exception as e:
        print(f"Error configuring logging: {str(e)}")
        print("Gedetailleerde foutinformatie:")
        import traceback
        traceback.print_exc()
        # Fall back to basic configuration
        logging.basicConfig(level=logging.INFO)


# Directory that holds every log file: <parent of this file's directory>/logs
_LOG_DIRECTORY = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs')
_ONE_MEGABYTE = 1024 * 1024


def _rotating_file_handler(file_name, level='DEBUG', megabytes=1, backups=2, formatter='standard'):
    """Build the dictConfig entry for one RotatingFileHandler in the logs directory."""
    return {
        'level': level,
        'class': 'logging.handlers.RotatingFileHandler',
        'filename': os.path.join(_LOG_DIRECTORY, file_name),
        'maxBytes': _ONE_MEGABYTE * megabytes,
        'backupCount': backups,
        'formatter': formatter,
    }


def _single_handler_logger(handler_name):
    """Build the dictConfig entry for a non-propagating DEBUG logger with one handler."""
    return {
        'handlers': [handler_name],
        'level': 'DEBUG',
        'propagate': False,
    }


# dictConfig schema consumed by configure_logging().
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'handlers': {
        # Rotating files of 1MB with 2 backups unless stated otherwise
        'file_app': _rotating_file_handler('eveai_app.log'),
        'file_workers': _rotating_file_handler('eveai_workers.log'),
        'file_chat_client': _rotating_file_handler('eveai_chat_client.log'),
        'file_chat_workers': _rotating_file_handler('eveai_chat_workers.log'),
        'file_api': _rotating_file_handler('eveai_api.log'),
        'file_beat': _rotating_file_handler('eveai_beat.log'),
        'file_entitlements': _rotating_file_handler('eveai_entitlements.log'),
        'file_sqlalchemy': _rotating_file_handler('sqlalchemy.log'),
        'file_security': _rotating_file_handler('security.log'),
        'file_rag_tuning': _rotating_file_handler('rag_tuning.log'),
        'file_embed_tuning': _rotating_file_handler('embed_tuning.log'),
        'file_business_events': _rotating_file_handler('business_events.log', level='INFO'),
        'console': {
            'class': 'logging.StreamHandler',
            'level': 'DEBUG',
            'formatter': 'standard',
        },
        'json_console': {
            'class': 'logging.StreamHandler',
            'level': 'INFO',
            'formatter': 'json',
            'stream': 'ext://sys.stdout',
        },
        # 3MB with 3 backups, rendered by TuningFormatter
        'tuning_file': _rotating_file_handler('tuning.log', megabytes=3, backups=3, formatter='tuning'),
    },
    'formatters': {
        'standard': {
            # %(component)s is an attribute that TuningLogRecord adds to each record
            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d]: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S'
        },
        'tuning': {
            '()': TuningFormatter,
            'datefmt': '%Y-%m-%d %H:%M:%S UTC'
        },
        'json': {
            'format': '%(message)s',
            # Evaluated once, when this module is imported: JsonFormatter is only
            # selected if pythonjsonlogger has already been imported by then.
            # NOTE(review): with a 'class' entry dictConfig appears to read only
            # format/datefmt/style - confirm whether the keys below take effect.
            'class': ('pythonjsonlogger.jsonlogger.JsonFormatter'
                      if 'pythonjsonlogger' in sys.modules
                      else 'logging.Formatter'),
            'json_default': lambda obj: str(obj) if isinstance(obj, (dt, Exception)) else None,
            'json_ensure_ascii': False,
            'rename_fields': {
                'asctime': 'timestamp',
                'levelname': 'severity'
            },
            'timestamp': True,
            'datefmt': '%Y-%m-%dT%H:%M:%S.%fZ'
        }
    },
    'loggers': {
        'eveai_app': _single_handler_logger('file_app'),
        'eveai_workers': _single_handler_logger('file_workers'),
        'eveai_chat_client': _single_handler_logger('file_chat_client'),
        'eveai_chat_workers': _single_handler_logger('file_chat_workers'),
        'eveai_api': _single_handler_logger('file_api'),
        'eveai_beat': _single_handler_logger('file_beat'),
        'eveai_entitlements': _single_handler_logger('file_entitlements'),
        'sqlalchemy.engine': _single_handler_logger('file_sqlalchemy'),
        'security': _single_handler_logger('file_security'),
        'business_events': _single_handler_logger('file_business_events'),
        # Single tuning logger
        'tuning': _single_handler_logger('tuning_file'),
        '': {  # root logger
            # Chosen at import time from the KUBERNETES_SERVICE_HOST variable
            'handlers': ['json_console'] if os.environ.get('KUBERNETES_SERVICE_HOST') is not None else ['console'],
            'level': 'WARNING',  # Set higher level for root to minimize noise
            'propagate': False
        },
    }
}