- logging improvement and simplification (no more graylog)

- Traicie Selection Specialist Round Trip - Session improvements + debugging enabled - Tone of Voice & Language Level definitions introduced
2025-06-20 07:58:06 +02:00
parent babcd6ec04
commit 5b2c04501c
29 changed files with 916 additions and 167 deletions
--- a/config/logging_config.py
+++ b/config/logging_config.py
@@ -1,15 +1,13 @@
+
 import json
 import os
+import sys
 from datetime import datetime as dt, timezone as tz

 from flask import current_app
-from graypy import GELFUDPHandler
 import logging
 import logging.config

-# Graylog configuration
-GRAYLOG_HOST = os.environ.get('GRAYLOG_HOST', 'localhost')
-GRAYLOG_PORT = int(os.environ.get('GRAYLOG_PORT', 12201))
 env = os.environ.get('FLASK_ENV', 'development')


@@ -144,23 +142,6 @@ class TuningFormatter(logging.Formatter):
        return formatted_msg


-class GraylogFormatter(logging.Formatter):
-    """Maintains existing Graylog formatting while adding tuning fields"""
-
-    def format(self, record):
-        if getattr(record, 'is_tuning_log', False):
-            # Add tuning-specific fields to Graylog
-            record.tuning_fields = {
-                'is_tuning_log': True,
-                'tuning_type': record.tuning_type,
-                'tenant_id': record.tenant_id,
-                'catalog_id': record.catalog_id,
-                'specialist_id': record.specialist_id,
-                'retriever_id': record.retriever_id,
-                'processor_id': record.processor_id,
-                'session_id': record.session_id,
-            }
-        return super().format(record)

 class TuningLogger:
    """Helper class to manage tuning logs with consistent structure"""
@@ -177,10 +158,10 @@ class TuningLogger:
            specialist_id: Optional specialist ID for context
            retriever_id: Optional retriever ID for context
            processor_id: Optional processor ID for context
-            session_id: Optional session ID for context and log file naming
-            log_file: Optional custom log file name to use
+            session_id: Optional session ID for context
+            log_file: Optional custom log file name (ignored - all logs go to tuning.log)
        """
-
+        # Always use the standard tuning logger
        self.logger = logging.getLogger(logger_name)
        self.tenant_id = tenant_id
        self.catalog_id = catalog_id
@@ -188,63 +169,8 @@ class TuningLogger:
        self.retriever_id = retriever_id
        self.processor_id = processor_id
        self.session_id = session_id
-        self.log_file = log_file
-        # Determine whether to use a session-specific logger
-        if session_id:
-            # Create a unique logger name for this session
-            session_logger_name = f"{logger_name}_{session_id}"
-            self.logger = logging.getLogger(session_logger_name)

-            # If this logger doesn't have handlers yet, configure it
-            if not self.logger.handlers:
-                # Determine log file path
-                if not log_file and session_id:
-                    log_file = f"logs/tuning_{session_id}.log"
-                elif not log_file:
-                    log_file = "logs/tuning.log"
-
-                # Configure the logger
-                self._configure_session_logger(log_file)
-        else:
-            # Use the standard tuning logger
-            self.logger = logging.getLogger(logger_name)
-
-    def _configure_session_logger(self, log_file):
-        """Configure a new session-specific logger with appropriate handlers"""
-        # Create and configure a file handler
-        file_handler = logging.handlers.RotatingFileHandler(
-            filename=log_file,
-            maxBytes=1024 * 1024 * 3,  # 3MB
-            backupCount=3
-        )
-        file_handler.setFormatter(TuningFormatter())
-        file_handler.setLevel(logging.DEBUG)
-
-        # Add the file handler to the logger
-        self.logger.addHandler(file_handler)
-
-        # Add Graylog handler in production
-        env = os.environ.get('FLASK_ENV', 'development')
-        if env == 'production':
-            try:
-                graylog_handler = GELFUDPHandler(
-                    host=GRAYLOG_HOST,
-                    port=GRAYLOG_PORT,
-                    debugging_fields=True
-                )
-                graylog_handler.setFormatter(GraylogFormatter())
-                self.logger.addHandler(graylog_handler)
-            except Exception as e:
-                # Fall back to just file logging if Graylog setup fails
-                fallback_logger = logging.getLogger('eveai_app')
-                fallback_logger.warning(f"Failed to set up Graylog handler: {str(e)}")
-
-        # Set logger level and disable propagation
-        self.logger.setLevel(logging.DEBUG)
-        self.logger.propagate = False
-
-
-def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DEBUG):
+    def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DEBUG):
        """Log a tuning event with structured data"""
        try:
            # Create a standard LogRecord for tuning
@@ -275,13 +201,82 @@ def log_tuning(self, tuning_type: str, message: str, data=None, level=logging.DE
            self.logger.handle(record)

        except Exception as e:
-            fallback_logger = logging.getLogger('eveai_workers')
-            fallback_logger.exception(f"Failed to log tuning message: {str(e)}")
+            print(f"Failed to log tuning message: {str(e)}")


 # Set the custom log record factory
 logging.setLogRecordFactory(TuningLogRecord)

+def configure_logging():
+    """Configure logging based on environment
+
+    When running in Kubernetes, directs logs to stdout in JSON format
+    Otherwise uses file-based logging for development/testing
+    """
+    try:
+        # Get the absolute path to the logs directory
+        base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+        logs_dir = os.path.join(base_dir, 'logs')
+
+        # Make sure the logs directory exists with the right permissions
+        if not os.path.exists(logs_dir):
+            try:
+                os.makedirs(logs_dir, exist_ok=True)
+                print(f"Logs directory aangemaakt op: {logs_dir}")
+            except (IOError, PermissionError) as e:
+                print(f"WAARSCHUWING: Kan logs directory niet aanmaken: {e}")
+                print(f"Logs worden mogelijk niet correct geschreven!")
+
+        # Check if running in Kubernetes
+        in_kubernetes = os.environ.get('KUBERNETES_SERVICE_HOST') is not None
+
+        # Check whether the pythonjsonlogger package is available when running in Kubernetes
+        if in_kubernetes:
+            try:
+                import pythonjsonlogger.jsonlogger
+                has_json_logger = True
+            except ImportError:
+                print("WAARSCHUWING: python-json-logger pakket is niet geïnstalleerd.")
+                print("Voer 'pip install python-json-logger>=2.0.7' uit om JSON logging in te schakelen.")
+                print("Terugvallen op standaard logging formaat.")
+                has_json_logger = False
+                in_kubernetes = False  # Fall back to standard logging
+        else:
+            has_json_logger = False
+
+        # Apply the configuration
+        logging_config = dict(LOGGING)
+
+        # Make the json_console handler fall back to the standard formatter if pythonjsonlogger is not available
+        if not has_json_logger and 'json_console' in logging_config['handlers']:
+            # Switch the json_console handler to the standard formatter
+            logging_config['handlers']['json_console']['formatter'] = 'standard'
+
+        # In Kubernetes, conditionally modify specific loggers to use JSON console output
+        # This preserves the same logger names but changes where/how they log
+        if in_kubernetes:
+            for logger_name in logging_config['loggers']:
+                if logger_name:  # Skip the root logger
+                    logging_config['loggers'][logger_name]['handlers'] = ['json_console']
+
+        # Check that the logs directory is writable before applying the configuration
+        logs_dir = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs')
+        if os.path.exists(logs_dir) and not os.access(logs_dir, os.W_OK):
+            print(f"WAARSCHUWING: Logs directory bestaat maar is niet schrijfbaar: {logs_dir}")
+            print("Logs worden mogelijk niet correct geschreven!")
+
+        logging.config.dictConfig(logging_config)
+        logging.info(f"Logging configured. Environment: {'Kubernetes' if in_kubernetes else 'Development/Testing'}")
+        logging.info(f"Logs directory: {logs_dir}")
+    except Exception as e:
+        print(f"Error configuring logging: {str(e)}")
+        print("Gedetailleerde foutinformatie:")
+        import traceback
+        traceback.print_exc()
+        # Fall back to basic configuration
+        logging.basicConfig(level=logging.INFO)
+
+

 LOGGING = {
    'version': 1,
@@ -290,7 +285,7 @@ LOGGING = {
        'file_app': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_app.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_app.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -298,7 +293,7 @@ LOGGING = {
        'file_workers': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_workers.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_workers.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -306,7 +301,7 @@ LOGGING = {
        'file_chat_client': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_chat_client.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_chat_client.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -314,7 +309,7 @@ LOGGING = {
        'file_chat_workers': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_chat_workers.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_chat_workers.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -322,7 +317,7 @@ LOGGING = {
        'file_api': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_api.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_api.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -330,7 +325,7 @@ LOGGING = {
        'file_beat': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_beat.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_beat.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -338,7 +333,7 @@ LOGGING = {
        'file_entitlements': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/eveai_entitlements.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'eveai_entitlements.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -346,7 +341,7 @@ LOGGING = {
        'file_sqlalchemy': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/sqlalchemy.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'sqlalchemy.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -354,7 +349,7 @@ LOGGING = {
        'file_security': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/security.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'security.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -362,7 +357,7 @@ LOGGING = {
        'file_rag_tuning': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/rag_tuning.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'rag_tuning.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -370,7 +365,7 @@ LOGGING = {
        'file_embed_tuning': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/embed_tuning.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'embed_tuning.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -378,7 +373,7 @@ LOGGING = {
        'file_business_events': {
            'level': 'INFO',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/business_events.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'business_events.log'),
            'maxBytes': 1024 * 1024 * 1,  # 1MB
            'backupCount': 2,
            'formatter': 'standard',
@@ -388,100 +383,104 @@ LOGGING = {
            'level': 'DEBUG',
            'formatter': 'standard',
        },
+        'json_console': {
+            'class': 'logging.StreamHandler',
+            'level': 'INFO',
+            'formatter': 'json',
+            'stream': 'ext://sys.stdout',
+        },
        'tuning_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/tuning.log',
+            'filename': os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'logs', 'tuning.log'),
            'maxBytes': 1024 * 1024 * 3,  # 3MB
            'backupCount': 3,
            'formatter': 'tuning',
        },
-        'graylog': {
-            'level': 'DEBUG',
-            'class': 'graypy.GELFUDPHandler',
-            'host': GRAYLOG_HOST,
-            'port': GRAYLOG_PORT,
-            'debugging_fields': True,
-            'formatter': 'graylog'
-        },
    },
    'formatters': {
        'standard': {
            'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d]: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S'
        },
-        'graylog': {
-            'format': '[%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
-                      '[Thread: %(threadName)s]: %(message)s',
-            'datefmt': '%Y-%m-%d %H:%M:%S',
-            '()': GraylogFormatter
-        },
        'tuning': {
            '()': TuningFormatter,
            'datefmt': '%Y-%m-%d %H:%M:%S UTC'
+        },
+        'json': {
+            'format': '%(message)s',
+            'class': 'logging.Formatter' if not 'pythonjsonlogger' in sys.modules else 'pythonjsonlogger.jsonlogger.JsonFormatter',
+            'json_default': lambda obj: str(obj) if isinstance(obj, (dt, Exception)) else None,
+            'json_ensure_ascii': False,
+            'rename_fields': {
+                'asctime': 'timestamp',
+                'levelname': 'severity'
+            },
+            'timestamp': True,
+            'datefmt': '%Y-%m-%dT%H:%M:%S.%fZ'
        }
    },
    'loggers': {
        'eveai_app': {  # logger for the eveai_app
-            'handlers': ['file_app', 'graylog', ] if env == 'production' else ['file_app', ],
+            'handlers': ['file_app'],
            'level': 'DEBUG',
            'propagate': False
        },
        'eveai_workers': {  # logger for the eveai_workers
-            'handlers': ['file_workers', 'graylog', ] if env == 'production' else ['file_workers', ],
+            'handlers': ['file_workers'],
            'level': 'DEBUG',
            'propagate': False
        },
        'eveai_chat_client': {  # logger for the eveai_chat
-            'handlers': ['file_chat_client', 'graylog', ] if env == 'production' else ['file_chat_client', ],
+            'handlers': ['file_chat_client'],
            'level': 'DEBUG',
            'propagate': False
        },
        'eveai_chat_workers': {  # logger for the eveai_chat_workers
-            'handlers': ['file_chat_workers', 'graylog', ] if env == 'production' else ['file_chat_workers', ],
+            'handlers': ['file_chat_workers'],
            'level': 'DEBUG',
            'propagate': False
        },
-        'eveai_api': {  # logger for the eveai_chat_workers
-            'handlers': ['file_api', 'graylog', ] if env == 'production' else ['file_api', ],
+        'eveai_api': {  # logger for the eveai_api
+            'handlers': ['file_api'],
            'level': 'DEBUG',
            'propagate': False
        },
        'eveai_beat': {  # logger for the eveai_beat
-            'handlers': ['file_beat', 'graylog', ] if env == 'production' else ['file_beat', ],
+            'handlers': ['file_beat'],
            'level': 'DEBUG',
            'propagate': False
        },
        'eveai_entitlements': {  # logger for the eveai_entitlements
-            'handlers': ['file_entitlements', 'graylog', ] if env == 'production' else ['file_entitlements', ],
+            'handlers': ['file_entitlements'],
            'level': 'DEBUG',
            'propagate': False
        },
        'sqlalchemy.engine': {  # logger for the sqlalchemy
-            'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ],
+            'handlers': ['file_sqlalchemy'],
            'level': 'DEBUG',
            'propagate': False
        },
        'security': {  # logger for the security
-            'handlers': ['file_security', 'graylog', ] if env == 'production' else ['file_security', ],
+            'handlers': ['file_security'],
            'level': 'DEBUG',
            'propagate': False
        },
        'business_events': {
-            'handlers': ['file_business_events', 'graylog'],
+            'handlers': ['file_business_events'],
            'level': 'DEBUG',
            'propagate': False
        },
        # Single tuning logger
        'tuning': {
-            'handlers': ['tuning_file', 'graylog'] if env == 'production' else ['tuning_file'],
+            'handlers': ['tuning_file'],
            'level': 'DEBUG',
            'propagate': False,
        },
        '': {  # root logger
-            'handlers': ['console'],
+            'handlers': ['console'] if os.environ.get('KUBERNETES_SERVICE_HOST') is None else ['json_console'],
            'level': 'WARNING',  # Set higher level for root to minimize noise
            'propagate': False
        },
    }
-}
+}