- Added usage tracking to specialist execution

- Corrected the implementation of usage tracking
- Removed some obsolete debug statements
Josako
2025-03-07 11:10:28 +01:00
parent efff63043a
commit 5bfd3445bb
21 changed files with 215 additions and 255 deletions
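
The core change in this commit replaces the per-message synchronous database writes in BusinessEvent with an in-memory buffer that is flushed once per trace through a Celery task, falling back to a direct bulk insert if dispatch fails. A minimal sketch of that pattern, with illustrative names rather than the project's actual API:

from typing import Any, Callable, Dict, List

class BufferedEventLog:
    """Collect log records in memory and persist them in one batch."""

    def __init__(self, send_task: Callable, persist_direct: Callable) -> None:
        self._buffer: List[Dict[str, Any]] = []
        self._send_task = send_task            # e.g. celery.send_task
        self._persist_direct = persist_direct  # synchronous fallback

    def log(self, record: Dict[str, Any]) -> None:
        # No I/O here: appending is cheap compared to a DB round trip.
        self._buffer.append(record)

    def flush(self) -> None:
        if not self._buffer:
            return
        try:
            # Hand the whole batch to a background worker.
            self._send_task('persist_business_events', args=[self._buffer])
        except Exception:
            # Broker unavailable: fall back to a direct bulk write.
            self._persist_direct(self._buffer)
        finally:
            self._buffer = []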

View File

@@ -28,7 +28,6 @@ class TemplateManager:
# Initialize template manager
base_dir = "/app"
self.templates_dir = os.path.join(base_dir, 'config', 'prompts')
- app.logger.debug(f'Loading templates from {self.templates_dir}')
self.app = app
self._templates = self._load_templates()
# Log available templates for each supported model

View File

@@ -11,7 +11,7 @@ class BusinessEventLog(db.Model):
tenant_id = db.Column(db.Integer, nullable=False)
trace_id = db.Column(db.String(50), nullable=False)
span_id = db.Column(db.String(50))
- span_name = db.Column(db.String(50))
+ span_name = db.Column(db.String(255))
parent_span_id = db.Column(db.String(50))
document_version_id = db.Column(db.Integer)
document_version_file_size = db.Column(db.Float)
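
Widening span_name from String(50) to String(255) changes the table definition, so deployments with an existing business_event_log table also need a schema migration. A hypothetical Alembic migration (revision identifiers are placeholders):

from alembic import op
import sqlalchemy as sa

revision = 'abc123'        # placeholder
down_revision = 'def456'   # placeholder

def upgrade():
    op.alter_column(
        'business_event_log', 'span_name',
        existing_type=sa.String(50),
        type_=sa.String(255),
    )

def downgrade():
    op.alter_column(
        'business_event_log', 'span_name',
        existing_type=sa.String(255),
        type_=sa.String(50),
    )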

View File

@@ -2,13 +2,14 @@ import os
import uuid
from contextlib import contextmanager
from datetime import datetime
- from typing import Dict, Any, Optional
+ from typing import Dict, Any, Optional, List
from datetime import datetime as dt, timezone as tz
import logging
from .business_event_context import BusinessEventContext
from common.models.entitlements import BusinessEventLog
from common.extensions import db
+ from .celery_utils import current_celery
class BusinessEvent:
@@ -38,6 +39,7 @@ class BusinessEvent:
'call_count': 0,
'interaction_type': None
}
+ self._log_buffer = []
def update_attribute(self, attribute: str, value: any):
if hasattr(self, attribute):
@@ -80,7 +82,7 @@ class BusinessEvent:
self.span_name = span_name
self.parent_span_id = parent_span_id
- self.log(f"Starting span {span_name}")
+ self.log(f"Start")
try:
yield
@@ -88,7 +90,7 @@ class BusinessEvent:
if self.llm_metrics['call_count'] > 0:
self.log_final_metrics()
self.reset_llm_metrics()
- self.log(f"Ending span {span_name}")
+ self.log(f"End")
# Restore the previous span info
if self.spans:
self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
@@ -98,8 +100,8 @@ class BusinessEvent:
self.parent_span_id = None
def log(self, message: str, level: str = 'info'):
logger = logging.getLogger('business_events')
log_data = {
'timestamp': dt.now(tz=tz.utc),
'event_type': self.event_type,
'tenant_id': self.tenant_id,
'trace_id': self.trace_id,
@@ -111,34 +113,16 @@ class BusinessEvent:
'chat_session_id': self.chat_session_id,
'interaction_id': self.interaction_id,
'environment': self.environment,
'message': message,
}
# log to Graylog
getattr(logger, level)(message, extra=log_data)
- # Log to database
- event_log = BusinessEventLog(
- timestamp=dt.now(tz=tz.utc),
- event_type=self.event_type,
- tenant_id=self.tenant_id,
- trace_id=self.trace_id,
- span_id=self.span_id,
- span_name=self.span_name,
- parent_span_id=self.parent_span_id,
- document_version_id=self.document_version_id,
- document_version_file_size=self.document_version_file_size,
- chat_session_id=self.chat_session_id,
- interaction_id=self.interaction_id,
- environment=self.environment,
- message=message
- )
- db.session.add(event_log)
- db.session.commit()
+ self._log_buffer.append(log_data)
def log_llm_metrics(self, metrics: dict, level: str = 'info'):
self.update_llm_metrics(metrics)
message = "LLM Metrics"
logger = logging.getLogger('business_events')
log_data = {
'timestamp': dt.now(tz=tz.utc),
'event_type': self.event_type,
'tenant_id': self.tenant_id,
'trace_id': self.trace_id,
@@ -155,38 +139,15 @@ class BusinessEvent:
'llm_metrics_completion_tokens': metrics['completion_tokens'],
'llm_metrics_total_time': metrics['time_elapsed'],
'llm_interaction_type': metrics['interaction_type'],
'message': message,
}
# log to Graylog
getattr(logger, level)(message, extra=log_data)
- # Log to database
- event_log = BusinessEventLog(
- timestamp=dt.now(tz=tz.utc),
- event_type=self.event_type,
- tenant_id=self.tenant_id,
- trace_id=self.trace_id,
- span_id=self.span_id,
- span_name=self.span_name,
- parent_span_id=self.parent_span_id,
- document_version_id=self.document_version_id,
- document_version_file_size=self.document_version_file_size,
- chat_session_id=self.chat_session_id,
- interaction_id=self.interaction_id,
- environment=self.environment,
- llm_metrics_total_tokens=metrics['total_tokens'],
- llm_metrics_prompt_tokens=metrics['prompt_tokens'],
- llm_metrics_completion_tokens=metrics['completion_tokens'],
- llm_metrics_total_time=metrics['time_elapsed'],
- llm_interaction_type=metrics['interaction_type'],
- message=message
- )
- db.session.add(event_log)
- db.session.commit()
+ self._log_buffer.append(log_data)
def log_final_metrics(self, level: str = 'info'):
logger = logging.getLogger('business_events')
message = "Final LLM Metrics"
log_data = {
'timestamp': dt.now(tz=tz.utc),
'event_type': self.event_type,
'tenant_id': self.tenant_id,
'trace_id': self.trace_id,
@@ -204,34 +165,65 @@ class BusinessEvent:
'llm_metrics_total_time': self.llm_metrics['total_time'],
'llm_metrics_call_count': self.llm_metrics['call_count'],
'llm_interaction_type': self.llm_metrics['interaction_type'],
'message': message,
}
# log to Graylog
getattr(logger, level)(message, extra=log_data)
+ self._log_buffer.append(log_data)
- # Log to database
- event_log = BusinessEventLog(
- timestamp=dt.now(tz=tz.utc),
- event_type=self.event_type,
- tenant_id=self.tenant_id,
- trace_id=self.trace_id,
- span_id=self.span_id,
- span_name=self.span_name,
- parent_span_id=self.parent_span_id,
- document_version_id=self.document_version_id,
- document_version_file_size=self.document_version_file_size,
- chat_session_id=self.chat_session_id,
- interaction_id=self.interaction_id,
- environment=self.environment,
- llm_metrics_total_tokens=self.llm_metrics['total_tokens'],
- llm_metrics_prompt_tokens=self.llm_metrics['prompt_tokens'],
- llm_metrics_completion_tokens=self.llm_metrics['completion_tokens'],
- llm_metrics_total_time=self.llm_metrics['total_time'],
- llm_metrics_call_count=self.llm_metrics['call_count'],
- llm_interaction_type=self.llm_metrics['interaction_type'],
- message=message
- )
- db.session.add(event_log)
- db.session.commit()
+ @staticmethod
+ def _direct_db_persist(log_entries: List[Dict[str, Any]]):
+ """Fallback method to directly persist logs to DB if async fails"""
+ try:
+ db_entries = []
+ for entry in log_entries:
+ event_log = BusinessEventLog(
+ timestamp=entry.pop('timestamp'),
+ event_type=entry.pop('event_type'),
+ tenant_id=entry.pop('tenant_id'),
+ trace_id=entry.pop('trace_id'),
+ span_id=entry.pop('span_id', None),
+ span_name=entry.pop('span_name', None),
+ parent_span_id=entry.pop('parent_span_id', None),
+ document_version_id=entry.pop('document_version_id', None),
+ document_version_file_size=entry.pop('document_version_file_size', None),
+ chat_session_id=entry.pop('chat_session_id', None),
+ interaction_id=entry.pop('interaction_id', None),
+ environment=entry.pop('environment', None),
+ llm_metrics_total_tokens=entry.pop('llm_metrics_total_tokens', None),
+ llm_metrics_prompt_tokens=entry.pop('llm_metrics_prompt_tokens', None),
+ llm_metrics_completion_tokens=entry.pop('llm_metrics_completion_tokens', None),
+ llm_metrics_total_time=entry.pop('llm_metrics_total_time', None),
+ llm_metrics_call_count=entry.pop('llm_metrics_call_count', None),
+ llm_interaction_type=entry.pop('llm_interaction_type', None),
+ message=entry.pop('message', None)
+ )
+ db_entries.append(event_log)
+ # Bulk insert
+ db.session.bulk_save_objects(db_entries)
+ db.session.commit()
+ except Exception as e:
+ logger = logging.getLogger('business_events')
+ logger.error(f"Failed to persist logs directly to DB: {e}")
+ db.session.rollback()
+ def _flush_log_buffer(self):
+ """Flush the log buffer to the database via a Celery task"""
+ if self._log_buffer:
+ try:
+ # Send to Celery task
+ current_celery.send_task(
+ 'persist_business_events',
+ args=[self._log_buffer],
+ queue='entitlements' # Or dedicated log queue
+ )
+ except Exception as e:
+ # Fallback to direct DB write in case of issues with Celery
+ logger = logging.getLogger('business_events')
+ logger.error(f"Failed to send logs to Celery. Falling back to direct DB: {e}")
+ self._direct_db_persist(self._log_buffer)
+ # Clear the buffer after sending
+ self._log_buffer = []
def __enter__(self):
self.log(f'Starting Trace for {self.event_type}')
@@ -242,4 +234,5 @@ class BusinessEvent:
self.log_final_metrics()
self.reset_llm_metrics()
self.log(f'Ending Trace for {self.event_type}')
+ self._flush_log_buffer()
return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb)
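
The persist_business_events task that _flush_log_buffer dispatches is not part of this diff. A worker-side consumer consistent with _direct_db_persist might look like the sketch below. Note that log_data carries datetime objects, which Celery's default JSON serializer cannot encode, so in practice the timestamps would need converting (for example with isoformat()) before dispatch.

# Hypothetical consumer for the dispatched batches; the real task is not
# shown in this commit. Field handling mirrors _direct_db_persist above.
from celery import shared_task

from common.extensions import db
from common.models.entitlements import BusinessEventLog

@shared_task(name='persist_business_events')
def persist_business_events(log_entries):
    try:
        db.session.bulk_save_objects(
            [BusinessEventLog(**entry) for entry in log_entries]
        )
        db.session.commit()
    except Exception:
        db.session.rollback()
        raise  # surface the failure so Celery retry policies can apply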

View File

@@ -120,9 +120,6 @@ class CacheHandler(Generic[T]):
region_name = getattr(self.region, 'name', 'default_region')
- current_app.logger.debug(f"Generating cache key in region {region_name} with prefix {self.prefix} "
- f"for {self._key_components}")
key = CacheKey({k: identifiers[k] for k in self._key_components})
return f"{region_name}_{self.prefix}:{str(key)}"
@@ -138,13 +135,10 @@ class CacheHandler(Generic[T]):
Cached or newly created value
"""
cache_key = self.generate_key(**identifiers)
- current_app.logger.debug(f"Getting Cache key: {cache_key}")
def creator():
instance = creator_func(**identifiers)
- current_app.logger.debug("Caching object created and received. Now serializing...")
serialized_instance = self._to_cache_data(instance)
- current_app.logger.debug(f"Caching object serialized and received:\n{serialized_instance}")
return serialized_instance
cached_data = self.region.get_or_create(
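
The creator closure handed to region.get_or_create is standard dogpile.cache usage: it runs only on a cache miss, and its return value is stored under the generated key. A standalone sketch (the in-memory backend and expiration time are illustrative; the application presumably configures Redis):

from dogpile.cache import make_region

# Illustrative configuration; backend and expiration are assumptions.
region = make_region().configure('dogpile.cache.memory', expiration_time=300)

def expensive_lookup(key: str) -> str:
    return key.upper()  # stands in for a DB query or file read

def cached_lookup(key: str) -> str:
    def creator():
        # Invoked only on a cache miss; the result is cached afterwards.
        return expensive_lookup(key)
    return region.get_or_create(f'demo:{key}', creator)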

View File

@@ -69,10 +69,8 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
Returns:
Configuration data
"""
- current_app.logger.debug(f"Loading specific configuration for {type_name}, version: {version_str} - no cache")
version_tree = self.version_tree_cache.get_versions(type_name)
versions = version_tree['versions']
- current_app.logger.debug(f"Loaded specific versions for {type_name}, versions: {versions}")
if version_str == 'latest':
version_str = version_tree['latest_version']
@@ -81,12 +79,10 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
raise ValueError(f"Version {version_str} not found for {type_name}")
file_path = versions[version_str]['file_path']
- current_app.logger.debug(f'Trying to load configuration from {file_path}')
try:
with open(file_path) as f:
config = yaml.safe_load(f)
- current_app.logger.debug(f"Loaded config for {type_name}{version_str}: {config}")
return config
except Exception as e:
raise ValueError(f"Error loading config from {file_path}: {e}")
@@ -103,8 +99,6 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]):
Returns:
Configuration data
"""
- current_app.logger.debug(f"Trying to retrieve config for {self.config_type}, type name: {type_name}, "
- f"version: {version}")
if version is None:
version_str = self.version_tree_cache.get_latest_version(type_name)
elif is_major_minor(version):
@@ -145,7 +139,6 @@ class BaseConfigVersionTreeCacheHandler(CacheHandler[Dict[str, Any]]):
Returns:
Dict containing available versions and their metadata
"""
- current_app.logger.debug(f"Loading version tree for {type_name} - no cache")
type_path = Path(self._config_dir) / type_name
if not type_path.exists():
raise ValueError(f"No configuration found for type {type_name}")
@@ -180,8 +173,6 @@ class BaseConfigVersionTreeCacheHandler(CacheHandler[Dict[str, Any]]):
current_app.logger.error(f"Error loading version {ver}: {e}")
continue
- current_app.logger.debug(f"Loaded versions for {type_name}: {versions}")
- current_app.logger.debug(f"Latest version for {type_name}: {latest_version}")
return {
'versions': versions,
'latest_version': latest_version
@@ -221,7 +212,6 @@ class BaseConfigVersionTreeCacheHandler(CacheHandler[Dict[str, Any]]):
Returns:
Dict with version information
"""
- current_app.logger.debug(f"Trying to get version tree for {self.config_type}, {type_name}")
return self.get(
lambda type_name: self._load_version_tree(type_name),
type_name=type_name
@@ -319,7 +309,6 @@ class BaseConfigTypesCacheHandler(CacheHandler[Dict[str, Any]]):
def _load_type_definitions(self) -> Dict[str, Dict[str, str]]:
"""Load type definitions from the corresponding type_defs module"""
- current_app.logger.debug(f"Loading type definitions for {self.config_type} - no cache")
if not self._types_module:
raise ValueError("_types_module must be set by subclass")
@@ -331,13 +320,10 @@ class BaseConfigTypesCacheHandler(CacheHandler[Dict[str, Any]]):
for type_id, info in self._types_module.items()
}
- current_app.logger.debug(f"Loaded type definitions: {type_definitions}")
return type_definitions
def get_types(self) -> Dict[str, Dict[str, str]]:
"""Get dictionary of available types with name and description"""
- current_app.logger.debug(f"Trying to retrieve type definitions for {self.config_type}")
result = self.get(
lambda type_name: self._load_type_definitions(),
type_name=f'{self.config_type}_types',
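
get_config dispatches on the shape of the requested version: None resolves to the latest version, a major.minor string goes through is_major_minor, and anything else is treated as an exact version. A sketch of that resolution, assuming plain 'major.minor' version strings (the real is_major_minor helper is not shown in this diff):

import re
from typing import Dict, Optional

def is_major_minor(version: str) -> bool:
    """Assumed behaviour: matches strings like '1.2'."""
    return re.fullmatch(r'\d+\.\d+', version) is not None

def resolve_version(version: Optional[str], versions: Dict[str, dict],
                    latest_version: str) -> str:
    # None -> latest; a known 'major.minor' -> as-is; otherwise reject.
    if version is None:
        return latest_version
    if is_major_minor(version) and version in versions:
        return version
    raise ValueError(f'Version {version} not found')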

View File

@@ -211,8 +211,6 @@ class BaseCrewAIConfigProcessor:
tasks=self._process_task_configs(specialist_config),
tools=self._process_tool_configs(specialist_config)
)
- current_app.logger.debug(f"Processed config for tenant {self.tenant_id}, specialist {self.specialist_id}:\n"
- f"{processed_config}")
return processed_config
except Exception as e:

View File

@@ -58,39 +58,6 @@ def init_celery(celery, app, is_beat=False):
celery.Task = ContextTask
- # Original init_celery before updating for beat
- # def init_celery(celery, app):
- # celery_app.main = app.name
- # app.logger.debug(f'CELERY_BROKER_URL: {app.config["CELERY_BROKER_URL"]}')
- # app.logger.debug(f'CELERY_RESULT_BACKEND: {app.config["CELERY_RESULT_BACKEND"]}')
- # celery_config = {
- # 'broker_url': app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'),
- # 'result_backend': app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0'),
- # 'task_serializer': app.config.get('CELERY_TASK_SERIALIZER', 'json'),
- # 'result_serializer': app.config.get('CELERY_RESULT_SERIALIZER', 'json'),
- # 'accept_content': app.config.get('CELERY_ACCEPT_CONTENT', ['json']),
- # 'timezone': app.config.get('CELERY_TIMEZONE', 'UTC'),
- # 'enable_utc': app.config.get('CELERY_ENABLE_UTC', True),
- # 'task_routes': {'eveai_worker.tasks.create_embeddings': {'queue': 'embeddings',
- # 'routing_key': 'embeddings.create_embeddings'}},
- # }
- # celery_app.conf.update(**celery_config)
- #
- # # Setting up Celery task queues
- # celery_app.conf.task_queues = (
- # Queue('default', routing_key='task.#'),
- # Queue('embeddings', routing_key='embeddings.#', queue_arguments={'x-max-priority': 10}),
- # Queue('llm_interactions', routing_key='llm_interactions.#', queue_arguments={'x-max-priority': 5}),
- # )
- #
- # # Ensuring tasks execute with Flask application context
- # class ContextTask(celery.Task):
- # def __call__(self, *args, **kwargs):
- # with app.app_context():
- # return self.run(*args, **kwargs)
- #
- # celery.Task = ContextTask
def make_celery(app_name, config):
return celery_app
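
What survives of init_celery here is the ContextTask binding (the class body sits above this hunk); it wraps every task invocation in a Flask application context, the same pattern visible in the deleted comment block. In isolation:

from celery import Celery
from flask import Flask

def init_celery(celery: Celery, app: Flask) -> None:
    """Run every Celery task inside the Flask application context."""
    class ContextTask(celery.Task):
        def __call__(self, *args, **kwargs):
            # app_context() makes current_app, the db session, etc. available.
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask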

View File

@@ -5,58 +5,11 @@ import json
def log_request_middleware(app):
- # @app.before_request
- # def log_request_info():
- # start_time = time.time()
- # app.logger.debug(f"Request URL: {request.url}")
- # app.logger.debug(f"Request Method: {request.method}")
- # app.logger.debug(f"Request Headers: {request.headers}")
- # app.logger.debug(f"Time taken for logging request info: {time.time() - start_time} seconds")
- # try:
- # app.logger.debug(f"Request Body: {request.get_data()}")
- # except Exception as e:
- # app.logger.error(f"Error reading request body: {e}")
- # app.logger.debug(f"Time taken for logging request body: {time.time() - start_time} seconds")
- # @app.before_request
- # def check_csrf_token():
- # start_time = time.time()
- # if request.method == "POST":
- # csrf_token = request.form.get("csrf_token")
- # app.logger.debug(f"CSRF Token: {csrf_token}")
- # app.logger.debug(f"Time taken for logging CSRF token: {time.time() - start_time} seconds")
- # @app.before_request
- # def log_user_info():
- # if current_user and current_user.is_authenticated:
- # app.logger.debug(f"Before: User ID: {current_user.id}")
- # app.logger.debug(f"Before: User Email: {current_user.email}")
- # app.logger.debug(f"Before: User Roles: {current_user.roles}")
- # else:
- # app.logger.debug("After: No user logged in")
@app.before_request
def log_session_state_before():
pass
- # @app.after_request
- # def log_response_info(response):
- # start_time = time.time()
- # app.logger.debug(f"Response Status: {response.status}")
- # app.logger.debug(f"Response Headers: {response.headers}")
- #
- # app.logger.debug(f"Time taken for logging response info: {time.time() - start_time} seconds")
- # return response
- # @app.after_request
- # def log_user_after_request(response):
- # if current_user and current_user.is_authenticated:
- # app.logger.debug(f"After: User ID: {current_user.id}")
- # app.logger.debug(f"after: User Email: {current_user.email}")
- # app.logger.debug(f"After: User Roles: {current_user.roles}")
- # else:
- # app.logger.debug("After: No user logged in")
@app.after_request
def log_session_state_after(response):
return response
@@ -149,8 +102,3 @@ def register_request_debugger(app):
# Format the debug info as a pretty-printed JSON string with indentation
formatted_debug_info = json.dumps(debug_info, indent=2, sort_keys=True)
- # Log everything in a single statement
- app.logger.debug(
- "Request Debug Information\n",
- extra={"request_debug\n": formatted_debug_info}
- )

View File

@@ -367,7 +367,6 @@ def mark_tenant_storage_dirty(tenant_id):
def cope_with_local_url(url):
- current_app.logger.debug(f'Incomming URL: {url}')
parsed_url = urlparse(url)
# Check if this is an internal WordPress URL (TESTING) and rewrite it
if parsed_url.netloc in [current_app.config['EXTERNAL_WORDPRESS_BASE_URL']]:
@@ -376,7 +375,6 @@ def cope_with_local_url(url):
netloc=f"{current_app.config['WORDPRESS_HOST']}:{current_app.config['WORDPRESS_PORT']}"
)
url = urlunparse(parsed_url)
- current_app.logger.debug(f'Translated Wordpress URL to: {url}')
return url
@@ -412,10 +410,6 @@ def lookup_document(tenant_id: int, lookup_criteria: dict, metadata_type: str) -
for key, value in lookup_criteria.items():
query = query.filter(metadata_field[key].astext == str(value))
# Log the final SQL query
current_app.logger.debug(
f"Final SQL query: {query.statement.compile(compile_kwargs={'literal_binds': True})}")
# Get first result
result = query.first()
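
The astext comparison in lookup_document is the usual SQLAlchemy idiom for matching values inside a PostgreSQL JSONB column. A self-contained sketch with illustrative model and column names:

from sqlalchemy import Column, Integer
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Document(Base):  # illustrative model, not the project's
    __tablename__ = 'document'
    id = Column(Integer, primary_key=True)
    user_metadata = Column(JSONB)

def lookup(session, criteria: dict):
    query = session.query(Document)
    for key, value in criteria.items():
        # [key] indexes into the JSONB value; .astext casts the element
        # to text so it can be compared with the stringified criterion.
        query = query.filter(Document.user_metadata[key].astext == str(value))
    return query.first()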

View File

@@ -99,13 +99,11 @@ def _create_agent(
timestamp: Optional[dt] = None
) -> EveAIAgent:
"""Create an agent with the given configuration."""
- current_app.logger.debug(f"Creating agent {agent_type} {agent_version} with {name}, {description}")
if timestamp is None:
timestamp = dt.now(tz=tz.utc)
# Get agent configuration from cache
agent_config = cache_manager.agents_config_cache.get_config(agent_type, agent_version)
- current_app.logger.debug(f"Agent Config: {agent_config}")
agent = EveAIAgent(
specialist_id=specialist_id,
@@ -142,7 +140,6 @@ def _create_task(
# Get task configuration from cache
task_config = cache_manager.tasks_config_cache.get_config(task_type, task_version)
- current_app.logger.debug(f"Task Config: {task_config}")
task = EveAITask(
specialist_id=specialist_id,
@@ -180,7 +177,6 @@ def _create_tool(
# Get tool configuration from cache
tool_config = cache_manager.tools_config_cache.get_config(tool_type, tool_version)
- current_app.logger.debug(f"Tool Config: {tool_config}")
tool = EveAITool(
specialist_id=specialist_id,

View File

@@ -28,15 +28,10 @@ def perform_startup_invalidation(app):
try:
# Check if invalidation was already performed
if not redis_client.get(marker_key):
- app.logger.debug(f"Performing cache invalidation at startup time {startup_time}")
- app.logger.debug(f"Current cache keys: {redis_client.keys('*')}")
# Perform invalidation
cache_manager.invalidate_region('eveai_config')
cache_manager.invalidate_region('eveai_chat_workers')
- app.logger.debug(f"Cache keys after invalidation: {redis_client.keys('*')}")
redis_client.setex(marker_key, 180, str(startup_time))
app.logger.info("Startup cache invalidation completed")
else: