From 4ea16521e20d4d4073dbcee280f51479eb5c907a Mon Sep 17 00:00:00 2001
From: Josako
Date: Tue, 25 Mar 2025 15:48:00 +0100
Subject: [PATCH] - Prometheus metrics go via pushgateway, as different worker
 processes might have different registries that are not picked up by
 Prometheus

---
 CHANGELOG.md                            |  32 ++++++
 common/extensions.py                    |   2 -
 common/utils/business_event.py          | 151 ++++++++++++++++++-------
 common/utils/performance_monitoring.py  |  59 ----------
 common/utils/prometheus_utils.py        |  11 ++
 config/config.py                        |  41 ++++---
 docker/compose_dev.yaml                 |  10 ++
 docker/prometheus/prometheus.yml        |   5 +
 eveai_chat_workers/__init__.py          |   3 +-
 9 files changed, 191 insertions(+), 123 deletions(-)
 delete mode 100644 common/utils/performance_monitoring.py
 create mode 100644 common/utils/prometheus_utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 984e8b1..654855e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Security
 - In case of vulnerabilities.
 
+## [2.2.0-alfa]
+
+### Added
+- Mistral AI as main provider for embeddings, chains and specialists
+- Usage measuring for specialists
+- RAG from chain to specialist technology
+- Dossier catalog management possibilities added to eveai_app
+- Asset definition (Paused - other priorities)
+- Prometheus and Grafana
+- Add prometheus monitoring to business events
+- Asynchronous execution of specialists
+
+### Changed
+- Moved choice for AI providers / models to specialists and prompts
+- Improve RAG to not repeat historic answers
+- Fixed embedding model, no more choices allowed
+- Clean URL (of tracking parameters) before adding it to a catalog
+
+### Deprecated
+- For soon-to-be removed features.
+
+### Removed
+- Add Multiple URLs removed from menu
+- Old Specialist items removed from interaction menu
+-
+
+### Fixed
+- Set default language when registering Documents or URLs.
+
+### Security
+- In case of vulnerabilities.
+
 ## [2.1.0-alfa]
 
 ### Added
diff --git a/common/extensions.py b/common/extensions.py
index c38fa7f..df4fc8d 100644
--- a/common/extensions.py
+++ b/common/extensions.py
@@ -15,7 +15,6 @@ from .langchain.templates.template_manager import TemplateManager
 from .utils.cache.eveai_cache_manager import EveAICacheManager
 from .utils.simple_encryption import SimpleEncryption
 from .utils.minio_utils import MinioClient
-from .utils.performance_monitoring import EveAIMetrics
 
 
 # Create extensions
@@ -35,5 +34,4 @@ minio_client = MinioClient()
 metrics = PrometheusMetrics.for_app_factory()
 template_manager = TemplateManager()
 cache_manager = EveAICacheManager()
-eveai_metrics = EveAIMetrics()
 
diff --git a/common/utils/business_event.py b/common/utils/business_event.py
index 1cb8feb..2e5b150 100644
--- a/common/utils/business_event.py
+++ b/common/utils/business_event.py
@@ -6,16 +6,18 @@ from datetime import datetime
 from typing import Dict, Any, Optional, List
 from datetime import datetime as dt, timezone as tz
 import logging
-from prometheus_client import Counter, Histogram, Gauge, Summary
+
+from flask import current_app
+from prometheus_client import Counter, Histogram, Gauge, Summary, push_to_gateway, REGISTRY
 
 from .business_event_context import BusinessEventContext
 from common.models.entitlements import BusinessEventLog
 from common.extensions import db
 from .celery_utils import current_celery
-from common.utils.performance_monitoring import EveAIMetrics
+from common.utils.prometheus_utils import sanitize_label
 
 # Standard duration buckets for all histograms
-DURATION_BUCKETS = EveAIMetrics.get_standard_buckets()
+DURATION_BUCKETS = [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
 
 # Prometheus metrics for business events
 TRACE_COUNTER = Counter(
@@ -112,14 +114,24 @@ class BusinessEvent:
 
         # Prometheus label values must be strings
         self.tenant_id_str = str(self.tenant_id)
+        self.event_type_str = sanitize_label(self.event_type)
         self.specialist_id_str = str(self.specialist_id) if self.specialist_id else ""
         self.specialist_type_str = str(self.specialist_type) if self.specialist_type else ""
-        self.specialist_type_version_str = str(self.specialist_type_version) if self.specialist_type_version else ""
+        self.specialist_type_version_str = sanitize_label(str(self.specialist_type_version)) \
+            if self.specialist_type_version else ""
+        self.span_name_str = ""
+
+        current_app.logger.debug(f"Labels for metrics: "
+                                 f"tenant_id={self.tenant_id_str}, "
+                                 f"event_type={self.event_type_str},"
+                                 f"specialist_id={self.specialist_id_str}, "
+                                 f"specialist_type={self.specialist_type_str}, " +
+                                 f"specialist_type_version={self.specialist_type_version_str}")
 
         # Increment concurrent events gauge when initialized
         CONCURRENT_TRACES.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -128,12 +140,14 @@ class BusinessEvent:
         # Increment trace counter
         TRACE_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).inc()
 
+        self._push_to_gateway()
+
     def update_attribute(self, attribute: str, value: any):
         if hasattr(self, attribute):
             setattr(self, attribute, value)
@@ -143,9 +157,13 @@ class BusinessEvent:
             elif attribute == 'specialist_type':
                 self.specialist_type_str = str(value) if value else ""
             elif attribute == 'specialist_type_version':
-                self.specialist_type_version_str = str(value) if value else ""
+                self.specialist_type_version_str = sanitize_label(str(value)) if value else ""
             elif attribute == 'tenant_id':
                 self.tenant_id_str = str(value)
+            elif attribute == 'event_type':
+                self.event_type_str = sanitize_label(value)
+            elif attribute == 'span_name':
+                self.span_name_str = sanitize_label(value)
         else:
             raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'")
 
@@ -158,13 +176,21 @@ class BusinessEvent:
             self.llm_metrics['interaction_type'] = metrics['interaction_type']
 
         # Track in Prometheus metrics
-        interaction_type = metrics['interaction_type']
+        interaction_type_str = sanitize_label(metrics['interaction_type']) if metrics['interaction_type'] else ""
+
+        current_app.logger.debug(f"Labels for metrics: "
+                                 f"tenant_id={self.tenant_id_str}, "
+                                 f"event_type={self.event_type_str},"
+                                 f"interaction_type={interaction_type_str}, "
+                                 f"specialist_id={self.specialist_id_str}, "
+                                 f"specialist_type={self.specialist_type_str}, "
+                                 f"specialist_type_version={self.specialist_type_version_str}")
 
         # Track token usage
         LLM_TOKENS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             token_type='total',
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
@@ -173,8 +199,8 @@
 
         LLM_TOKENS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             token_type='prompt',
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
@@ -183,8 +209,8 @@
 
         LLM_TOKENS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             token_type='completion',
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
@@ -194,8 +220,8 @@
         # Track duration
         LLM_DURATION.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -204,13 +230,15 @@
         # Track call count
         LLM_CALLS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).inc()
 
+        self._push_to_gateway()
+
     def reset_llm_metrics(self):
         self.llm_metrics['total_tokens'] = 0
         self.llm_metrics['prompt_tokens'] = 0
@@ -236,16 +264,25 @@
         # Set the new span info
         self.span_id = new_span_id
         self.span_name = span_name
+        self.span_name_str = sanitize_label(span_name) if span_name else ""
         self.parent_span_id = parent_span_id
 
         # Track start time for the span
         span_start_time = time.time()
 
+        current_app.logger.debug(f"Labels for metrics: "
+                                 f"tenant_id={self.tenant_id_str}, "
+                                 f"event_type={self.event_type_str}, "
f"activity_name={self.span_name_str}, " + f"specialist_id={self.specialist_id_str}, " + f"specialist_type={self.specialist_type_str}, " + f"specialist_type_version={self.specialist_type_version_str}") + # Increment span metrics - using span_name as activity_name for metrics SPAN_COUNTER.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str @@ -254,13 +291,15 @@ class BusinessEvent: # Increment concurrent spans gauge CONCURRENT_SPANS.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str ).inc() + self._push_to_gateway() + self.log(f"Start") try: @@ -272,8 +311,8 @@ class BusinessEvent: # Observe span duration SPAN_DURATION.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str @@ -282,13 +321,15 @@ class BusinessEvent: # Decrement concurrent spans gauge CONCURRENT_SPANS.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str ).dec() + self._push_to_gateway() + if self.llm_metrics['call_count'] > 0: self.log_final_metrics() self.reset_llm_metrics() @@ -296,10 +337,12 @@ class BusinessEvent: # Restore the previous span info if self.spans: self.span_id, self.span_name, self.parent_span_id = self.spans.pop() + self.span_name_str = sanitize_label(span_name) if span_name else "" else: self.span_id = None self.span_name = None self.parent_span_id = None + self.span_name_str = "" @asynccontextmanager async def create_span_async(self, span_name: str): @@ -314,16 +357,25 @@ class BusinessEvent: # Set the new span info self.span_id = new_span_id self.span_name = span_name + self.span_name_str = sanitize_label(span_name) if span_name else "" self.parent_span_id = parent_span_id # Track start time for the span span_start_time = time.time() + current_app.logger.debug(f"Labels for metrics: " + f"tenant_id={self.tenant_id_str}, " + f"event_type={self.event_type_str}, " + f"activity_name={self.span_name_str}, " + f"specialist_id={self.specialist_id_str}, " + f"specialist_type={self.specialist_type_str}, " + f"specialist_type_version={self.specialist_type_version_str}") + # Increment span metrics - using span_name as activity_name for metrics SPAN_COUNTER.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str @@ -332,13 +384,15 @@ class BusinessEvent: # Increment concurrent spans gauge CONCURRENT_SPANS.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, 
+            event_type=self.event_type_str,
+            activity_name=self.span_name_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).inc()
 
+        self._push_to_gateway()
+
         self.log(f"Start")
 
         try:
@@ -350,8 +404,8 @@
             # Observe span duration
             SPAN_DURATION.labels(
                 tenant_id=self.tenant_id_str,
-                event_type=self.event_type,
-                activity_name=span_name,
+                event_type=self.event_type_str,
+                activity_name=self.span_name_str,
                 specialist_id=self.specialist_id_str,
                 specialist_type=self.specialist_type_str,
                 specialist_type_version=self.specialist_type_version_str
@@ -360,13 +414,15 @@
             # Decrement concurrent spans gauge
             CONCURRENT_SPANS.labels(
                 tenant_id=self.tenant_id_str,
-                event_type=self.event_type,
-                activity_name=span_name,
+                event_type=self.event_type_str,
+                activity_name=self.span_name_str,
                 specialist_id=self.specialist_id_str,
                 specialist_type=self.specialist_type_str,
                 specialist_type_version=self.specialist_type_version_str
             ).dec()
 
+            self._push_to_gateway()
+
             if self.llm_metrics['call_count'] > 0:
                 self.log_final_metrics()
                 self.reset_llm_metrics()
@@ -374,10 +430,12 @@
             # Restore the previous span info
             if self.spans:
                 self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
+                self.span_name_str = sanitize_label(self.span_name) if self.span_name else ""
             else:
                 self.span_id = None
                 self.span_name = None
                 self.parent_span_id = None
+                self.span_name_str = ""
 
     def log(self, message: str, level: str = 'info', extra_fields: Dict[str, Any] = None):
         log_data = {
@@ -526,6 +584,17 @@
         # Clear the buffer after sending
         self._log_buffer = []
 
+    def _push_to_gateway(self):
+        # Push metrics to the gateway
+        try:
+            push_to_gateway(
+                current_app.config['PUSH_GATEWAY_URL'],
+                job=current_app.config['COMPONENT_NAME'],
+                registry=REGISTRY
+            )
+        except Exception as e:
+            current_app.logger.error(f"Failed to push metrics to Prometheus Push Gateway: {e}")
+
     def __enter__(self):
         self.trace_start_time = time.time()
         self.log(f'Starting Trace for {self.event_type}')
@@ -537,7 +606,7 @@
         # Record trace duration
         TRACE_DURATION.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -546,18 +615,22 @@
         # Decrement concurrent traces gauge
         CONCURRENT_TRACES.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).dec()
 
+        self._push_to_gateway()
+
         if self.llm_metrics['call_count'] > 0:
             self.log_final_metrics()
             self.reset_llm_metrics()
 
         self.log(f'Ending Trace for {self.event_type}', extra_fields={'trace_duration': trace_total_time})
         self._flush_log_buffer()
+
+
         return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb)
 
     async def __aenter__(self):
@@ -571,7 +644,7 @@
         # Record trace duration
         TRACE_DURATION.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -580,12 +653,14 @@
         # Decrement concurrent traces gauge
         CONCURRENT_TRACES.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).dec()
 
+        self._push_to_gateway()
+
         if self.llm_metrics['call_count'] > 0:
             self.log_final_metrics()
             self.reset_llm_metrics()
diff --git a/common/utils/performance_monitoring.py b/common/utils/performance_monitoring.py
deleted file mode 100644
index 3727000..0000000
--- a/common/utils/performance_monitoring.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import time
-import threading
-from contextlib import contextmanager
-from functools import wraps
-from prometheus_client import Counter, Histogram, Summary, start_http_server, Gauge
-from flask import current_app, g, request, Flask
-
-
-class EveAIMetrics:
-    """
-    Central class for Prometheus metrics infrastructure.
-    This class initializes the Prometheus HTTP server and provides
-    shared functionality for metrics across components.
-
-    Component-specific metrics should be defined in their respective modules.
-    """
-
-    def __init__(self, app: Flask = None):
-        self.app = app
-        self._metrics_server_started = False
-        if app is not None:
-            self.init_app(app)
-
-    def init_app(self, app: Flask):
-        """Initialize metrics with Flask app and start Prometheus server"""
-        self.app = app
-        self._start_metrics_server()
-
-    def _start_metrics_server(self):
-        """Start the Prometheus metrics HTTP server if not already running"""
-        if not self._metrics_server_started:
-            try:
-                metrics_port = self.app.config.get('PROMETHEUS_PORT', 8000)
-                start_http_server(metrics_port)
-                self.app.logger.info(f"Prometheus metrics server started on port {metrics_port}")
-                self._metrics_server_started = True
-            except Exception as e:
-                self.app.logger.error(f"Failed to start metrics server: {e}")
-
-    @staticmethod
-    def get_standard_buckets():
-        """
-        Return the standard duration buckets for histogram metrics.
-        Components should use these for consistency across the system.
-        """
-        return [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
-
-    @staticmethod
-    def sanitize_label_values(labels_dict):
-        """
-        Convert all label values to strings as required by Prometheus.
-
-        Args:
-            labels_dict: Dictionary of label name to label value
-
-        Returns:
-            Dictionary with all values converted to strings
-        """
-        return {k: str(v) if v is not None else "" for k, v in labels_dict.items()}
diff --git a/common/utils/prometheus_utils.py b/common/utils/prometheus_utils.py
new file mode 100644
index 0000000..4b50cbe
--- /dev/null
+++ b/common/utils/prometheus_utils.py
@@ -0,0 +1,11 @@
+from flask import current_app
+from prometheus_client import push_to_gateway
+
+
+def sanitize_label(value):
+    """Convert value to valid Prometheus label by removing/replacing invalid chars"""
+    if value is None:
+        return ""
+    # Replace spaces and special chars with underscores
+    import re
+    return re.sub(r'[^a-zA-Z0-9_]', '_', str(value))
diff --git a/config/config.py b/config/config.py
index 9c8b3d6..8f59a3a 100644
--- a/config/config.py
+++ b/config/config.py
@@ -14,7 +14,17 @@ class Config(object):
     SECRET_KEY = environ.get('SECRET_KEY')
     SESSION_COOKIE_SECURE = False
     SESSION_COOKIE_HTTPONLY = True
-    SESSION_KEY_PREFIX = f'{environ.get('COMPONENT_NAME')}_'
+    COMPONENT_NAME = environ.get('COMPONENT_NAME')
+    SESSION_KEY_PREFIX = f'{COMPONENT_NAME}_'
+
+    # Database Settings
+    DB_HOST = environ.get('DB_HOST')
+    DB_USER = environ.get('DB_USER')
+    DB_PASS = environ.get('DB_PASS')
+    DB_NAME = environ.get('DB_NAME')
+    DB_PORT = environ.get('DB_PORT')
+    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
+    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
 
     WTF_CSRF_ENABLED = True
     WTF_CSRF_TIME_LIMIT = None
@@ -154,12 +164,17 @@ class Config(object):
     COMPRESSION_PROCESS_DELAY = 1
 
     # WordPress Integration Settings
-    WORDPRESS_PROTOCOL = os.environ.get('WORDPRESS_PROTOCOL', 'http')
-    WORDPRESS_HOST = os.environ.get('WORDPRESS_HOST', 'host.docker.internal')
-    WORDPRESS_PORT = os.environ.get('WORDPRESS_PORT', '10003')
+    WORDPRESS_PROTOCOL = environ.get('WORDPRESS_PROTOCOL', 'http')
+    WORDPRESS_HOST = environ.get('WORDPRESS_HOST', 'host.docker.internal')
+    WORDPRESS_PORT = environ.get('WORDPRESS_PORT', '10003')
     WORDPRESS_BASE_URL = f"{WORDPRESS_PROTOCOL}://{WORDPRESS_HOST}:{WORDPRESS_PORT}"
     EXTERNAL_WORDPRESS_BASE_URL = 'localhost:10003'
 
+    # Prometheus PUSH Gateway
+    PUSH_GATEWAY_HOST = environ.get('PUSH_GATEWAY_HOST', 'pushgateway')
+    PUSH_GATEWAY_PORT = environ.get('PUSH_GATEWAY_PORT', '9091')
+    PUSH_GATEWAY_URL = f"{PUSH_GATEWAY_HOST}:{PUSH_GATEWAY_PORT}"
+
 
 class DevConfig(Config):
     DEVELOPMENT = True
@@ -167,14 +182,6 @@ class DevConfig(Config):
     FLASK_DEBUG = True
     EXPLAIN_TEMPLATE_LOADING = False
 
-    # Database Settings
-    DB_HOST = environ.get('DB_HOST', 'localhost')
-    DB_USER = environ.get('DB_USER', 'luke')
-    DB_PASS = environ.get('DB_PASS', 'Skywalker!')
-    DB_NAME = environ.get('DB_NAME', 'eveai')
-    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:5432/{DB_NAME}'
-    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
-
     # Define the nginx prefix used for the specific apps
     EVEAI_APP_LOCATION_PREFIX = '/admin'
     EVEAI_CHAT_LOCATION_PREFIX = '/chat'
@@ -237,7 +244,6 @@ class DevConfig(Config):
 class ProdConfig(Config):
     DEVELOPMENT = False
     DEBUG = False
-    DEBUG = False
     FLASK_DEBUG = False
     EXPLAIN_TEMPLATE_LOADING = False
 
@@ -246,15 +252,6 @@ class ProdConfig(Config):
 
     WTF_CSRF_SSL_STRICT = True  # Set to True if using HTTPS
 
-    # Database Settings
-    DB_HOST = environ.get('DB_HOST')
-    DB_USER = environ.get('DB_USER')
-    DB_PASS = environ.get('DB_PASS')
-    DB_NAME = environ.get('DB_NAME')
-    DB_PORT = environ.get('DB_PORT')
-    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
-    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
-
     # flask-mailman settings
     MAIL_SERVER = 'mail.askeveai.com'
     MAIL_PORT = 587
diff --git a/docker/compose_dev.yaml b/docker/compose_dev.yaml
index 6537859..41c011e 100644
--- a/docker/compose_dev.yaml
+++ b/docker/compose_dev.yaml
@@ -39,6 +39,8 @@ x-common-variables: &common-variables
   LANGCHAIN_API_KEY: "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"
   SERPER_API_KEY: "e4c553856d0e6b5a171ec5e6b69d874285b9badf"
   CREWAI_STORAGE_DIR: "/app/crewai_storage"
+  PUSH_GATEWAY_HOST: "pushgateway"
+  PUSH_GATEWAY_PORT: "9091"
 
 services:
   nginx:
@@ -389,6 +391,14 @@ services:
     networks:
       - eveai-network
 
+  pushgateway:
+    image: prom/pushgateway:latest
+    restart: unless-stopped
+    ports:
+      - "9091:9091"
+    networks:
+      - eveai-network
+
   grafana:
     image: grafana/grafana:latest
     container_name: grafana
diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml
index 6c3860d..9163c59 100644
--- a/docker/prometheus/prometheus.yml
+++ b/docker/prometheus/prometheus.yml
@@ -32,3 +32,8 @@ scrape_configs:
     static_configs:
       - targets: ['eveai_entitlements:8000']
     scrape_interval: 10s
+
+  - job_name: 'pushgateway'
+    honor_labels: true
+    static_configs:
+      - targets: [ 'pushgateway:9091' ]
\ No newline at end of file
diff --git a/eveai_chat_workers/__init__.py b/eveai_chat_workers/__init__.py
index 31356a9..35ea409 100644
--- a/eveai_chat_workers/__init__.py
+++ b/eveai_chat_workers/__init__.py
@@ -5,7 +5,7 @@ import os
 
 from common.langchain.templates.template_manager import TemplateManager
 from common.utils.celery_utils import make_celery, init_celery
-from common.extensions import db, template_manager, cache_manager, eveai_metrics
+from common.extensions import db, template_manager, cache_manager
 from config.logging_config import LOGGING
 from config.config import get_config
@@ -45,7 +45,6 @@ def register_extensions(app):
     db.init_app(app)
     cache_manager.init_app(app)
     template_manager.init_app(app)
-    eveai_metrics.init_app(app)
 
 
 def register_cache_handlers(app):
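
Note on the pattern this patch adopts: because each worker process keeps its own in-process registry, scraping a single HTTP endpoint misses metrics from the other processes, so the patch pushes metrics to a Pushgateway that Prometheus then scrapes with `honor_labels: true`. Below is a minimal, self-contained sketch of that push pattern, not the project's exact code: the gateway address and job name mirror the `PUSH_GATEWAY_URL` / `COMPONENT_NAME` settings added in `config.py`, while the `record_task_done` helper and the per-process `grouping_key` are illustrative assumptions rather than something this patch configures.

```python
import os

from prometheus_client import CollectorRegistry, Counter, push_to_gateway

# Process-local registry: each worker process accumulates its own metrics here.
registry = CollectorRegistry()
TASKS_DONE = Counter("worker_tasks_done_total", "Tasks completed by this worker", registry=registry)


def record_task_done(gateway_url: str = "pushgateway:9091", job: str = "eveai_chat_workers") -> None:
    """Increment the counter and push this process's registry to the Pushgateway."""
    TASKS_DONE.inc()
    try:
        push_to_gateway(
            gateway_url,
            job=job,
            registry=registry,
            # Hypothetical grouping key: keeps one metric group per worker process
            # so concurrent workers do not overwrite each other's pushed values.
            grouping_key={"pid": str(os.getpid())},
        )
    except Exception as exc:  # pushing metrics should never break the worker
        print(f"Failed to push metrics to Pushgateway: {exc}")
```

One design consequence worth keeping in mind: series pushed to a Pushgateway persist there until they are overwritten by a later push or explicitly removed (for example with `prometheus_client.delete_from_gateway`), and `honor_labels: true` on the scrape job ensures the pushed `job` label is kept instead of being replaced by the scrape target's own labels.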