From 4ea16521e20d4d4073dbcee280f51479eb5c907a Mon Sep 17 00:00:00 2001
From: Josako
Date: Tue, 25 Mar 2025 15:48:00 +0100
Subject: [PATCH] - Prometheus metrics go via pushgateway, as different worker
 processes might have different registries that are not picked up by
 Prometheus

---
 CHANGELOG.md                            |  32 ++++++
 common/extensions.py                    |   2 -
 common/utils/business_event.py          | 151 ++++++++++++++++++-------
 common/utils/performance_monitoring.py  |  59 ----------
 common/utils/prometheus_utils.py        |  11 ++
 config/config.py                        |  41 ++++---
 docker/compose_dev.yaml                 |  10 ++
 docker/prometheus/prometheus.yml        |   5 +
 eveai_chat_workers/__init__.py          |   3 +-
 9 files changed, 191 insertions(+), 123 deletions(-)
 delete mode 100644 common/utils/performance_monitoring.py
 create mode 100644 common/utils/prometheus_utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 984e8b1..654855e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Security
 - In case of vulnerabilities.
 
+## [2.2.0-alfa]
+
+### Added
+- Mistral AI as main provider for embeddings, chains and specialists
+- Usage measuring for specialists
+- RAG from chain to specialist technology
+- Dossier catalog management possibilities added to eveai_app
+- Asset definition (Paused - other priorities)
+- Prometheus and Grafana
+- Add prometheus monitoring to business events
+- Asynchronous execution of specialists
+
+### Changed
+- Moved choice for AI providers / models to specialists and prompts
+- Improve RAG to not repeat historic answers
+- Fixed embedding model, no more choices allowed
+- Clean URL (of tracking parameters) before adding it to a catalog
+
+### Deprecated
+- For soon-to-be removed features.
+
+### Removed
+- Add Multiple URLs removed from menu
+- Old Specialist items removed from interaction menu
+-
+
+### Fixed
+- Set default language when registering Documents or URLs.
+
+### Security
+- In case of vulnerabilities.
+
 ## [2.1.0-alfa]
 
 ### Added
diff --git a/common/extensions.py b/common/extensions.py
index c38fa7f..df4fc8d 100644
--- a/common/extensions.py
+++ b/common/extensions.py
@@ -15,7 +15,6 @@ from .langchain.templates.template_manager import TemplateManager
 from .utils.cache.eveai_cache_manager import EveAICacheManager
 from .utils.simple_encryption import SimpleEncryption
 from .utils.minio_utils import MinioClient
-from .utils.performance_monitoring import EveAIMetrics
 
 
 # Create extensions
@@ -35,5 +34,4 @@ minio_client = MinioClient()
 metrics = PrometheusMetrics.for_app_factory()
 template_manager = TemplateManager()
 cache_manager = EveAICacheManager()
-eveai_metrics = EveAIMetrics()
 
diff --git a/common/utils/business_event.py b/common/utils/business_event.py
index 1cb8feb..2e5b150 100644
--- a/common/utils/business_event.py
+++ b/common/utils/business_event.py
@@ -6,16 +6,18 @@ from datetime import datetime
 from typing import Dict, Any, Optional, List
 from datetime import datetime as dt, timezone as tz
 import logging
-from prometheus_client import Counter, Histogram, Gauge, Summary
+
+from flask import current_app
+from prometheus_client import Counter, Histogram, Gauge, Summary, push_to_gateway, REGISTRY
 
 from .business_event_context import BusinessEventContext
 from common.models.entitlements import BusinessEventLog
 from common.extensions import db
 from .celery_utils import current_celery
-from common.utils.performance_monitoring import EveAIMetrics
+from common.utils.prometheus_utils import sanitize_label
 
 # Standard duration buckets for all histograms
-DURATION_BUCKETS = EveAIMetrics.get_standard_buckets()
+DURATION_BUCKETS = [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
 
 # Prometheus metrics for business events
 TRACE_COUNTER = Counter(
@@ -112,14 +114,24 @@ class BusinessEvent:
 
         # Prometheus label values must be strings
         self.tenant_id_str = str(self.tenant_id)
+        self.event_type_str = sanitize_label(self.event_type)
         self.specialist_id_str = str(self.specialist_id) if self.specialist_id else ""
         self.specialist_type_str = str(self.specialist_type) if self.specialist_type else ""
-        self.specialist_type_version_str = str(self.specialist_type_version) if self.specialist_type_version else ""
+        self.specialist_type_version_str = sanitize_label(str(self.specialist_type_version)) \
+            if self.specialist_type_version else ""
+        self.span_name_str = ""
+
+        current_app.logger.debug(f"Labels for metrics: "
+                                 f"tenant_id={self.tenant_id_str}, "
+                                 f"event_type={self.event_type_str},"
+                                 f"specialist_id={self.specialist_id_str}, "
+                                 f"specialist_type={self.specialist_type_str}, " +
+                                 f"specialist_type_version={self.specialist_type_version_str}")
 
         # Increment concurrent events gauge when initialized
         CONCURRENT_TRACES.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -128,12 +140,14 @@ class BusinessEvent:
         # Increment trace counter
         TRACE_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).inc()
 
+        self._push_to_gateway()
+
     def update_attribute(self, attribute: str, value: any):
         if hasattr(self, attribute):
             setattr(self, attribute, value)
@@ -143,9 +157,13 @@ class BusinessEvent:
             elif attribute == 'specialist_type':
                 self.specialist_type_str = str(value) if value else ""
             elif attribute == 'specialist_type_version':
-                self.specialist_type_version_str = str(value) if value else ""
+                self.specialist_type_version_str = sanitize_label(str(value)) if value else ""
             elif attribute == 'tenant_id':
                 self.tenant_id_str = str(value)
+            elif attribute == 'event_type':
+                self.event_type_str = sanitize_label(value)
+            elif attribute == 'span_name':
+                self.span_name_str = sanitize_label(value)
         else:
             raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'")
 
@@ -158,13 +176,21 @@ class BusinessEvent:
             self.llm_metrics['interaction_type'] = metrics['interaction_type']
 
         # Track in Prometheus metrics
-        interaction_type = metrics['interaction_type']
+        interaction_type_str = sanitize_label(metrics['interaction_type']) if metrics['interaction_type'] else ""
+
+        current_app.logger.debug(f"Labels for metrics: "
+                                 f"tenant_id={self.tenant_id_str}, "
+                                 f"event_type={self.event_type_str},"
+                                 f"interaction_type={interaction_type_str}, "
+                                 f"specialist_id={self.specialist_id_str}, "
+                                 f"specialist_type={self.specialist_type_str}, "
+                                 f"specialist_type_version={self.specialist_type_version_str}")
 
         # Track token usage
         LLM_TOKENS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             token_type='total',
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
@@ -173,8 +199,8 @@
 
         LLM_TOKENS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             token_type='prompt',
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
@@ -183,8 +209,8 @@
 
         LLM_TOKENS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             token_type='completion',
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
@@ -194,8 +220,8 @@
         # Track duration
         LLM_DURATION.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -204,13 +230,15 @@
         # Track call count
         LLM_CALLS_COUNTER.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
-            interaction_type=interaction_type,
+            event_type=self.event_type_str,
+            interaction_type=interaction_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).inc()
 
+        self._push_to_gateway()
+
     def reset_llm_metrics(self):
         self.llm_metrics['total_tokens'] = 0
         self.llm_metrics['prompt_tokens'] = 0
@@ -236,16 +264,25 @@
         # Set the new span info
         self.span_id = new_span_id
         self.span_name = span_name
+        self.span_name_str = sanitize_label(span_name) if span_name else ""
         self.parent_span_id = parent_span_id
 
         # Track start time for the span
         span_start_time = time.time()
 
+        current_app.logger.debug(f"Labels for metrics: "
+                                 f"tenant_id={self.tenant_id_str}, "
+                                 f"event_type={self.event_type_str}, "
f"activity_name={self.span_name_str}, " + f"specialist_id={self.specialist_id_str}, " + f"specialist_type={self.specialist_type_str}, " + f"specialist_type_version={self.specialist_type_version_str}") + # Increment span metrics - using span_name as activity_name for metrics SPAN_COUNTER.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str @@ -254,13 +291,15 @@ class BusinessEvent: # Increment concurrent spans gauge CONCURRENT_SPANS.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str ).inc() + self._push_to_gateway() + self.log(f"Start") try: @@ -272,8 +311,8 @@ class BusinessEvent: # Observe span duration SPAN_DURATION.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str @@ -282,13 +321,15 @@ class BusinessEvent: # Decrement concurrent spans gauge CONCURRENT_SPANS.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str ).dec() + self._push_to_gateway() + if self.llm_metrics['call_count'] > 0: self.log_final_metrics() self.reset_llm_metrics() @@ -296,10 +337,12 @@ class BusinessEvent: # Restore the previous span info if self.spans: self.span_id, self.span_name, self.parent_span_id = self.spans.pop() + self.span_name_str = sanitize_label(span_name) if span_name else "" else: self.span_id = None self.span_name = None self.parent_span_id = None + self.span_name_str = "" @asynccontextmanager async def create_span_async(self, span_name: str): @@ -314,16 +357,25 @@ class BusinessEvent: # Set the new span info self.span_id = new_span_id self.span_name = span_name + self.span_name_str = sanitize_label(span_name) if span_name else "" self.parent_span_id = parent_span_id # Track start time for the span span_start_time = time.time() + current_app.logger.debug(f"Labels for metrics: " + f"tenant_id={self.tenant_id_str}, " + f"event_type={self.event_type_str}, " + f"activity_name={self.span_name_str}, " + f"specialist_id={self.specialist_id_str}, " + f"specialist_type={self.specialist_type_str}, " + f"specialist_type_version={self.specialist_type_version_str}") + # Increment span metrics - using span_name as activity_name for metrics SPAN_COUNTER.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, + event_type=self.event_type_str, + activity_name=self.span_name_str, specialist_id=self.specialist_id_str, specialist_type=self.specialist_type_str, specialist_type_version=self.specialist_type_version_str @@ -332,13 +384,15 @@ class BusinessEvent: # Increment concurrent spans gauge CONCURRENT_SPANS.labels( tenant_id=self.tenant_id_str, - event_type=self.event_type, - activity_name=span_name, 
+            event_type=self.event_type_str,
+            activity_name=self.span_name_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).inc()
 
+        self._push_to_gateway()
+
         self.log(f"Start")
 
         try:
@@ -350,8 +404,8 @@
             # Observe span duration
             SPAN_DURATION.labels(
                 tenant_id=self.tenant_id_str,
-                event_type=self.event_type,
-                activity_name=span_name,
+                event_type=self.event_type_str,
+                activity_name=self.span_name_str,
                 specialist_id=self.specialist_id_str,
                 specialist_type=self.specialist_type_str,
                 specialist_type_version=self.specialist_type_version_str
@@ -360,13 +414,15 @@
             # Decrement concurrent spans gauge
             CONCURRENT_SPANS.labels(
                 tenant_id=self.tenant_id_str,
-                event_type=self.event_type,
-                activity_name=span_name,
+                event_type=self.event_type_str,
+                activity_name=self.span_name_str,
                 specialist_id=self.specialist_id_str,
                 specialist_type=self.specialist_type_str,
                 specialist_type_version=self.specialist_type_version_str
             ).dec()
 
+            self._push_to_gateway()
+
             if self.llm_metrics['call_count'] > 0:
                 self.log_final_metrics()
                 self.reset_llm_metrics()
@@ -374,10 +430,12 @@
             # Restore the previous span info
             if self.spans:
                 self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
+                self.span_name_str = sanitize_label(self.span_name) if self.span_name else ""
             else:
                 self.span_id = None
                 self.span_name = None
                 self.parent_span_id = None
+                self.span_name_str = ""
 
     def log(self, message: str, level: str = 'info', extra_fields: Dict[str, Any] = None):
         log_data = {
@@ -526,6 +584,17 @@
         # Clear the buffer after sending
         self._log_buffer = []
 
+    def _push_to_gateway(self):
+        # Push metrics to the gateway
+        try:
+            push_to_gateway(
+                current_app.config['PUSH_GATEWAY_URL'],
+                job=current_app.config['COMPONENT_NAME'],
+                registry=REGISTRY
+            )
+        except Exception as e:
+            current_app.logger.error(f"Failed to push metrics to Prometheus Push Gateway: {e}")
+
     def __enter__(self):
         self.trace_start_time = time.time()
         self.log(f'Starting Trace for {self.event_type}')
@@ -537,7 +606,7 @@
         # Record trace duration
         TRACE_DURATION.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -546,18 +615,22 @@
         # Decrement concurrent traces gauge
         CONCURRENT_TRACES.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).dec()
 
+        self._push_to_gateway()
+
         if self.llm_metrics['call_count'] > 0:
             self.log_final_metrics()
             self.reset_llm_metrics()
 
         self.log(f'Ending Trace for {self.event_type}', extra_fields={'trace_duration': trace_total_time})
         self._flush_log_buffer()
+
+
         return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb)
 
     async def __aenter__(self):
@@ -571,7 +644,7 @@
         # Record trace duration
         TRACE_DURATION.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
@@ -580,12 +653,14 @@
         # Decrement concurrent traces gauge
         CONCURRENT_TRACES.labels(
             tenant_id=self.tenant_id_str,
-            event_type=self.event_type,
+            event_type=self.event_type_str,
             specialist_id=self.specialist_id_str,
             specialist_type=self.specialist_type_str,
             specialist_type_version=self.specialist_type_version_str
         ).dec()
 
+        self._push_to_gateway()
+
         if self.llm_metrics['call_count'] > 0:
             self.log_final_metrics()
             self.reset_llm_metrics()
diff --git a/common/utils/performance_monitoring.py b/common/utils/performance_monitoring.py
deleted file mode 100644
index 3727000..0000000
--- a/common/utils/performance_monitoring.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import time
-import threading
-from contextlib import contextmanager
-from functools import wraps
-from prometheus_client import Counter, Histogram, Summary, start_http_server, Gauge
-from flask import current_app, g, request, Flask
-
-
-class EveAIMetrics:
-    """
-    Central class for Prometheus metrics infrastructure.
-    This class initializes the Prometheus HTTP server and provides
-    shared functionality for metrics across components.
-
-    Component-specific metrics should be defined in their respective modules.
-    """
-
-    def __init__(self, app: Flask = None):
-        self.app = app
-        self._metrics_server_started = False
-        if app is not None:
-            self.init_app(app)
-
-    def init_app(self, app: Flask):
-        """Initialize metrics with Flask app and start Prometheus server"""
-        self.app = app
-        self._start_metrics_server()
-
-    def _start_metrics_server(self):
-        """Start the Prometheus metrics HTTP server if not already running"""
-        if not self._metrics_server_started:
-            try:
-                metrics_port = self.app.config.get('PROMETHEUS_PORT', 8000)
-                start_http_server(metrics_port)
-                self.app.logger.info(f"Prometheus metrics server started on port {metrics_port}")
-                self._metrics_server_started = True
-            except Exception as e:
-                self.app.logger.error(f"Failed to start metrics server: {e}")
-
-    @staticmethod
-    def get_standard_buckets():
-        """
-        Return the standard duration buckets for histogram metrics.
-        Components should use these for consistency across the system.
-        """
-        return [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
-
-    @staticmethod
-    def sanitize_label_values(labels_dict):
-        """
-        Convert all label values to strings as required by Prometheus.
-
-        Args:
-            labels_dict: Dictionary of label name to label value
-
-        Returns:
-            Dictionary with all values converted to strings
-        """
-        return {k: str(v) if v is not None else "" for k, v in labels_dict.items()}
diff --git a/common/utils/prometheus_utils.py b/common/utils/prometheus_utils.py
new file mode 100644
index 0000000..4b50cbe
--- /dev/null
+++ b/common/utils/prometheus_utils.py
@@ -0,0 +1,11 @@
+from flask import current_app
+from prometheus_client import push_to_gateway
+
+
+def sanitize_label(value):
+    """Convert value to valid Prometheus label by removing/replacing invalid chars"""
+    if value is None:
+        return ""
+    # Replace spaces and special chars with underscores
+    import re
+    return re.sub(r'[^a-zA-Z0-9_]', '_', str(value))
diff --git a/config/config.py b/config/config.py
index 9c8b3d6..8f59a3a 100644
--- a/config/config.py
+++ b/config/config.py
@@ -14,7 +14,17 @@ class Config(object):
     SECRET_KEY = environ.get('SECRET_KEY')
     SESSION_COOKIE_SECURE = False
     SESSION_COOKIE_HTTPONLY = True
-    SESSION_KEY_PREFIX = f'{environ.get('COMPONENT_NAME')}_'
+    COMPONENT_NAME = environ.get('COMPONENT_NAME')
+    SESSION_KEY_PREFIX = f'{COMPONENT_NAME}_'
+
+    # Database Settings
+    DB_HOST = environ.get('DB_HOST')
+    DB_USER = environ.get('DB_USER')
+    DB_PASS = environ.get('DB_PASS')
+    DB_NAME = environ.get('DB_NAME')
+    DB_PORT = environ.get('DB_PORT')
+    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
+    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
 
     WTF_CSRF_ENABLED = True
     WTF_CSRF_TIME_LIMIT = None
@@ -154,12 +164,17 @@ class Config(object):
     COMPRESSION_PROCESS_DELAY = 1
 
     # WordPress Integration Settings
-    WORDPRESS_PROTOCOL = os.environ.get('WORDPRESS_PROTOCOL', 'http')
-    WORDPRESS_HOST = os.environ.get('WORDPRESS_HOST', 'host.docker.internal')
-    WORDPRESS_PORT = os.environ.get('WORDPRESS_PORT', '10003')
+    WORDPRESS_PROTOCOL = environ.get('WORDPRESS_PROTOCOL', 'http')
+    WORDPRESS_HOST = environ.get('WORDPRESS_HOST', 'host.docker.internal')
+    WORDPRESS_PORT = environ.get('WORDPRESS_PORT', '10003')
     WORDPRESS_BASE_URL = f"{WORDPRESS_PROTOCOL}://{WORDPRESS_HOST}:{WORDPRESS_PORT}"
     EXTERNAL_WORDPRESS_BASE_URL = 'localhost:10003'
 
+    # Prometheus PUSH Gateway
+    PUSH_GATEWAY_HOST = environ.get('PUSH_GATEWAY_HOST', 'pushgateway')
+    PUSH_GATEWAY_PORT = environ.get('PUSH_GATEWAY_PORT', '9091')
+    PUSH_GATEWAY_URL = f"{PUSH_GATEWAY_HOST}:{PUSH_GATEWAY_PORT}"
+
 
 class DevConfig(Config):
     DEVELOPMENT = True
@@ -167,14 +182,6 @@ class DevConfig(Config):
     FLASK_DEBUG = True
     EXPLAIN_TEMPLATE_LOADING = False
 
-    # Database Settings
-    DB_HOST = environ.get('DB_HOST', 'localhost')
-    DB_USER = environ.get('DB_USER', 'luke')
-    DB_PASS = environ.get('DB_PASS', 'Skywalker!')
-    DB_NAME = environ.get('DB_NAME', 'eveai')
-    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:5432/{DB_NAME}'
-    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
-
     # Define the nginx prefix used for the specific apps
     EVEAI_APP_LOCATION_PREFIX = '/admin'
     EVEAI_CHAT_LOCATION_PREFIX = '/chat'
@@ -237,7 +244,6 @@ class DevConfig(Config):
 class ProdConfig(Config):
     DEVELOPMENT = False
     DEBUG = False
-    DEBUG = False
     FLASK_DEBUG = False
     EXPLAIN_TEMPLATE_LOADING = False
 
@@ -246,15 +252,6 @@ class ProdConfig(Config):
 
     WTF_CSRF_SSL_STRICT = True  # Set to True if using HTTPS
 
-    # Database Settings
-    DB_HOST = environ.get('DB_HOST')
-    DB_USER = environ.get('DB_USER')
-    DB_PASS = environ.get('DB_PASS')
-    DB_NAME = environ.get('DB_NAME')
-    DB_PORT = environ.get('DB_PORT')
-    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
-    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
-
     # flask-mailman settings
     MAIL_SERVER = 'mail.askeveai.com'
     MAIL_PORT = 587
diff --git a/docker/compose_dev.yaml b/docker/compose_dev.yaml
index 6537859..41c011e 100644
--- a/docker/compose_dev.yaml
+++ b/docker/compose_dev.yaml
@@ -39,6 +39,8 @@ x-common-variables: &common-variables
   LANGCHAIN_API_KEY: "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"
   SERPER_API_KEY: "e4c553856d0e6b5a171ec5e6b69d874285b9badf"
   CREWAI_STORAGE_DIR: "/app/crewai_storage"
+  PUSH_GATEWAY_HOST: "pushgateway"
+  PUSH_GATEWAY_PORT: "9091"
 
 services:
   nginx:
@@ -389,6 +391,14 @@ services:
     networks:
       - eveai-network
 
+  pushgateway:
+    image: prom/pushgateway:latest
+    restart: unless-stopped
+    ports:
+      - "9091:9091"
+    networks:
+      - eveai-network
+
   grafana:
     image: grafana/grafana:latest
     container_name: grafana
diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml
index 6c3860d..9163c59 100644
--- a/docker/prometheus/prometheus.yml
+++ b/docker/prometheus/prometheus.yml
@@ -32,3 +32,8 @@ scrape_configs:
     static_configs:
       - targets: ['eveai_entitlements:8000']
     scrape_interval: 10s
+
+  - job_name: 'pushgateway'
+    honor_labels: true
+    static_configs:
+      - targets: [ 'pushgateway:9091' ]
\ No newline at end of file
diff --git a/eveai_chat_workers/__init__.py b/eveai_chat_workers/__init__.py
index 31356a9..35ea409 100644
--- a/eveai_chat_workers/__init__.py
+++ b/eveai_chat_workers/__init__.py
@@ -5,7 +5,7 @@ import os
 
 from common.langchain.templates.template_manager import TemplateManager
 from common.utils.celery_utils import make_celery, init_celery
-from common.extensions import db, template_manager, cache_manager, eveai_metrics
+from common.extensions import db, template_manager, cache_manager
 from config.logging_config import LOGGING
 from config.config import get_config
@@ -45,7 +45,6 @@ def register_extensions(app):
     db.init_app(app)
     cache_manager.init_app(app)
     template_manager.init_app(app)
-    eveai_metrics.init_app(app)
 
 
 def register_cache_handlers(app):
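
Note on the pattern this patch adopts: because each worker process keeps its own in-process registry, scraping a single HTTP endpoint misses metrics from the other processes, so the patch pushes metrics to a Pushgateway that Prometheus then scrapes with `honor_labels: true`. Below is a minimal, self-contained sketch of that push pattern, not the project's exact code: the gateway address and job name mirror the `PUSH_GATEWAY_URL` / `COMPONENT_NAME` settings added in `config.py`, while the `record_task_done` helper and the per-process `grouping_key` are illustrative assumptions rather than something this patch configures.

```python
import os

from prometheus_client import CollectorRegistry, Counter, push_to_gateway

# Process-local registry: each worker process accumulates its own metrics here.
registry = CollectorRegistry()
TASKS_DONE = Counter("worker_tasks_done_total", "Tasks completed by this worker", registry=registry)


def record_task_done(gateway_url: str = "pushgateway:9091", job: str = "eveai_chat_workers") -> None:
    """Increment the counter and push this process's registry to the Pushgateway."""
    TASKS_DONE.inc()
    try:
        push_to_gateway(
            gateway_url,
            job=job,
            registry=registry,
            # Hypothetical grouping key: keeps one metric group per worker process
            # so concurrent workers do not overwrite each other's pushed values.
            grouping_key={"pid": str(os.getpid())},
        )
    except Exception as exc:  # pushing metrics should never break the worker
        print(f"Failed to push metrics to Pushgateway: {exc}")
```

One design consequence worth keeping in mind: series pushed to a Pushgateway persist there until they are overwritten by a later push or explicitly removed (for example with `prometheus_client.delete_from_gateway`), and `honor_labels: true` on the scrape job ensures the pushed `job` label is kept instead of being replaced by the scrape target's own labels.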