- Prometheus metrics go via pushgateway, as different worker processes might have different registries that are not picked up by Prometheus

This commit is contained in:
Josako
2025-03-25 15:48:00 +01:00
parent b6ee7182de
commit 4ea16521e2
9 changed files with 191 additions and 123 deletions

View File

@@ -25,6 +25,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security ### Security
- In case of vulnerabilities. - In case of vulnerabilities.
## [2.2.0-alfa]
### Added
- Mistral AI as main provider for embeddings, chains and specialists
- Usage measuring for specialists
- RAG from chain to specialist technology
- Dossier catalog management possibilities added to eveai_app
- Asset definition (Paused - other priorities)
- Prometheus and Grafana
- Add prometheus monitoring to business events
- Asynchronous execution of specialists
### Changed
- Moved choice for AI providers / models to specialists and prompts
- Improve RAG to not repeat historic answers
- Fixed embedding model, no more choices allowed
- Clean URLs (strip tracking parameters) before adding them to a catalog
### Deprecated
- For soon-to-be removed features.
### Removed
- Add Multiple URLs removed from menu
- Old Specialist items removed from interaction menu
### Fixed
- Set default language when registering Documents or URLs.
### Security
- In case of vulnerabilities.
## [2.1.0-alfa] ## [2.1.0-alfa]
### Added ### Added

View File

@@ -15,7 +15,6 @@ from .langchain.templates.template_manager import TemplateManager
from .utils.cache.eveai_cache_manager import EveAICacheManager from .utils.cache.eveai_cache_manager import EveAICacheManager
from .utils.simple_encryption import SimpleEncryption from .utils.simple_encryption import SimpleEncryption
from .utils.minio_utils import MinioClient from .utils.minio_utils import MinioClient
from .utils.performance_monitoring import EveAIMetrics
# Create extensions # Create extensions
@@ -35,5 +34,4 @@ minio_client = MinioClient()
metrics = PrometheusMetrics.for_app_factory() metrics = PrometheusMetrics.for_app_factory()
template_manager = TemplateManager() template_manager = TemplateManager()
cache_manager = EveAICacheManager() cache_manager = EveAICacheManager()
eveai_metrics = EveAIMetrics()

View File

@@ -6,16 +6,18 @@ from datetime import datetime
from typing import Dict, Any, Optional, List from typing import Dict, Any, Optional, List
from datetime import datetime as dt, timezone as tz from datetime import datetime as dt, timezone as tz
import logging import logging
from prometheus_client import Counter, Histogram, Gauge, Summary
from flask import current_app
from prometheus_client import Counter, Histogram, Gauge, Summary, push_to_gateway, REGISTRY
from .business_event_context import BusinessEventContext from .business_event_context import BusinessEventContext
from common.models.entitlements import BusinessEventLog from common.models.entitlements import BusinessEventLog
from common.extensions import db from common.extensions import db
from .celery_utils import current_celery from .celery_utils import current_celery
from common.utils.performance_monitoring import EveAIMetrics from common.utils.prometheus_utils import sanitize_label
# Standard duration buckets for all histograms # Standard duration buckets for all histograms
DURATION_BUCKETS = EveAIMetrics.get_standard_buckets() DURATION_BUCKETS = [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
# Prometheus metrics for business events # Prometheus metrics for business events
TRACE_COUNTER = Counter( TRACE_COUNTER = Counter(
@@ -112,14 +114,24 @@ class BusinessEvent:
# Prometheus label values must be strings # Prometheus label values must be strings
self.tenant_id_str = str(self.tenant_id) self.tenant_id_str = str(self.tenant_id)
self.event_type_str = sanitize_label(self.event_type)
self.specialist_id_str = str(self.specialist_id) if self.specialist_id else "" self.specialist_id_str = str(self.specialist_id) if self.specialist_id else ""
self.specialist_type_str = str(self.specialist_type) if self.specialist_type else "" self.specialist_type_str = str(self.specialist_type) if self.specialist_type else ""
self.specialist_type_version_str = str(self.specialist_type_version) if self.specialist_type_version else "" self.specialist_type_version_str = sanitize_label(str(self.specialist_type_version)) \
if self.specialist_type_version else ""
self.span_name_str = ""
current_app.logger.debug(f"Labels for metrics: "
f"tenant_id={self.tenant_id_str}, "
f"event_type={self.event_type_str},"
f"specialist_id={self.specialist_id_str}, "
f"specialist_type={self.specialist_type_str}, " +
f"specialist_type_version={self.specialist_type_version_str}")
# Increment concurrent events gauge when initialized # Increment concurrent events gauge when initialized
CONCURRENT_TRACES.labels( CONCURRENT_TRACES.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -128,12 +140,14 @@ class BusinessEvent:
# Increment trace counter # Increment trace counter
TRACE_COUNTER.labels( TRACE_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).inc() ).inc()
self._push_to_gateway()
def update_attribute(self, attribute: str, value: any): def update_attribute(self, attribute: str, value: any):
if hasattr(self, attribute): if hasattr(self, attribute):
setattr(self, attribute, value) setattr(self, attribute, value)
@@ -143,9 +157,13 @@ class BusinessEvent:
elif attribute == 'specialist_type': elif attribute == 'specialist_type':
self.specialist_type_str = str(value) if value else "" self.specialist_type_str = str(value) if value else ""
elif attribute == 'specialist_type_version': elif attribute == 'specialist_type_version':
self.specialist_type_version_str = str(value) if value else "" self.specialist_type_version_str = sanitize_label(str(value)) if value else ""
elif attribute == 'tenant_id': elif attribute == 'tenant_id':
self.tenant_id_str = str(value) self.tenant_id_str = str(value)
elif attribute == 'event_type':
self.event_type_str = sanitize_label(value)
elif attribute == 'span_name':
self.span_name_str = sanitize_label(value)
else: else:
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'") raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'")
@@ -158,13 +176,21 @@ class BusinessEvent:
self.llm_metrics['interaction_type'] = metrics['interaction_type'] self.llm_metrics['interaction_type'] = metrics['interaction_type']
# Track in Prometheus metrics # Track in Prometheus metrics
interaction_type = metrics['interaction_type'] interaction_type_str = sanitize_label(metrics['interaction_type']) if metrics['interaction_type'] else ""
current_app.logger.debug(f"Labels for metrics: "
f"tenant_id={self.tenant_id_str}, "
f"event_type={self.event_type_str},"
f"interaction_type={interaction_type_str}, "
f"specialist_id={self.specialist_id_str}, "
f"specialist_type={self.specialist_type_str}, "
f"specialist_type_version={self.specialist_type_version_str}")
# Track token usage # Track token usage
LLM_TOKENS_COUNTER.labels( LLM_TOKENS_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
interaction_type=interaction_type, interaction_type=interaction_type_str,
token_type='total', token_type='total',
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
@@ -173,8 +199,8 @@ class BusinessEvent:
LLM_TOKENS_COUNTER.labels( LLM_TOKENS_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
interaction_type=interaction_type, interaction_type=interaction_type_str,
token_type='prompt', token_type='prompt',
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
@@ -183,8 +209,8 @@ class BusinessEvent:
LLM_TOKENS_COUNTER.labels( LLM_TOKENS_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
interaction_type=interaction_type, interaction_type=interaction_type_str,
token_type='completion', token_type='completion',
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
@@ -194,8 +220,8 @@ class BusinessEvent:
# Track duration # Track duration
LLM_DURATION.labels( LLM_DURATION.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
interaction_type=interaction_type, interaction_type=interaction_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -204,13 +230,15 @@ class BusinessEvent:
# Track call count # Track call count
LLM_CALLS_COUNTER.labels( LLM_CALLS_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
interaction_type=interaction_type, interaction_type=interaction_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).inc() ).inc()
self._push_to_gateway()
def reset_llm_metrics(self): def reset_llm_metrics(self):
self.llm_metrics['total_tokens'] = 0 self.llm_metrics['total_tokens'] = 0
self.llm_metrics['prompt_tokens'] = 0 self.llm_metrics['prompt_tokens'] = 0
@@ -236,16 +264,25 @@ class BusinessEvent:
# Set the new span info # Set the new span info
self.span_id = new_span_id self.span_id = new_span_id
self.span_name = span_name self.span_name = span_name
self.span_name_str = sanitize_label(span_name) if span_name else ""
self.parent_span_id = parent_span_id self.parent_span_id = parent_span_id
# Track start time for the span # Track start time for the span
span_start_time = time.time() span_start_time = time.time()
current_app.logger.debug(f"Labels for metrics: "
f"tenant_id={self.tenant_id_str}, "
f"event_type={self.event_type_str}, "
f"activity_name={self.span_name_str}, "
f"specialist_id={self.specialist_id_str}, "
f"specialist_type={self.specialist_type_str}, "
f"specialist_type_version={self.specialist_type_version_str}")
# Increment span metrics - using span_name as activity_name for metrics # Increment span metrics - using span_name as activity_name for metrics
SPAN_COUNTER.labels( SPAN_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -254,13 +291,15 @@ class BusinessEvent:
# Increment concurrent spans gauge # Increment concurrent spans gauge
CONCURRENT_SPANS.labels( CONCURRENT_SPANS.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).inc() ).inc()
self._push_to_gateway()
self.log(f"Start") self.log(f"Start")
try: try:
@@ -272,8 +311,8 @@ class BusinessEvent:
# Observe span duration # Observe span duration
SPAN_DURATION.labels( SPAN_DURATION.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -282,13 +321,15 @@ class BusinessEvent:
# Decrement concurrent spans gauge # Decrement concurrent spans gauge
CONCURRENT_SPANS.labels( CONCURRENT_SPANS.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).dec() ).dec()
self._push_to_gateway()
if self.llm_metrics['call_count'] > 0: if self.llm_metrics['call_count'] > 0:
self.log_final_metrics() self.log_final_metrics()
self.reset_llm_metrics() self.reset_llm_metrics()
@@ -296,10 +337,12 @@ class BusinessEvent:
# Restore the previous span info # Restore the previous span info
if self.spans: if self.spans:
self.span_id, self.span_name, self.parent_span_id = self.spans.pop() self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
self.span_name_str = sanitize_label(span_name) if span_name else ""
else: else:
self.span_id = None self.span_id = None
self.span_name = None self.span_name = None
self.parent_span_id = None self.parent_span_id = None
self.span_name_str = ""
@asynccontextmanager @asynccontextmanager
async def create_span_async(self, span_name: str): async def create_span_async(self, span_name: str):
@@ -314,16 +357,25 @@ class BusinessEvent:
# Set the new span info # Set the new span info
self.span_id = new_span_id self.span_id = new_span_id
self.span_name = span_name self.span_name = span_name
self.span_name_str = sanitize_label(span_name) if span_name else ""
self.parent_span_id = parent_span_id self.parent_span_id = parent_span_id
# Track start time for the span # Track start time for the span
span_start_time = time.time() span_start_time = time.time()
current_app.logger.debug(f"Labels for metrics: "
f"tenant_id={self.tenant_id_str}, "
f"event_type={self.event_type_str}, "
f"activity_name={self.span_name_str}, "
f"specialist_id={self.specialist_id_str}, "
f"specialist_type={self.specialist_type_str}, "
f"specialist_type_version={self.specialist_type_version_str}")
# Increment span metrics - using span_name as activity_name for metrics # Increment span metrics - using span_name as activity_name for metrics
SPAN_COUNTER.labels( SPAN_COUNTER.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -332,13 +384,15 @@ class BusinessEvent:
# Increment concurrent spans gauge # Increment concurrent spans gauge
CONCURRENT_SPANS.labels( CONCURRENT_SPANS.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).inc() ).inc()
self._push_to_gateway()
self.log(f"Start") self.log(f"Start")
try: try:
@@ -350,8 +404,8 @@ class BusinessEvent:
# Observe span duration # Observe span duration
SPAN_DURATION.labels( SPAN_DURATION.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -360,13 +414,15 @@ class BusinessEvent:
# Decrement concurrent spans gauge # Decrement concurrent spans gauge
CONCURRENT_SPANS.labels( CONCURRENT_SPANS.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
activity_name=span_name, activity_name=self.span_name_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).dec() ).dec()
self._push_to_gateway()
if self.llm_metrics['call_count'] > 0: if self.llm_metrics['call_count'] > 0:
self.log_final_metrics() self.log_final_metrics()
self.reset_llm_metrics() self.reset_llm_metrics()
@@ -374,10 +430,12 @@ class BusinessEvent:
# Restore the previous span info # Restore the previous span info
if self.spans: if self.spans:
self.span_id, self.span_name, self.parent_span_id = self.spans.pop() self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
self.span_name_str = sanitize_label(span_name) if span_name else ""
else: else:
self.span_id = None self.span_id = None
self.span_name = None self.span_name = None
self.parent_span_id = None self.parent_span_id = None
self.span_name_str = ""
def log(self, message: str, level: str = 'info', extra_fields: Dict[str, Any] = None): def log(self, message: str, level: str = 'info', extra_fields: Dict[str, Any] = None):
log_data = { log_data = {
@@ -526,6 +584,17 @@ class BusinessEvent:
# Clear the buffer after sending # Clear the buffer after sending
self._log_buffer = [] self._log_buffer = []
def _push_to_gateway(self):
    """Push the default Prometheus registry to the configured Push Gateway.

    The gateway address is read from the ``PUSH_GATEWAY_URL`` app setting and
    the job name from ``COMPONENT_NAME``. This is best-effort: any exception
    raised while reading the settings or pushing is logged as an error and
    not propagated to the caller.
    """
    # NOTE(review): the push is grouped by job name only. Processes sharing the
    # same COMPONENT_NAME presumably replace each other's pushed metric group;
    # confirm whether a per-process grouping_key is needed.
    try:
        settings = current_app.config
        gateway_address = settings['PUSH_GATEWAY_URL']
        job_name = settings['COMPONENT_NAME']
        push_to_gateway(gateway_address, job=job_name, registry=REGISTRY)
    except Exception as e:
        current_app.logger.error(f"Failed to push metrics to Prometheus Push Gateway: {e}")
def __enter__(self): def __enter__(self):
self.trace_start_time = time.time() self.trace_start_time = time.time()
self.log(f'Starting Trace for {self.event_type}') self.log(f'Starting Trace for {self.event_type}')
@@ -537,7 +606,7 @@ class BusinessEvent:
# Record trace duration # Record trace duration
TRACE_DURATION.labels( TRACE_DURATION.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -546,18 +615,22 @@ class BusinessEvent:
# Decrement concurrent traces gauge # Decrement concurrent traces gauge
CONCURRENT_TRACES.labels( CONCURRENT_TRACES.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).dec() ).dec()
self._push_to_gateway()
if self.llm_metrics['call_count'] > 0: if self.llm_metrics['call_count'] > 0:
self.log_final_metrics() self.log_final_metrics()
self.reset_llm_metrics() self.reset_llm_metrics()
self.log(f'Ending Trace for {self.event_type}', extra_fields={'trace_duration': trace_total_time}) self.log(f'Ending Trace for {self.event_type}', extra_fields={'trace_duration': trace_total_time})
self._flush_log_buffer() self._flush_log_buffer()
return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb) return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb)
async def __aenter__(self): async def __aenter__(self):
@@ -571,7 +644,7 @@ class BusinessEvent:
# Record trace duration # Record trace duration
TRACE_DURATION.labels( TRACE_DURATION.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
@@ -580,12 +653,14 @@ class BusinessEvent:
# Decrement concurrent traces gauge # Decrement concurrent traces gauge
CONCURRENT_TRACES.labels( CONCURRENT_TRACES.labels(
tenant_id=self.tenant_id_str, tenant_id=self.tenant_id_str,
event_type=self.event_type, event_type=self.event_type_str,
specialist_id=self.specialist_id_str, specialist_id=self.specialist_id_str,
specialist_type=self.specialist_type_str, specialist_type=self.specialist_type_str,
specialist_type_version=self.specialist_type_version_str specialist_type_version=self.specialist_type_version_str
).dec() ).dec()
self._push_to_gateway()
if self.llm_metrics['call_count'] > 0: if self.llm_metrics['call_count'] > 0:
self.log_final_metrics() self.log_final_metrics()
self.reset_llm_metrics() self.reset_llm_metrics()

View File

@@ -1,59 +0,0 @@
import time
import threading
from contextlib import contextmanager
from functools import wraps
from prometheus_client import Counter, Histogram, Summary, start_http_server, Gauge
from flask import current_app, g, request, Flask
class EveAIMetrics:
    """
    Shared Prometheus metrics infrastructure.

    Starts the Prometheus metrics HTTP server for an application and offers
    small helpers (standard histogram buckets, label-value stringification).
    Component-specific metrics are expected to live in their own modules.
    """

    def __init__(self, app: Flask = None):
        """Store the app and, when one is given, initialise immediately."""
        self.app = app
        self._metrics_server_started = False
        if app is not None:
            self.init_app(app)

    def init_app(self, app: Flask):
        """Bind to *app* and start the Prometheus metrics server."""
        self.app = app
        self._start_metrics_server()

    def _start_metrics_server(self):
        """Start the metrics HTTP server unless this instance already did."""
        if self._metrics_server_started:
            return

        try:
            # Port comes from the app config; 8000 is the fallback.
            metrics_port = self.app.config.get('PROMETHEUS_PORT', 8000)
            start_http_server(metrics_port)
            self.app.logger.info(f"Prometheus metrics server started on port {metrics_port}")
            self._metrics_server_started = True
        except Exception as e:
            # Failure to start is logged, not raised.
            self.app.logger.error(f"Failed to start metrics server: {e}")

    @staticmethod
    def get_standard_buckets():
        """
        Return the standard duration buckets for histogram metrics,
        ending with an infinite upper bound.
        """
        return [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]

    @staticmethod
    def sanitize_label_values(labels_dict):
        """
        Return a copy of *labels_dict* with every value converted to a string.

        Args:
            labels_dict: Dictionary of label name to label value

        Returns:
            Dictionary with the same keys; ``None`` values become ``""`` and
            all other values are passed through ``str()``.
        """
        stringified = {}
        for label_name, label_value in labels_dict.items():
            stringified[label_name] = "" if label_value is None else str(label_value)
        return stringified

View File

@@ -0,0 +1,11 @@
from flask import current_app
from prometheus_client import push_to_gateway
def sanitize_label(value):
    """Return *value* as a string containing only ASCII letters, digits and underscores.

    ``None`` yields an empty string. Any other value is converted with
    ``str()`` and every character outside ``[a-zA-Z0-9_]`` is replaced by an
    underscore.
    """
    import re

    if value is None:
        return ""

    text = str(value)
    # One underscore per disallowed character; runs are not collapsed.
    return re.sub(r'[^a-zA-Z0-9_]', '_', text)

View File

@@ -14,7 +14,17 @@ class Config(object):
SECRET_KEY = environ.get('SECRET_KEY') SECRET_KEY = environ.get('SECRET_KEY')
SESSION_COOKIE_SECURE = False SESSION_COOKIE_SECURE = False
SESSION_COOKIE_HTTPONLY = True SESSION_COOKIE_HTTPONLY = True
SESSION_KEY_PREFIX = f'{environ.get('COMPONENT_NAME')}_' COMPONENT_NAME = environ.get('COMPONENT_NAME')
SESSION_KEY_PREFIX = f'{COMPONENT_NAME}_'
# Database Settings
DB_HOST = environ.get('DB_HOST')
DB_USER = environ.get('DB_USER')
DB_PASS = environ.get('DB_PASS')
DB_NAME = environ.get('DB_NAME')
DB_PORT = environ.get('DB_PORT')
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
WTF_CSRF_ENABLED = True WTF_CSRF_ENABLED = True
WTF_CSRF_TIME_LIMIT = None WTF_CSRF_TIME_LIMIT = None
@@ -154,12 +164,17 @@ class Config(object):
COMPRESSION_PROCESS_DELAY = 1 COMPRESSION_PROCESS_DELAY = 1
# WordPress Integration Settings # WordPress Integration Settings
WORDPRESS_PROTOCOL = os.environ.get('WORDPRESS_PROTOCOL', 'http') WORDPRESS_PROTOCOL = environ.get('WORDPRESS_PROTOCOL', 'http')
WORDPRESS_HOST = os.environ.get('WORDPRESS_HOST', 'host.docker.internal') WORDPRESS_HOST = environ.get('WORDPRESS_HOST', 'host.docker.internal')
WORDPRESS_PORT = os.environ.get('WORDPRESS_PORT', '10003') WORDPRESS_PORT = environ.get('WORDPRESS_PORT', '10003')
WORDPRESS_BASE_URL = f"{WORDPRESS_PROTOCOL}://{WORDPRESS_HOST}:{WORDPRESS_PORT}" WORDPRESS_BASE_URL = f"{WORDPRESS_PROTOCOL}://{WORDPRESS_HOST}:{WORDPRESS_PORT}"
EXTERNAL_WORDPRESS_BASE_URL = 'localhost:10003' EXTERNAL_WORDPRESS_BASE_URL = 'localhost:10003'
# Prometheus Push Gateway
PUSH_GATEWAY_HOST = environ.get('PUSH_GATEWAY_HOST', 'pushgateway')
PUSH_GATEWAY_PORT = environ.get('PUSH_GATEWAY_PORT', '9091')
PUSH_GATEWAY_URL = f"{PUSH_GATEWAY_HOST}:{PUSH_GATEWAY_PORT}"
class DevConfig(Config): class DevConfig(Config):
DEVELOPMENT = True DEVELOPMENT = True
@@ -167,14 +182,6 @@ class DevConfig(Config):
FLASK_DEBUG = True FLASK_DEBUG = True
EXPLAIN_TEMPLATE_LOADING = False EXPLAIN_TEMPLATE_LOADING = False
# Database Settings
DB_HOST = environ.get('DB_HOST', 'localhost')
DB_USER = environ.get('DB_USER', 'luke')
DB_PASS = environ.get('DB_PASS', 'Skywalker!')
DB_NAME = environ.get('DB_NAME', 'eveai')
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:5432/{DB_NAME}'
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
# Define the nginx prefix used for the specific apps # Define the nginx prefix used for the specific apps
EVEAI_APP_LOCATION_PREFIX = '/admin' EVEAI_APP_LOCATION_PREFIX = '/admin'
EVEAI_CHAT_LOCATION_PREFIX = '/chat' EVEAI_CHAT_LOCATION_PREFIX = '/chat'
@@ -237,7 +244,6 @@ class DevConfig(Config):
class ProdConfig(Config): class ProdConfig(Config):
DEVELOPMENT = False DEVELOPMENT = False
DEBUG = False DEBUG = False
DEBUG = False
FLASK_DEBUG = False FLASK_DEBUG = False
EXPLAIN_TEMPLATE_LOADING = False EXPLAIN_TEMPLATE_LOADING = False
@@ -246,15 +252,6 @@ class ProdConfig(Config):
WTF_CSRF_SSL_STRICT = True # Set to True if using HTTPS WTF_CSRF_SSL_STRICT = True # Set to True if using HTTPS
# Database Settings
DB_HOST = environ.get('DB_HOST')
DB_USER = environ.get('DB_USER')
DB_PASS = environ.get('DB_PASS')
DB_NAME = environ.get('DB_NAME')
DB_PORT = environ.get('DB_PORT')
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
# flask-mailman settings # flask-mailman settings
MAIL_SERVER = 'mail.askeveai.com' MAIL_SERVER = 'mail.askeveai.com'
MAIL_PORT = 587 MAIL_PORT = 587

View File

@@ -39,6 +39,8 @@ x-common-variables: &common-variables
LANGCHAIN_API_KEY: "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" LANGCHAIN_API_KEY: "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"
SERPER_API_KEY: "e4c553856d0e6b5a171ec5e6b69d874285b9badf" SERPER_API_KEY: "e4c553856d0e6b5a171ec5e6b69d874285b9badf"
CREWAI_STORAGE_DIR: "/app/crewai_storage" CREWAI_STORAGE_DIR: "/app/crewai_storage"
PUSH_GATEWAY_HOST: "pushgateway"
PUSH_GATEWAY_PORT: "9091"
services: services:
nginx: nginx:
@@ -389,6 +391,14 @@ services:
networks: networks:
- eveai-network - eveai-network
pushgateway:
image: prom/pushgateway:latest
restart: unless-stopped
ports:
- "9091:9091"
networks:
- eveai-network
grafana: grafana:
image: grafana/grafana:latest image: grafana/grafana:latest
container_name: grafana container_name: grafana

View File

@@ -32,3 +32,8 @@ scrape_configs:
static_configs: static_configs:
- targets: ['eveai_entitlements:8000'] - targets: ['eveai_entitlements:8000']
scrape_interval: 10s scrape_interval: 10s
- job_name: 'pushgateway'
honor_labels: true
static_configs:
- targets: [ 'pushgateway:9091' ]

View File

@@ -5,7 +5,7 @@ import os
from common.langchain.templates.template_manager import TemplateManager from common.langchain.templates.template_manager import TemplateManager
from common.utils.celery_utils import make_celery, init_celery from common.utils.celery_utils import make_celery, init_celery
from common.extensions import db, template_manager, cache_manager, eveai_metrics from common.extensions import db, template_manager, cache_manager
from config.logging_config import LOGGING from config.logging_config import LOGGING
from config.config import get_config from config.config import get_config
@@ -45,7 +45,6 @@ def register_extensions(app):
db.init_app(app) db.init_app(app)
cache_manager.init_app(app) cache_manager.init_app(app)
template_manager.init_app(app) template_manager.init_app(app)
eveai_metrics.init_app(app)
def register_cache_handlers(app): def register_cache_handlers(app):