- Prometheus metrics go via pushgateway, as different worker processes might have different registries that are not picked up by Prometheus
This commit is contained in:
32
CHANGELOG.md
32
CHANGELOG.md
@@ -25,6 +25,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
### Security
|
### Security
|
||||||
- In case of vulnerabilities.
|
- In case of vulnerabilities.
|
||||||
|
|
||||||
|
## [2.2.0-alfa]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Mistral AI as main provider for embeddings, chains and specialists
|
||||||
|
- Usage measuring for specialists
|
||||||
|
- RAG from chain to specialist technology
|
||||||
|
- Dossier catalog management possibilities added to eveai_app
|
||||||
|
- Asset definition (Paused - other priorities)
|
||||||
|
- Prometheus and Grafana
|
||||||
|
- Add prometheus monitoring to business events
|
||||||
|
- Asynchronous execution of specialists
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Moved choice for AI providers / models to specialists and prompts
|
||||||
|
- Improve RAG to not repeat historic answers
|
||||||
|
- Fixed embedding model, no more choices allowed
|
||||||
|
- Clean URLs (strip tracking parameters) before adding them to a catalog
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
- For soon-to-be removed features.
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
- Add Multiple URLs removed from menu
|
||||||
|
- Old Specialist items removed from interaction menu
|
||||||
|
-
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Set default language when registering Documents or URLs.
|
||||||
|
|
||||||
|
### Security
|
||||||
|
- In case of vulnerabilities.
|
||||||
|
|
||||||
## [2.1.0-alfa]
|
## [2.1.0-alfa]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ from .langchain.templates.template_manager import TemplateManager
|
|||||||
from .utils.cache.eveai_cache_manager import EveAICacheManager
|
from .utils.cache.eveai_cache_manager import EveAICacheManager
|
||||||
from .utils.simple_encryption import SimpleEncryption
|
from .utils.simple_encryption import SimpleEncryption
|
||||||
from .utils.minio_utils import MinioClient
|
from .utils.minio_utils import MinioClient
|
||||||
from .utils.performance_monitoring import EveAIMetrics
|
|
||||||
|
|
||||||
|
|
||||||
# Create extensions
|
# Create extensions
|
||||||
@@ -35,5 +34,4 @@ minio_client = MinioClient()
|
|||||||
metrics = PrometheusMetrics.for_app_factory()
|
metrics = PrometheusMetrics.for_app_factory()
|
||||||
template_manager = TemplateManager()
|
template_manager = TemplateManager()
|
||||||
cache_manager = EveAICacheManager()
|
cache_manager = EveAICacheManager()
|
||||||
eveai_metrics = EveAIMetrics()
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,16 +6,18 @@ from datetime import datetime
|
|||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List
|
||||||
from datetime import datetime as dt, timezone as tz
|
from datetime import datetime as dt, timezone as tz
|
||||||
import logging
|
import logging
|
||||||
from prometheus_client import Counter, Histogram, Gauge, Summary
|
|
||||||
|
from flask import current_app
|
||||||
|
from prometheus_client import Counter, Histogram, Gauge, Summary, push_to_gateway, REGISTRY
|
||||||
|
|
||||||
from .business_event_context import BusinessEventContext
|
from .business_event_context import BusinessEventContext
|
||||||
from common.models.entitlements import BusinessEventLog
|
from common.models.entitlements import BusinessEventLog
|
||||||
from common.extensions import db
|
from common.extensions import db
|
||||||
from .celery_utils import current_celery
|
from .celery_utils import current_celery
|
||||||
from common.utils.performance_monitoring import EveAIMetrics
|
from common.utils.prometheus_utils import sanitize_label
|
||||||
|
|
||||||
# Standard duration buckets for all histograms
|
# Standard duration buckets for all histograms
|
||||||
DURATION_BUCKETS = EveAIMetrics.get_standard_buckets()
|
DURATION_BUCKETS = [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
|
||||||
|
|
||||||
# Prometheus metrics for business events
|
# Prometheus metrics for business events
|
||||||
TRACE_COUNTER = Counter(
|
TRACE_COUNTER = Counter(
|
||||||
@@ -112,14 +114,24 @@ class BusinessEvent:
|
|||||||
|
|
||||||
# Prometheus label values must be strings
|
# Prometheus label values must be strings
|
||||||
self.tenant_id_str = str(self.tenant_id)
|
self.tenant_id_str = str(self.tenant_id)
|
||||||
|
self.event_type_str = sanitize_label(self.event_type)
|
||||||
self.specialist_id_str = str(self.specialist_id) if self.specialist_id else ""
|
self.specialist_id_str = str(self.specialist_id) if self.specialist_id else ""
|
||||||
self.specialist_type_str = str(self.specialist_type) if self.specialist_type else ""
|
self.specialist_type_str = str(self.specialist_type) if self.specialist_type else ""
|
||||||
self.specialist_type_version_str = str(self.specialist_type_version) if self.specialist_type_version else ""
|
self.specialist_type_version_str = sanitize_label(str(self.specialist_type_version)) \
|
||||||
|
if self.specialist_type_version else ""
|
||||||
|
self.span_name_str = ""
|
||||||
|
|
||||||
|
current_app.logger.debug(f"Labels for metrics: "
|
||||||
|
f"tenant_id={self.tenant_id_str}, "
|
||||||
|
f"event_type={self.event_type_str},"
|
||||||
|
f"specialist_id={self.specialist_id_str}, "
|
||||||
|
f"specialist_type={self.specialist_type_str}, " +
|
||||||
|
f"specialist_type_version={self.specialist_type_version_str}")
|
||||||
|
|
||||||
# Increment concurrent events gauge when initialized
|
# Increment concurrent events gauge when initialized
|
||||||
CONCURRENT_TRACES.labels(
|
CONCURRENT_TRACES.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -128,12 +140,14 @@ class BusinessEvent:
|
|||||||
# Increment trace counter
|
# Increment trace counter
|
||||||
TRACE_COUNTER.labels(
|
TRACE_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).inc()
|
).inc()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
def update_attribute(self, attribute: str, value: any):
|
def update_attribute(self, attribute: str, value: any):
|
||||||
if hasattr(self, attribute):
|
if hasattr(self, attribute):
|
||||||
setattr(self, attribute, value)
|
setattr(self, attribute, value)
|
||||||
@@ -143,9 +157,13 @@ class BusinessEvent:
|
|||||||
elif attribute == 'specialist_type':
|
elif attribute == 'specialist_type':
|
||||||
self.specialist_type_str = str(value) if value else ""
|
self.specialist_type_str = str(value) if value else ""
|
||||||
elif attribute == 'specialist_type_version':
|
elif attribute == 'specialist_type_version':
|
||||||
self.specialist_type_version_str = str(value) if value else ""
|
self.specialist_type_version_str = sanitize_label(str(value)) if value else ""
|
||||||
elif attribute == 'tenant_id':
|
elif attribute == 'tenant_id':
|
||||||
self.tenant_id_str = str(value)
|
self.tenant_id_str = str(value)
|
||||||
|
elif attribute == 'event_type':
|
||||||
|
self.event_type_str = sanitize_label(value)
|
||||||
|
elif attribute == 'span_name':
|
||||||
|
self.span_name_str = sanitize_label(value)
|
||||||
else:
|
else:
|
||||||
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'")
|
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'")
|
||||||
|
|
||||||
@@ -158,13 +176,21 @@ class BusinessEvent:
|
|||||||
self.llm_metrics['interaction_type'] = metrics['interaction_type']
|
self.llm_metrics['interaction_type'] = metrics['interaction_type']
|
||||||
|
|
||||||
# Track in Prometheus metrics
|
# Track in Prometheus metrics
|
||||||
interaction_type = metrics['interaction_type']
|
interaction_type_str = sanitize_label(metrics['interaction_type']) if metrics['interaction_type'] else ""
|
||||||
|
|
||||||
|
current_app.logger.debug(f"Labels for metrics: "
|
||||||
|
f"tenant_id={self.tenant_id_str}, "
|
||||||
|
f"event_type={self.event_type_str},"
|
||||||
|
f"interaction_type={interaction_type_str}, "
|
||||||
|
f"specialist_id={self.specialist_id_str}, "
|
||||||
|
f"specialist_type={self.specialist_type_str}, "
|
||||||
|
f"specialist_type_version={self.specialist_type_version_str}")
|
||||||
|
|
||||||
# Track token usage
|
# Track token usage
|
||||||
LLM_TOKENS_COUNTER.labels(
|
LLM_TOKENS_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
interaction_type=interaction_type,
|
interaction_type=interaction_type_str,
|
||||||
token_type='total',
|
token_type='total',
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
@@ -173,8 +199,8 @@ class BusinessEvent:
|
|||||||
|
|
||||||
LLM_TOKENS_COUNTER.labels(
|
LLM_TOKENS_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
interaction_type=interaction_type,
|
interaction_type=interaction_type_str,
|
||||||
token_type='prompt',
|
token_type='prompt',
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
@@ -183,8 +209,8 @@ class BusinessEvent:
|
|||||||
|
|
||||||
LLM_TOKENS_COUNTER.labels(
|
LLM_TOKENS_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
interaction_type=interaction_type,
|
interaction_type=interaction_type_str,
|
||||||
token_type='completion',
|
token_type='completion',
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
@@ -194,8 +220,8 @@ class BusinessEvent:
|
|||||||
# Track duration
|
# Track duration
|
||||||
LLM_DURATION.labels(
|
LLM_DURATION.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
interaction_type=interaction_type,
|
interaction_type=interaction_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -204,13 +230,15 @@ class BusinessEvent:
|
|||||||
# Track call count
|
# Track call count
|
||||||
LLM_CALLS_COUNTER.labels(
|
LLM_CALLS_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
interaction_type=interaction_type,
|
interaction_type=interaction_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).inc()
|
).inc()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
def reset_llm_metrics(self):
|
def reset_llm_metrics(self):
|
||||||
self.llm_metrics['total_tokens'] = 0
|
self.llm_metrics['total_tokens'] = 0
|
||||||
self.llm_metrics['prompt_tokens'] = 0
|
self.llm_metrics['prompt_tokens'] = 0
|
||||||
@@ -236,16 +264,25 @@ class BusinessEvent:
|
|||||||
# Set the new span info
|
# Set the new span info
|
||||||
self.span_id = new_span_id
|
self.span_id = new_span_id
|
||||||
self.span_name = span_name
|
self.span_name = span_name
|
||||||
|
self.span_name_str = sanitize_label(span_name) if span_name else ""
|
||||||
self.parent_span_id = parent_span_id
|
self.parent_span_id = parent_span_id
|
||||||
|
|
||||||
# Track start time for the span
|
# Track start time for the span
|
||||||
span_start_time = time.time()
|
span_start_time = time.time()
|
||||||
|
|
||||||
|
current_app.logger.debug(f"Labels for metrics: "
|
||||||
|
f"tenant_id={self.tenant_id_str}, "
|
||||||
|
f"event_type={self.event_type_str}, "
|
||||||
|
f"activity_name={self.span_name_str}, "
|
||||||
|
f"specialist_id={self.specialist_id_str}, "
|
||||||
|
f"specialist_type={self.specialist_type_str}, "
|
||||||
|
f"specialist_type_version={self.specialist_type_version_str}")
|
||||||
|
|
||||||
# Increment span metrics - using span_name as activity_name for metrics
|
# Increment span metrics - using span_name as activity_name for metrics
|
||||||
SPAN_COUNTER.labels(
|
SPAN_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -254,13 +291,15 @@ class BusinessEvent:
|
|||||||
# Increment concurrent spans gauge
|
# Increment concurrent spans gauge
|
||||||
CONCURRENT_SPANS.labels(
|
CONCURRENT_SPANS.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).inc()
|
).inc()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
self.log(f"Start")
|
self.log(f"Start")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -272,8 +311,8 @@ class BusinessEvent:
|
|||||||
# Observe span duration
|
# Observe span duration
|
||||||
SPAN_DURATION.labels(
|
SPAN_DURATION.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -282,13 +321,15 @@ class BusinessEvent:
|
|||||||
# Decrement concurrent spans gauge
|
# Decrement concurrent spans gauge
|
||||||
CONCURRENT_SPANS.labels(
|
CONCURRENT_SPANS.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).dec()
|
).dec()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
if self.llm_metrics['call_count'] > 0:
|
if self.llm_metrics['call_count'] > 0:
|
||||||
self.log_final_metrics()
|
self.log_final_metrics()
|
||||||
self.reset_llm_metrics()
|
self.reset_llm_metrics()
|
||||||
@@ -296,10 +337,12 @@ class BusinessEvent:
|
|||||||
# Restore the previous span info
|
# Restore the previous span info
|
||||||
if self.spans:
|
if self.spans:
|
||||||
self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
|
self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
|
||||||
|
self.span_name_str = sanitize_label(self.span_name) if self.span_name else ""  # label must follow the restored (parent) span, not the span that just ended
|
||||||
else:
|
else:
|
||||||
self.span_id = None
|
self.span_id = None
|
||||||
self.span_name = None
|
self.span_name = None
|
||||||
self.parent_span_id = None
|
self.parent_span_id = None
|
||||||
|
self.span_name_str = ""
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def create_span_async(self, span_name: str):
|
async def create_span_async(self, span_name: str):
|
||||||
@@ -314,16 +357,25 @@ class BusinessEvent:
|
|||||||
# Set the new span info
|
# Set the new span info
|
||||||
self.span_id = new_span_id
|
self.span_id = new_span_id
|
||||||
self.span_name = span_name
|
self.span_name = span_name
|
||||||
|
self.span_name_str = sanitize_label(span_name) if span_name else ""
|
||||||
self.parent_span_id = parent_span_id
|
self.parent_span_id = parent_span_id
|
||||||
|
|
||||||
# Track start time for the span
|
# Track start time for the span
|
||||||
span_start_time = time.time()
|
span_start_time = time.time()
|
||||||
|
|
||||||
|
current_app.logger.debug(f"Labels for metrics: "
|
||||||
|
f"tenant_id={self.tenant_id_str}, "
|
||||||
|
f"event_type={self.event_type_str}, "
|
||||||
|
f"activity_name={self.span_name_str}, "
|
||||||
|
f"specialist_id={self.specialist_id_str}, "
|
||||||
|
f"specialist_type={self.specialist_type_str}, "
|
||||||
|
f"specialist_type_version={self.specialist_type_version_str}")
|
||||||
|
|
||||||
# Increment span metrics - using span_name as activity_name for metrics
|
# Increment span metrics - using span_name as activity_name for metrics
|
||||||
SPAN_COUNTER.labels(
|
SPAN_COUNTER.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -332,13 +384,15 @@ class BusinessEvent:
|
|||||||
# Increment concurrent spans gauge
|
# Increment concurrent spans gauge
|
||||||
CONCURRENT_SPANS.labels(
|
CONCURRENT_SPANS.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).inc()
|
).inc()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
self.log(f"Start")
|
self.log(f"Start")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -350,8 +404,8 @@ class BusinessEvent:
|
|||||||
# Observe span duration
|
# Observe span duration
|
||||||
SPAN_DURATION.labels(
|
SPAN_DURATION.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -360,13 +414,15 @@ class BusinessEvent:
|
|||||||
# Decrement concurrent spans gauge
|
# Decrement concurrent spans gauge
|
||||||
CONCURRENT_SPANS.labels(
|
CONCURRENT_SPANS.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
activity_name=span_name,
|
activity_name=self.span_name_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).dec()
|
).dec()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
if self.llm_metrics['call_count'] > 0:
|
if self.llm_metrics['call_count'] > 0:
|
||||||
self.log_final_metrics()
|
self.log_final_metrics()
|
||||||
self.reset_llm_metrics()
|
self.reset_llm_metrics()
|
||||||
@@ -374,10 +430,12 @@ class BusinessEvent:
|
|||||||
# Restore the previous span info
|
# Restore the previous span info
|
||||||
if self.spans:
|
if self.spans:
|
||||||
self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
|
self.span_id, self.span_name, self.parent_span_id = self.spans.pop()
|
||||||
|
self.span_name_str = sanitize_label(self.span_name) if self.span_name else ""  # label must follow the restored (parent) span, not the span that just ended
|
||||||
else:
|
else:
|
||||||
self.span_id = None
|
self.span_id = None
|
||||||
self.span_name = None
|
self.span_name = None
|
||||||
self.parent_span_id = None
|
self.parent_span_id = None
|
||||||
|
self.span_name_str = ""
|
||||||
|
|
||||||
def log(self, message: str, level: str = 'info', extra_fields: Dict[str, Any] = None):
|
def log(self, message: str, level: str = 'info', extra_fields: Dict[str, Any] = None):
|
||||||
log_data = {
|
log_data = {
|
||||||
@@ -526,6 +584,17 @@ class BusinessEvent:
|
|||||||
# Clear the buffer after sending
|
# Clear the buffer after sending
|
||||||
self._log_buffer = []
|
self._log_buffer = []
|
||||||
|
|
||||||
|
def _push_to_gateway(self):
|
||||||
|
# Push metrics to the gateway
|
||||||
|
try:
|
||||||
|
push_to_gateway(
|
||||||
|
current_app.config['PUSH_GATEWAY_URL'],
|
||||||
|
job=current_app.config['COMPONENT_NAME'],
|
||||||
|
registry=REGISTRY
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
current_app.logger.error(f"Failed to push metrics to Prometheus Push Gateway: {e}")
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self.trace_start_time = time.time()
|
self.trace_start_time = time.time()
|
||||||
self.log(f'Starting Trace for {self.event_type}')
|
self.log(f'Starting Trace for {self.event_type}')
|
||||||
@@ -537,7 +606,7 @@ class BusinessEvent:
|
|||||||
# Record trace duration
|
# Record trace duration
|
||||||
TRACE_DURATION.labels(
|
TRACE_DURATION.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -546,18 +615,22 @@ class BusinessEvent:
|
|||||||
# Decrement concurrent traces gauge
|
# Decrement concurrent traces gauge
|
||||||
CONCURRENT_TRACES.labels(
|
CONCURRENT_TRACES.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).dec()
|
).dec()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
if self.llm_metrics['call_count'] > 0:
|
if self.llm_metrics['call_count'] > 0:
|
||||||
self.log_final_metrics()
|
self.log_final_metrics()
|
||||||
self.reset_llm_metrics()
|
self.reset_llm_metrics()
|
||||||
|
|
||||||
self.log(f'Ending Trace for {self.event_type}', extra_fields={'trace_duration': trace_total_time})
|
self.log(f'Ending Trace for {self.event_type}', extra_fields={'trace_duration': trace_total_time})
|
||||||
self._flush_log_buffer()
|
self._flush_log_buffer()
|
||||||
|
|
||||||
|
|
||||||
return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb)
|
return BusinessEventContext(self).__exit__(exc_type, exc_val, exc_tb)
|
||||||
|
|
||||||
async def __aenter__(self):
|
async def __aenter__(self):
|
||||||
@@ -571,7 +644,7 @@ class BusinessEvent:
|
|||||||
# Record trace duration
|
# Record trace duration
|
||||||
TRACE_DURATION.labels(
|
TRACE_DURATION.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
@@ -580,12 +653,14 @@ class BusinessEvent:
|
|||||||
# Decrement concurrent traces gauge
|
# Decrement concurrent traces gauge
|
||||||
CONCURRENT_TRACES.labels(
|
CONCURRENT_TRACES.labels(
|
||||||
tenant_id=self.tenant_id_str,
|
tenant_id=self.tenant_id_str,
|
||||||
event_type=self.event_type,
|
event_type=self.event_type_str,
|
||||||
specialist_id=self.specialist_id_str,
|
specialist_id=self.specialist_id_str,
|
||||||
specialist_type=self.specialist_type_str,
|
specialist_type=self.specialist_type_str,
|
||||||
specialist_type_version=self.specialist_type_version_str
|
specialist_type_version=self.specialist_type_version_str
|
||||||
).dec()
|
).dec()
|
||||||
|
|
||||||
|
self._push_to_gateway()
|
||||||
|
|
||||||
if self.llm_metrics['call_count'] > 0:
|
if self.llm_metrics['call_count'] > 0:
|
||||||
self.log_final_metrics()
|
self.log_final_metrics()
|
||||||
self.reset_llm_metrics()
|
self.reset_llm_metrics()
|
||||||
|
|||||||
@@ -1,59 +0,0 @@
|
|||||||
import time
|
|
||||||
import threading
|
|
||||||
from contextlib import contextmanager
|
|
||||||
from functools import wraps
|
|
||||||
from prometheus_client import Counter, Histogram, Summary, start_http_server, Gauge
|
|
||||||
from flask import current_app, g, request, Flask
|
|
||||||
|
|
||||||
|
|
||||||
class EveAIMetrics:
|
|
||||||
"""
|
|
||||||
Central class for Prometheus metrics infrastructure.
|
|
||||||
This class initializes the Prometheus HTTP server and provides
|
|
||||||
shared functionality for metrics across components.
|
|
||||||
|
|
||||||
Component-specific metrics should be defined in their respective modules.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, app: Flask = None):
|
|
||||||
self.app = app
|
|
||||||
self._metrics_server_started = False
|
|
||||||
if app is not None:
|
|
||||||
self.init_app(app)
|
|
||||||
|
|
||||||
def init_app(self, app: Flask):
|
|
||||||
"""Initialize metrics with Flask app and start Prometheus server"""
|
|
||||||
self.app = app
|
|
||||||
self._start_metrics_server()
|
|
||||||
|
|
||||||
def _start_metrics_server(self):
|
|
||||||
"""Start the Prometheus metrics HTTP server if not already running"""
|
|
||||||
if not self._metrics_server_started:
|
|
||||||
try:
|
|
||||||
metrics_port = self.app.config.get('PROMETHEUS_PORT', 8000)
|
|
||||||
start_http_server(metrics_port)
|
|
||||||
self.app.logger.info(f"Prometheus metrics server started on port {metrics_port}")
|
|
||||||
self._metrics_server_started = True
|
|
||||||
except Exception as e:
|
|
||||||
self.app.logger.error(f"Failed to start metrics server: {e}")
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_standard_buckets():
|
|
||||||
"""
|
|
||||||
Return the standard duration buckets for histogram metrics.
|
|
||||||
Components should use these for consistency across the system.
|
|
||||||
"""
|
|
||||||
return [0.1, 0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 240, 360, float('inf')]
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def sanitize_label_values(labels_dict):
|
|
||||||
"""
|
|
||||||
Convert all label values to strings as required by Prometheus.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
labels_dict: Dictionary of label name to label value
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with all values converted to strings
|
|
||||||
"""
|
|
||||||
return {k: str(v) if v is not None else "" for k, v in labels_dict.items()}
|
|
||||||
11
common/utils/prometheus_utils.py
Normal file
11
common/utils/prometheus_utils.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from flask import current_app
|
||||||
|
from prometheus_client import push_to_gateway
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_label(value):
|
||||||
|
"""Convert value to valid Prometheus label by removing/replacing invalid chars"""
|
||||||
|
if value is None:
|
||||||
|
return ""
|
||||||
|
# Replace spaces and special chars with underscores
|
||||||
|
import re
|
||||||
|
return re.sub(r'[^a-zA-Z0-9_]', '_', str(value))
|
||||||
@@ -14,7 +14,17 @@ class Config(object):
|
|||||||
SECRET_KEY = environ.get('SECRET_KEY')
|
SECRET_KEY = environ.get('SECRET_KEY')
|
||||||
SESSION_COOKIE_SECURE = False
|
SESSION_COOKIE_SECURE = False
|
||||||
SESSION_COOKIE_HTTPONLY = True
|
SESSION_COOKIE_HTTPONLY = True
|
||||||
SESSION_KEY_PREFIX = f'{environ.get('COMPONENT_NAME')}_'
|
COMPONENT_NAME = environ.get('COMPONENT_NAME')
|
||||||
|
SESSION_KEY_PREFIX = f'{COMPONENT_NAME}_'
|
||||||
|
|
||||||
|
# Database Settings
|
||||||
|
DB_HOST = environ.get('DB_HOST')
|
||||||
|
DB_USER = environ.get('DB_USER')
|
||||||
|
DB_PASS = environ.get('DB_PASS')
|
||||||
|
DB_NAME = environ.get('DB_NAME')
|
||||||
|
DB_PORT = environ.get('DB_PORT')
|
||||||
|
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
|
||||||
|
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
|
||||||
|
|
||||||
WTF_CSRF_ENABLED = True
|
WTF_CSRF_ENABLED = True
|
||||||
WTF_CSRF_TIME_LIMIT = None
|
WTF_CSRF_TIME_LIMIT = None
|
||||||
@@ -154,12 +164,17 @@ class Config(object):
|
|||||||
COMPRESSION_PROCESS_DELAY = 1
|
COMPRESSION_PROCESS_DELAY = 1
|
||||||
|
|
||||||
# WordPress Integration Settings
|
# WordPress Integration Settings
|
||||||
WORDPRESS_PROTOCOL = os.environ.get('WORDPRESS_PROTOCOL', 'http')
|
WORDPRESS_PROTOCOL = environ.get('WORDPRESS_PROTOCOL', 'http')
|
||||||
WORDPRESS_HOST = os.environ.get('WORDPRESS_HOST', 'host.docker.internal')
|
WORDPRESS_HOST = environ.get('WORDPRESS_HOST', 'host.docker.internal')
|
||||||
WORDPRESS_PORT = os.environ.get('WORDPRESS_PORT', '10003')
|
WORDPRESS_PORT = environ.get('WORDPRESS_PORT', '10003')
|
||||||
WORDPRESS_BASE_URL = f"{WORDPRESS_PROTOCOL}://{WORDPRESS_HOST}:{WORDPRESS_PORT}"
|
WORDPRESS_BASE_URL = f"{WORDPRESS_PROTOCOL}://{WORDPRESS_HOST}:{WORDPRESS_PORT}"
|
||||||
EXTERNAL_WORDPRESS_BASE_URL = 'localhost:10003'
|
EXTERNAL_WORDPRESS_BASE_URL = 'localhost:10003'
|
||||||
|
|
||||||
|
# Prometheus PUSH Gateway
|
||||||
|
PUSH_GATEWAY_HOST = environ.get('PUSH_GATEWAY_HOST', 'pushgateway')
|
||||||
|
PUSH_GATEWAY_PORT = environ.get('PUSH_GATEWAY_PORT', '9091')
|
||||||
|
PUSH_GATEWAY_URL = f"{PUSH_GATEWAY_HOST}:{PUSH_GATEWAY_PORT}"
|
||||||
|
|
||||||
|
|
||||||
class DevConfig(Config):
|
class DevConfig(Config):
|
||||||
DEVELOPMENT = True
|
DEVELOPMENT = True
|
||||||
@@ -167,14 +182,6 @@ class DevConfig(Config):
|
|||||||
FLASK_DEBUG = True
|
FLASK_DEBUG = True
|
||||||
EXPLAIN_TEMPLATE_LOADING = False
|
EXPLAIN_TEMPLATE_LOADING = False
|
||||||
|
|
||||||
# Database Settings
|
|
||||||
DB_HOST = environ.get('DB_HOST', 'localhost')
|
|
||||||
DB_USER = environ.get('DB_USER', 'luke')
|
|
||||||
DB_PASS = environ.get('DB_PASS', 'Skywalker!')
|
|
||||||
DB_NAME = environ.get('DB_NAME', 'eveai')
|
|
||||||
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:5432/{DB_NAME}'
|
|
||||||
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
|
|
||||||
|
|
||||||
# Define the nginx prefix used for the specific apps
|
# Define the nginx prefix used for the specific apps
|
||||||
EVEAI_APP_LOCATION_PREFIX = '/admin'
|
EVEAI_APP_LOCATION_PREFIX = '/admin'
|
||||||
EVEAI_CHAT_LOCATION_PREFIX = '/chat'
|
EVEAI_CHAT_LOCATION_PREFIX = '/chat'
|
||||||
@@ -237,7 +244,6 @@ class DevConfig(Config):
|
|||||||
class ProdConfig(Config):
|
class ProdConfig(Config):
|
||||||
DEVELOPMENT = False
|
DEVELOPMENT = False
|
||||||
DEBUG = False
|
DEBUG = False
|
||||||
DEBUG = False
|
|
||||||
FLASK_DEBUG = False
|
FLASK_DEBUG = False
|
||||||
EXPLAIN_TEMPLATE_LOADING = False
|
EXPLAIN_TEMPLATE_LOADING = False
|
||||||
|
|
||||||
@@ -246,15 +252,6 @@ class ProdConfig(Config):
|
|||||||
|
|
||||||
WTF_CSRF_SSL_STRICT = True # Set to True if using HTTPS
|
WTF_CSRF_SSL_STRICT = True # Set to True if using HTTPS
|
||||||
|
|
||||||
# Database Settings
|
|
||||||
DB_HOST = environ.get('DB_HOST')
|
|
||||||
DB_USER = environ.get('DB_USER')
|
|
||||||
DB_PASS = environ.get('DB_PASS')
|
|
||||||
DB_NAME = environ.get('DB_NAME')
|
|
||||||
DB_PORT = environ.get('DB_PORT')
|
|
||||||
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
|
|
||||||
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
|
|
||||||
|
|
||||||
# flask-mailman settings
|
# flask-mailman settings
|
||||||
MAIL_SERVER = 'mail.askeveai.com'
|
MAIL_SERVER = 'mail.askeveai.com'
|
||||||
MAIL_PORT = 587
|
MAIL_PORT = 587
|
||||||
|
|||||||
@@ -39,6 +39,8 @@ x-common-variables: &common-variables
|
|||||||
LANGCHAIN_API_KEY: "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"
|
LANGCHAIN_API_KEY: "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"
|
||||||
SERPER_API_KEY: "e4c553856d0e6b5a171ec5e6b69d874285b9badf"
|
SERPER_API_KEY: "e4c553856d0e6b5a171ec5e6b69d874285b9badf"
|
||||||
CREWAI_STORAGE_DIR: "/app/crewai_storage"
|
CREWAI_STORAGE_DIR: "/app/crewai_storage"
|
||||||
|
PUSH_GATEWAY_HOST: "pushgateway"
|
||||||
|
PUSH_GATEWAY_PORT: "9091"
|
||||||
|
|
||||||
services:
|
services:
|
||||||
nginx:
|
nginx:
|
||||||
@@ -389,6 +391,14 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- eveai-network
|
- eveai-network
|
||||||
|
|
||||||
|
pushgateway:
|
||||||
|
image: prom/pushgateway:latest
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "9091:9091"
|
||||||
|
networks:
|
||||||
|
- eveai-network
|
||||||
|
|
||||||
grafana:
|
grafana:
|
||||||
image: grafana/grafana:latest
|
image: grafana/grafana:latest
|
||||||
container_name: grafana
|
container_name: grafana
|
||||||
|
|||||||
@@ -32,3 +32,8 @@ scrape_configs:
|
|||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['eveai_entitlements:8000']
|
- targets: ['eveai_entitlements:8000']
|
||||||
scrape_interval: 10s
|
scrape_interval: 10s
|
||||||
|
|
||||||
|
- job_name: 'pushgateway'
|
||||||
|
honor_labels: true
|
||||||
|
static_configs:
|
||||||
|
- targets: [ 'pushgateway:9091' ]
|
||||||
@@ -5,7 +5,7 @@ import os
|
|||||||
|
|
||||||
from common.langchain.templates.template_manager import TemplateManager
|
from common.langchain.templates.template_manager import TemplateManager
|
||||||
from common.utils.celery_utils import make_celery, init_celery
|
from common.utils.celery_utils import make_celery, init_celery
|
||||||
from common.extensions import db, template_manager, cache_manager, eveai_metrics
|
from common.extensions import db, template_manager, cache_manager
|
||||||
from config.logging_config import LOGGING
|
from config.logging_config import LOGGING
|
||||||
from config.config import get_config
|
from config.config import get_config
|
||||||
|
|
||||||
@@ -45,7 +45,6 @@ def register_extensions(app):
|
|||||||
db.init_app(app)
|
db.init_app(app)
|
||||||
cache_manager.init_app(app)
|
cache_manager.init_app(app)
|
||||||
template_manager.init_app(app)
|
template_manager.init_app(app)
|
||||||
eveai_metrics.init_app(app)
|
|
||||||
|
|
||||||
|
|
||||||
def register_cache_handlers(app):
|
def register_cache_handlers(app):
|
||||||
|
|||||||
Reference in New Issue
Block a user