- Significantly changed the PDF Processor to use Mistral's OCR model
- Ensured very long chunks get split into smaller chunks
- Ensured TrackedMistralAIEmbedding is batched when needed so that it executes correctly
- Upgraded some of the packages to a higher version
This commit is contained in:
@@ -106,6 +106,7 @@ class BusinessEvent:
|
||||
'total_tokens': 0,
|
||||
'prompt_tokens': 0,
|
||||
'completion_tokens': 0,
|
||||
'nr_of_pages': 0,
|
||||
'total_time': 0,
|
||||
'call_count': 0,
|
||||
'interaction_type': None
|
||||
@@ -121,13 +122,6 @@ class BusinessEvent:
|
||||
if self.specialist_type_version else ""
|
||||
self.span_name_str = ""
|
||||
|
||||
current_app.logger.debug(f"Labels for metrics: "
|
||||
f"tenant_id={self.tenant_id_str}, "
|
||||
f"event_type={self.event_type_str},"
|
||||
f"specialist_id={self.specialist_id_str}, "
|
||||
f"specialist_type={self.specialist_type_str}, " +
|
||||
f"specialist_type_version={self.specialist_type_version_str}")
|
||||
|
||||
# Increment concurrent events gauge when initialized
|
||||
CONCURRENT_TRACES.labels(
|
||||
tenant_id=self.tenant_id_str,
|
||||
@@ -168,24 +162,17 @@ class BusinessEvent:
|
||||
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attribute}'")
|
||||
|
||||
def update_llm_metrics(self, metrics: dict):
|
||||
self.llm_metrics['total_tokens'] += metrics['total_tokens']
|
||||
self.llm_metrics['prompt_tokens'] += metrics['prompt_tokens']
|
||||
self.llm_metrics['completion_tokens'] += metrics['completion_tokens']
|
||||
self.llm_metrics['total_time'] += metrics['time_elapsed']
|
||||
self.llm_metrics['total_tokens'] += metrics.get('total_tokens', 0)
|
||||
self.llm_metrics['prompt_tokens'] += metrics.get('prompt_tokens', 0)
|
||||
self.llm_metrics['completion_tokens'] += metrics.get('completion_tokens', 0)
|
||||
self.llm_metrics['nr_of_pages'] += metrics.get('nr_of_pages', 0)
|
||||
self.llm_metrics['total_time'] += metrics.get('time_elapsed', 0)
|
||||
self.llm_metrics['call_count'] += 1
|
||||
self.llm_metrics['interaction_type'] = metrics['interaction_type']
|
||||
|
||||
# Track in Prometheus metrics
|
||||
interaction_type_str = sanitize_label(metrics['interaction_type']) if metrics['interaction_type'] else ""
|
||||
|
||||
current_app.logger.debug(f"Labels for metrics: "
|
||||
f"tenant_id={self.tenant_id_str}, "
|
||||
f"event_type={self.event_type_str},"
|
||||
f"interaction_type={interaction_type_str}, "
|
||||
f"specialist_id={self.specialist_id_str}, "
|
||||
f"specialist_type={self.specialist_type_str}, "
|
||||
f"specialist_type_version={self.specialist_type_version_str}")
|
||||
|
||||
# Track token usage
|
||||
LLM_TOKENS_COUNTER.labels(
|
||||
tenant_id=self.tenant_id_str,
|
||||
@@ -195,7 +182,7 @@ class BusinessEvent:
|
||||
specialist_id=self.specialist_id_str,
|
||||
specialist_type=self.specialist_type_str,
|
||||
specialist_type_version=self.specialist_type_version_str
|
||||
).inc(metrics['total_tokens'])
|
||||
).inc(metrics.get('total_tokens', 0))
|
||||
|
||||
LLM_TOKENS_COUNTER.labels(
|
||||
tenant_id=self.tenant_id_str,
|
||||
@@ -205,7 +192,7 @@ class BusinessEvent:
|
||||
specialist_id=self.specialist_id_str,
|
||||
specialist_type=self.specialist_type_str,
|
||||
specialist_type_version=self.specialist_type_version_str
|
||||
).inc(metrics['prompt_tokens'])
|
||||
).inc(metrics.get('prompt_tokens', 0))
|
||||
|
||||
LLM_TOKENS_COUNTER.labels(
|
||||
tenant_id=self.tenant_id_str,
|
||||
@@ -215,7 +202,7 @@ class BusinessEvent:
|
||||
specialist_id=self.specialist_id_str,
|
||||
specialist_type=self.specialist_type_str,
|
||||
specialist_type_version=self.specialist_type_version_str
|
||||
).inc(metrics['completion_tokens'])
|
||||
).inc(metrics.get('completion_tokens', 0))
|
||||
|
||||
# Track duration
|
||||
LLM_DURATION.labels(
|
||||
@@ -225,7 +212,7 @@ class BusinessEvent:
|
||||
specialist_id=self.specialist_id_str,
|
||||
specialist_type=self.specialist_type_str,
|
||||
specialist_type_version=self.specialist_type_version_str
|
||||
).observe(metrics['time_elapsed'])
|
||||
).observe(metrics.get('time_elapsed', 0))
|
||||
|
||||
# Track call count
|
||||
LLM_CALLS_COUNTER.labels(
|
||||
@@ -243,6 +230,7 @@ class BusinessEvent:
|
||||
self.llm_metrics['total_tokens'] = 0
|
||||
self.llm_metrics['prompt_tokens'] = 0
|
||||
self.llm_metrics['completion_tokens'] = 0
|
||||
self.llm_metrics['nr_of_pages'] = 0
|
||||
self.llm_metrics['total_time'] = 0
|
||||
self.llm_metrics['call_count'] = 0
|
||||
self.llm_metrics['interaction_type'] = None
|
||||
@@ -270,14 +258,6 @@ class BusinessEvent:
|
||||
# Track start time for the span
|
||||
span_start_time = time.time()
|
||||
|
||||
current_app.logger.debug(f"Labels for metrics: "
|
||||
f"tenant_id={self.tenant_id_str}, "
|
||||
f"event_type={self.event_type_str}, "
|
||||
f"activity_name={self.span_name_str}, "
|
||||
f"specialist_id={self.specialist_id_str}, "
|
||||
f"specialist_type={self.specialist_type_str}, "
|
||||
f"specialist_type_version={self.specialist_type_version_str}")
|
||||
|
||||
# Increment span metrics - using span_name as activity_name for metrics
|
||||
SPAN_COUNTER.labels(
|
||||
tenant_id=self.tenant_id_str,
|
||||
@@ -363,14 +343,6 @@ class BusinessEvent:
|
||||
# Track start time for the span
|
||||
span_start_time = time.time()
|
||||
|
||||
current_app.logger.debug(f"Labels for metrics: "
|
||||
f"tenant_id={self.tenant_id_str}, "
|
||||
f"event_type={self.event_type_str}, "
|
||||
f"activity_name={self.span_name_str}, "
|
||||
f"specialist_id={self.specialist_id_str}, "
|
||||
f"specialist_type={self.specialist_type_str}, "
|
||||
f"specialist_type_version={self.specialist_type_version_str}")
|
||||
|
||||
# Increment span metrics - using span_name as activity_name for metrics
|
||||
SPAN_COUNTER.labels(
|
||||
tenant_id=self.tenant_id_str,
|
||||
@@ -487,10 +459,11 @@ class BusinessEvent:
|
||||
'specialist_type': self.specialist_type,
|
||||
'specialist_type_version': self.specialist_type_version,
|
||||
'environment': self.environment,
|
||||
'llm_metrics_total_tokens': metrics['total_tokens'],
|
||||
'llm_metrics_prompt_tokens': metrics['prompt_tokens'],
|
||||
'llm_metrics_completion_tokens': metrics['completion_tokens'],
|
||||
'llm_metrics_total_time': metrics['time_elapsed'],
|
||||
'llm_metrics_total_tokens': metrics.get('total_tokens', 0),
|
||||
'llm_metrics_prompt_tokens': metrics.get('prompt_tokens', 0),
|
||||
'llm_metrics_completion_tokens': metrics.get('completion_tokens', 0),
|
||||
'llm_metrics_nr_of_pages': metrics.get('nr_of_pages', 0),
|
||||
'llm_metrics_total_time': metrics.get('time_elapsed', 0),
|
||||
'llm_interaction_type': metrics['interaction_type'],
|
||||
'message': message,
|
||||
}
|
||||
@@ -518,6 +491,7 @@ class BusinessEvent:
|
||||
'llm_metrics_total_tokens': self.llm_metrics['total_tokens'],
|
||||
'llm_metrics_prompt_tokens': self.llm_metrics['prompt_tokens'],
|
||||
'llm_metrics_completion_tokens': self.llm_metrics['completion_tokens'],
|
||||
'llm_metrics_nr_of_pages': self.llm_metrics['nr_of_pages'],
|
||||
'llm_metrics_total_time': self.llm_metrics['total_time'],
|
||||
'llm_metrics_call_count': self.llm_metrics['call_count'],
|
||||
'llm_interaction_type': self.llm_metrics['interaction_type'],
|
||||
|
||||
Reference in New Issue
Block a user