- Translations completed for Front-End, Configs (e.g. Forms) and free text.

- Allowed_languages and default_language are now part of Tenant Make instead of Tenant
- Introduction of Translation into Traicie Selection Specialist
2025-06-30 14:20:17 +02:00
parent 4338f09f5c
commit fbc9f44ac8
34 changed files with 1206 additions and 220 deletions
--- a/common/utils/cache/regions.py
+++ b/common/utils/cache/regions.py
@@ -42,7 +42,7 @@ def create_cache_regions(app):
    # Region for model-related caching (ModelVariables etc)
    model_region = make_region(name='eveai_model').configure(
        'dogpile.cache.redis',
-        arguments=redis_config,
+        arguments={**redis_config, 'db': 6},
        replace_existing_backend=True
    )
    regions['eveai_model'] = model_region
--- a/common/utils/cache/translation_cache.py
+++ b/common/utils/cache/translation_cache.py
@@ -1,19 +1,25 @@
 import json
+import re
 from typing import Dict, Any, Optional
 from datetime import datetime as dt, timezone as tz

 import xxhash
 from flask import current_app
-from sqlalchemy import and_
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
 from sqlalchemy.inspection import inspect

+from common.langchain.persistent_llm_metrics_handler import PersistentLLMMetricsHandler
+from common.utils.business_event_context import current_event
 from common.utils.cache.base import CacheHandler, T
 from common.extensions import db

 from common.models.user import TranslationCache
-from common.services.utils.translation_services import TranslationService
 from flask_security import current_user

+from common.utils.model_utils import get_template
+

 class TranslationCacheHandler(CacheHandler[TranslationCache]):
    """Handles caching of translations with fallback to database and external translation service"""
@@ -62,13 +68,33 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):

                setattr(translation, column.name, value)

+        current_app.logger.debug(f"Translation Cache Retrieved: {translation}")
+        metrics = {
+            'total_tokens': translation.prompt_tokens + translation.completion_tokens,
+            'prompt_tokens': translation.prompt_tokens,
+            'completion_tokens': translation.completion_tokens,
+            'time_elapsed': 0,
+            'interaction_type': 'LLM'
+        }
+        current_event.log_llm_metrics(metrics)
+
        return translation

-    def _should_cache(self, value: TranslationCache) -> bool:
+    def _should_cache(self, value) -> bool:
        """Validate if the translation should be cached"""
-        return value is not None and value.cache_key is not None
+        if value is None:
+            return False

-    def get_translation(self, text: str, target_lang: str, source_lang:str=None, context: str=None) -> Optional[TranslationCache]:
+        # Handle both TranslationCache objects and serialized data (dict)
+        if isinstance(value, TranslationCache):
+            return value.cache_key is not None
+        elif isinstance(value, dict):
+            return value.get('cache_key') is not None
+
+        return False
+
+    def get_translation(self, text: str, target_lang: str, source_lang: str = None, context: str = None) -> Optional[
+        TranslationCache]:
        """
        Get the translation for a text in a specific language

@@ -81,26 +107,33 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
        Returns:
            TranslationCache instance if found, None otherwise
        """
+        if not context:
+            context = 'No context provided.'
+        current_app.logger.debug(f"Getting translation for text: {text[:10]}..., target_lang: {target_lang}, source_lang: {source_lang}, context: {context[:10]}...")

-        def creator_func(text: str, target_lang: str, source_lang: str=None, context: str=None) -> Optional[TranslationCache]:
-            # Generate cache key based on inputs
-            cache_key = self._generate_cache_key(text, target_lang, source_lang, context)
-
+        def creator_func(hash_key: str) -> Optional[TranslationCache]:
            # Check if translation already exists in database
-            existing_translation = db.session.query(TranslationCache).filter_by(cache_key=cache_key).first()
+            existing_translation = db.session.query(TranslationCache).filter_by(cache_key=hash_key).first()

            if existing_translation:
                # Update last used timestamp
                existing_translation.last_used_at = dt.now(tz=tz.utc)
+                metrics = {
+                    'total_tokens': existing_translation.prompt_tokens + existing_translation.completion_tokens,
+                    'prompt_tokens': existing_translation.prompt_tokens,
+                    'completion_tokens': existing_translation.completion_tokens,
+                    'time_elapsed': 0,
+                    'interaction_type': 'LLM'
+                }
+                current_app.logger.debug(f"Found existing translation in DB: {existing_translation.cache_key}")
+                current_app.logger.debug(f"Metrics: {metrics}")
+                current_event.log_llm_metrics(metrics)
                db.session.commit()
                return existing_translation

            # Translation not found in DB, need to create it
-            # Initialize translation service
-            translation_service = TranslationService(getattr(current_app, 'tenant_id', None))
-
            # Get the translation and metrics
-            translated_text, metrics = translation_service.translate_text(
+            translated_text, metrics = self.translate_text(
                text_to_translate=text,
                target_lang=target_lang,
                source_lang=source_lang,
@@ -109,10 +142,10 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):

            # Create new translation cache record
            new_translation = TranslationCache(
-                cache_key=cache_key,
+                cache_key=hash_key,
                source_text=text,
                translated_text=translated_text,
-                source_language=source_lang or 'auto',
+                source_language=source_lang,
                target_language=target_lang,
                context=context,
                prompt_tokens=metrics.get('prompt_tokens', 0),
@@ -130,7 +163,12 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):

            return new_translation

-        return self.get(creator_func, text=text, target_lang=target_lang, source_lang=source_lang, context=context)
+        # Generate the hash key using your existing method
+        hash_key = self._generate_cache_key(text, target_lang, source_lang, context)
+        current_app.logger.debug(f"Generated hash key: {hash_key}")
+
+        # Pass the hash_key to the get method
+        return self.get(creator_func, hash_key=hash_key)

    def invalidate_tenant_translations(self, tenant_id: int):
        """Invalidate cached translations for specific tenant"""
@@ -148,6 +186,41 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
        cache_string = json.dumps(cache_data, sort_keys=True, ensure_ascii=False)
        return xxhash.xxh64(cache_string.encode('utf-8')).hexdigest()

+    def translate_text(self, text_to_translate: str, target_lang: str, source_lang: str = None, context: str = None) \
+            -> tuple[str, dict[str, int | float]]:
+        """
+        Translate a text into the target language through the configured LLM template.
+
+        Args:
+            text_to_translate: The text to translate.
+            target_lang: Key into the SUPPORTED_LANGUAGE_ISO639_1_LOOKUP config mapping.
+            source_lang: Accepted for interface compatibility; not used when building the prompt.
+            context: Optional context. When truthy, the "translation_with_context" template is
+                used, otherwise "translation_without_context".
+
+        Returns:
+            Tuple of (translation with any [[...]] wrappers removed, metrics as returned by the
+            metrics handler's get_metrics()).
+
+        Raises:
+            KeyError: If target_lang is not a key of SUPPORTED_LANGUAGE_ISO639_1_LOOKUP.
+        """
+        target_language = current_app.config['SUPPORTED_LANGUAGE_ISO639_1_LOOKUP'][target_lang]
+        current_app.logger.debug(f"Target language: {target_language}")
+        prompt_params = {
+            "text_to_translate": text_to_translate,
+            "target_language": target_language,
+        }
+        if context:
+            template, llm = get_template("translation_with_context")
+            prompt_params["context"] = context
+        else:
+            template, llm = get_template("translation_without_context")
+
+        translation_prompt = ChatPromptTemplate.from_template(template)
+
+        setup = RunnablePassthrough()
+
+        chain = (setup | translation_prompt | llm | StrOutputParser())
+
+        # Add a metrics handler to capture usage for this call only.
+        # llm.callbacks may be None, so fall back to an empty list before appending.
+        metrics_handler = PersistentLLMMetricsHandler()
+        original_callbacks = llm.callbacks
+        llm.callbacks = list(original_callbacks or []) + [metrics_handler]
+
+        try:
+            translation = chain.invoke(prompt_params)
+        finally:
+            # Restore the previous callbacks so that an llm instance reused across calls
+            # does not accumulate one metrics handler per translation.
+            # NOTE(review): whether get_template returns a shared instance is not visible here.
+            llm.callbacks = original_callbacks
+
+        # Remove double square brackets from translation
+        translation = re.sub(r'\[\[(.*?)\]\]', r'\1', translation)
+
+        metrics = metrics_handler.get_metrics()
+
+        return translation, metrics
+
 def register_translation_cache_handlers(cache_manager) -> None:
    """Register translation cache handlers with cache manager"""
    cache_manager.register_handler(