- Translations completed for Front-End, Configs (e.g. Forms) and free text.
- allowed_languages and default_language are now part of TenantMake instead of Tenant
- Introduction of translation into Traicie Selection Specialist
This commit is contained in:
@@ -26,9 +26,6 @@ class Tenant(db.Model):
|
||||
timezone = db.Column(db.String(50), nullable=True, default='UTC')
|
||||
type = db.Column(db.String(20), nullable=True, server_default='Active')
|
||||
|
||||
# language information
|
||||
default_language = db.Column(db.String(2), nullable=True)
|
||||
|
||||
# Entitlements
|
||||
currency = db.Column(db.String(20), nullable=True)
|
||||
storage_dirty = db.Column(db.Boolean, nullable=True, default=False)
|
||||
@@ -61,7 +58,6 @@ class Tenant(db.Model):
|
||||
'website': self.website,
|
||||
'timezone': self.timezone,
|
||||
'type': self.type,
|
||||
'default_language': self.default_language,
|
||||
'currency': self.currency,
|
||||
'default_tenant_make_id': self.default_tenant_make_id,
|
||||
}
|
||||
@@ -186,6 +182,7 @@ class TenantMake(db.Model):
|
||||
active = db.Column(db.Boolean, nullable=False, default=True)
|
||||
website = db.Column(db.String(255), nullable=True)
|
||||
logo_url = db.Column(db.String(255), nullable=True)
|
||||
default_language = db.Column(db.String(2), nullable=True)
|
||||
allowed_languages = db.Column(ARRAY(sa.String(2)), nullable=True)
|
||||
|
||||
# Chat customisation options
|
||||
@@ -209,6 +206,8 @@ class TenantMake(db.Model):
|
||||
'website': self.website,
|
||||
'logo_url': self.logo_url,
|
||||
'chat_customisation_options': self.chat_customisation_options,
|
||||
'allowed_languages': self.allowed_languages,
|
||||
'default_language': self.default_language,
|
||||
}
|
||||
|
||||
|
||||
@@ -327,7 +326,7 @@ class TranslationCache(db.Model):
|
||||
cache_key = db.Column(db.String(16), primary_key=True)
|
||||
source_text = db.Column(db.Text, nullable=False)
|
||||
translated_text = db.Column(db.Text, nullable=False)
|
||||
source_language = db.Column(db.String(2), nullable=False)
|
||||
source_language = db.Column(db.String(2), nullable=True)
|
||||
target_language = db.Column(db.String(2), nullable=False)
|
||||
context = db.Column(db.Text, nullable=True)
|
||||
|
||||
|
||||
@@ -1,43 +1,108 @@
|
||||
import xxhash
|
||||
import json
|
||||
from typing import Dict, Any, Optional
|
||||
from common.extensions import cache_manager
|
||||
from common.utils.business_event import BusinessEvent
|
||||
from common.utils.business_event_context import current_event
|
||||
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
class TranslationServices:
|
||||
|
||||
from common.langchain.persistent_llm_metrics_handler import PersistentLLMMetricsHandler
|
||||
from common.utils.model_utils import get_template, replace_variable_in_template
|
||||
@staticmethod
|
||||
def translate_config(config_data: Dict[str, Any], field_config: str, target_language: str, source_language: Optional[str] = None, context: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Vertaalt een configuratie op basis van een veld-configuratie.
|
||||
|
||||
class TranslationService:
|
||||
def __init__(self, tenant_id):
|
||||
self.tenant_id = tenant_id
|
||||
Args:
|
||||
config_data: Een dictionary of JSON (die dan wordt geconverteerd naar een dictionary) met configuratiegegevens
|
||||
field_config: De naam van een veld-configuratie (bijv. 'fields')
|
||||
target_language: De taal waarnaar vertaald moet worden
|
||||
source_language: Optioneel, de brontaal van de configuratie
|
||||
context: Optioneel, een specifieke context voor de vertaling
|
||||
|
||||
def translate_text(self, text_to_translate: str, target_lang: str, source_lang: str = None, context: str = None) -> tuple[
|
||||
str, dict[str, int | float]]:
|
||||
prompt_params = {
|
||||
"text_to_translate": text_to_translate,
|
||||
"target_lang": target_lang,
|
||||
}
|
||||
if context:
|
||||
template, llm = get_template("translation_with_context")
|
||||
prompt_params["context"] = context
|
||||
else:
|
||||
template, llm = get_template("translation_without_context")
|
||||
Returns:
|
||||
Een dictionary met de vertaalde configuratie
|
||||
"""
|
||||
# Zorg ervoor dat we een dictionary hebben
|
||||
if isinstance(config_data, str):
|
||||
config_data = json.loads(config_data)
|
||||
|
||||
# Add a metrics handler to capture usage
|
||||
# Maak een kopie van de originele data om te wijzigen
|
||||
translated_config = config_data.copy()
|
||||
|
||||
metrics_handler = PersistentLLMMetricsHandler()
|
||||
existing_callbacks = llm.callbacks
|
||||
llm.callbacks = existing_callbacks + [metrics_handler]
|
||||
# Haal type en versie op voor de Business Event span
|
||||
config_type = config_data.get('type', 'Unknown')
|
||||
config_version = config_data.get('version', 'Unknown')
|
||||
span_name = f"{config_type}-{config_version}-{field_config}"
|
||||
|
||||
translation_prompt = ChatPromptTemplate.from_template(template)
|
||||
# Start een Business Event context
|
||||
with BusinessEvent('Config Translation Service', 0):
|
||||
with current_event.create_span(span_name):
|
||||
# Controleer of de gevraagde veld-configuratie bestaat
|
||||
if field_config in config_data:
|
||||
fields = config_data[field_config]
|
||||
|
||||
setup = RunnablePassthrough()
|
||||
# Haal description uit metadata voor context als geen context is opgegeven
|
||||
description_context = ""
|
||||
if not context and 'metadata' in config_data and 'description' in config_data['metadata']:
|
||||
description_context = config_data['metadata']['description']
|
||||
|
||||
chain = (setup | translation_prompt | llm | StrOutputParser())
|
||||
# Loop door elk veld in de configuratie
|
||||
for field_name, field_data in fields.items():
|
||||
# Vertaal name als het bestaat en niet leeg is
|
||||
if 'name' in field_data and field_data['name']:
|
||||
# Gebruik context indien opgegeven, anders description_context
|
||||
field_context = context if context else description_context
|
||||
translated_name = cache_manager.translation_cache.get_translation(
|
||||
text=field_data['name'],
|
||||
target_lang=target_language,
|
||||
source_lang=source_language,
|
||||
context=field_context
|
||||
)
|
||||
if translated_name:
|
||||
translated_config[field_config][field_name]['name'] = translated_name.translated_text
|
||||
|
||||
translation = chain.invoke(prompt_params)
|
||||
if 'title' in field_data and field_data['title']:
|
||||
# Gebruik context indien opgegeven, anders description_context
|
||||
field_context = context if context else description_context
|
||||
translated_title = cache_manager.translation_cache.get_translation(
|
||||
text=field_data['title'],
|
||||
target_lang=target_language,
|
||||
source_lang=source_language,
|
||||
context=field_context
|
||||
)
|
||||
if translated_title:
|
||||
translated_config[field_config][field_name]['title'] = translated_title.translated_text
|
||||
|
||||
metrics = metrics_handler.get_metrics()
|
||||
# Vertaal description als het bestaat en niet leeg is
|
||||
if 'description' in field_data and field_data['description']:
|
||||
# Gebruik context indien opgegeven, anders description_context
|
||||
field_context = context if context else description_context
|
||||
translated_desc = cache_manager.translation_cache.get_translation(
|
||||
text=field_data['description'],
|
||||
target_lang=target_language,
|
||||
source_lang=source_language,
|
||||
context=field_context
|
||||
)
|
||||
if translated_desc:
|
||||
translated_config[field_config][field_name]['description'] = translated_desc.translated_text
|
||||
|
||||
return translation, metrics
|
||||
# Vertaal context als het bestaat en niet leeg is
|
||||
if 'context' in field_data and field_data['context']:
|
||||
translated_ctx = cache_manager.translation_cache.get_translation(
|
||||
text=field_data['context'],
|
||||
target_lang=target_language,
|
||||
source_lang=source_language,
|
||||
context=context
|
||||
)
|
||||
if translated_ctx:
|
||||
translated_config[field_config][field_name]['context'] = translated_ctx.translated_text
|
||||
|
||||
return translated_config
|
||||
|
||||
@staticmethod
def translate(text: str, target_language: str, source_language: Optional[str] = None,
              context: Optional[str] = None) -> str:
    """Translate a piece of free text via the translation cache.

    The lookup runs inside a 'Translation Service' business event and a
    'Translation' span.

    Args:
        text: The text to translate.
        target_language: Language code to translate into.
        source_language: Optional language code of ``text``.
        context: Optional free-text context forwarded to the cache lookup.

    Returns:
        The translated text, or ``text`` unchanged when the cache handler
        yields no translation.
    """
    # NOTE(review): the second BusinessEvent argument (0) is presumably a tenant id
    # placeholder - confirm against BusinessEvent's signature.
    with BusinessEvent('Translation Service', 0):
        with current_event.create_span('Translation'):
            cached = cache_manager.translation_cache.get_translation(
                text, target_language, source_language, context)
            # get_translation is annotated as Optional; translate_config guards
            # its results the same way, so fall back to the source text instead
            # of raising AttributeError on a missing translation.
            if cached is None:
                return text
            return cached.translated_text
|
||||
2
common/utils/cache/regions.py
vendored
2
common/utils/cache/regions.py
vendored
@@ -42,7 +42,7 @@ def create_cache_regions(app):
|
||||
# Region for model-related caching (ModelVariables etc)
|
||||
model_region = make_region(name='eveai_model').configure(
|
||||
'dogpile.cache.redis',
|
||||
arguments=redis_config,
|
||||
arguments={**redis_config, 'db': 6},
|
||||
replace_existing_backend=True
|
||||
)
|
||||
regions['eveai_model'] = model_region
|
||||
|
||||
107
common/utils/cache/translation_cache.py
vendored
107
common/utils/cache/translation_cache.py
vendored
@@ -1,19 +1,25 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
|
||||
import xxhash
|
||||
from flask import current_app
|
||||
from sqlalchemy import and_
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from sqlalchemy.inspection import inspect
|
||||
|
||||
from common.langchain.persistent_llm_metrics_handler import PersistentLLMMetricsHandler
|
||||
from common.utils.business_event_context import current_event
|
||||
from common.utils.cache.base import CacheHandler, T
|
||||
from common.extensions import db
|
||||
|
||||
from common.models.user import TranslationCache
|
||||
from common.services.utils.translation_services import TranslationService
|
||||
from flask_security import current_user
|
||||
|
||||
from common.utils.model_utils import get_template
|
||||
|
||||
|
||||
class TranslationCacheHandler(CacheHandler[TranslationCache]):
|
||||
"""Handles caching of translations with fallback to database and external translation service"""
|
||||
@@ -62,13 +68,33 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
|
||||
|
||||
setattr(translation, column.name, value)
|
||||
|
||||
current_app.logger.debug(f"Translation Cache Retrieved: {translation}")
|
||||
metrics = {
|
||||
'total_tokens': translation.prompt_tokens + translation.completion_tokens,
|
||||
'prompt_tokens': translation.prompt_tokens,
|
||||
'completion_tokens': translation.completion_tokens,
|
||||
'time_elapsed': 0,
|
||||
'interaction_type': 'LLM'
|
||||
}
|
||||
current_event.log_llm_metrics(metrics)
|
||||
|
||||
return translation
|
||||
|
||||
def _should_cache(self, value: TranslationCache) -> bool:
|
||||
def _should_cache(self, value) -> bool:
|
||||
"""Validate if the translation should be cached"""
|
||||
return value is not None and value.cache_key is not None
|
||||
if value is None:
|
||||
return False
|
||||
|
||||
def get_translation(self, text: str, target_lang: str, source_lang:str=None, context: str=None) -> Optional[TranslationCache]:
|
||||
# Handle both TranslationCache objects and serialized data (dict)
|
||||
if isinstance(value, TranslationCache):
|
||||
return value.cache_key is not None
|
||||
elif isinstance(value, dict):
|
||||
return value.get('cache_key') is not None
|
||||
|
||||
return False
|
||||
|
||||
def get_translation(self, text: str, target_lang: str, source_lang: str = None, context: str = None) -> Optional[
|
||||
TranslationCache]:
|
||||
"""
|
||||
Get the translation for a text in a specific language
|
||||
|
||||
@@ -81,26 +107,33 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
|
||||
Returns:
|
||||
TranslationCache instance if found, None otherwise
|
||||
"""
|
||||
if not context:
|
||||
context = 'No context provided.'
|
||||
current_app.logger.debug(f"Getting translation for text: {text[:10]}..., target_lang: {target_lang}, source_lang: {source_lang}, context: {context[:10]}...")
|
||||
|
||||
def creator_func(text: str, target_lang: str, source_lang: str=None, context: str=None) -> Optional[TranslationCache]:
|
||||
# Generate cache key based on inputs
|
||||
cache_key = self._generate_cache_key(text, target_lang, source_lang, context)
|
||||
|
||||
def creator_func(hash_key: str) -> Optional[TranslationCache]:
|
||||
# Check if translation already exists in database
|
||||
existing_translation = db.session.query(TranslationCache).filter_by(cache_key=cache_key).first()
|
||||
existing_translation = db.session.query(TranslationCache).filter_by(cache_key=hash_key).first()
|
||||
|
||||
if existing_translation:
|
||||
# Update last used timestamp
|
||||
existing_translation.last_used_at = dt.now(tz=tz.utc)
|
||||
metrics = {
|
||||
'total_tokens': existing_translation.prompt_tokens + existing_translation.completion_tokens,
|
||||
'prompt_tokens': existing_translation.prompt_tokens,
|
||||
'completion_tokens': existing_translation.completion_tokens,
|
||||
'time_elapsed': 0,
|
||||
'interaction_type': 'LLM'
|
||||
}
|
||||
current_app.logger.debug(f"Found existing translation in DB: {existing_translation.cache_key}")
|
||||
current_app.logger.debug(f"Metrics: {metrics}")
|
||||
current_event.log_llm_metrics(metrics)
|
||||
db.session.commit()
|
||||
return existing_translation
|
||||
|
||||
# Translation not found in DB, need to create it
|
||||
# Initialize translation service
|
||||
translation_service = TranslationService(getattr(current_app, 'tenant_id', None))
|
||||
|
||||
# Get the translation and metrics
|
||||
translated_text, metrics = translation_service.translate_text(
|
||||
translated_text, metrics = self.translate_text(
|
||||
text_to_translate=text,
|
||||
target_lang=target_lang,
|
||||
source_lang=source_lang,
|
||||
@@ -109,10 +142,10 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
|
||||
|
||||
# Create new translation cache record
|
||||
new_translation = TranslationCache(
|
||||
cache_key=cache_key,
|
||||
cache_key=hash_key,
|
||||
source_text=text,
|
||||
translated_text=translated_text,
|
||||
source_language=source_lang or 'auto',
|
||||
source_language=source_lang,
|
||||
target_language=target_lang,
|
||||
context=context,
|
||||
prompt_tokens=metrics.get('prompt_tokens', 0),
|
||||
@@ -130,7 +163,12 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
|
||||
|
||||
return new_translation
|
||||
|
||||
return self.get(creator_func, text=text, target_lang=target_lang, source_lang=source_lang, context=context)
|
||||
# Generate the hash key using your existing method
|
||||
hash_key = self._generate_cache_key(text, target_lang, source_lang, context)
|
||||
current_app.logger.debug(f"Generated hash key: {hash_key}")
|
||||
|
||||
# Pass the hash_key to the get method
|
||||
return self.get(creator_func, hash_key=hash_key)
|
||||
|
||||
def invalidate_tenant_translations(self, tenant_id: int):
|
||||
"""Invalidate cached translations for specific tenant"""
|
||||
@@ -148,6 +186,41 @@ class TranslationCacheHandler(CacheHandler[TranslationCache]):
|
||||
cache_string = json.dumps(cache_data, sort_keys=True, ensure_ascii=False)
|
||||
return xxhash.xxh64(cache_string.encode('utf-8')).hexdigest()
|
||||
|
||||
def translate_text(self, text_to_translate: str, target_lang: str, source_lang: str = None, context: str = None) \
        -> tuple[str, dict[str, int | float]]:
    """Translate text with the configured LLM prompt and report usage metrics.

    Args:
        text_to_translate: The text to translate.
        target_lang: Key into ``SUPPORTED_LANGUAGE_ISO639_1_LOOKUP`` in the app
            config; the looked-up value is what is passed to the prompt.
        source_lang: Accepted for interface compatibility; not used when
            building the prompt.
        context: Optional context. When truthy, the "translation_with_context"
            template is used and the context is passed as a prompt variable;
            otherwise "translation_without_context" is used.

    Returns:
        A ``(translation, metrics)`` tuple, where ``metrics`` is whatever the
        metrics handler collected during the call.

    Raises:
        KeyError: If ``target_lang`` is not present in the language lookup.
    """
    target_language = current_app.config['SUPPORTED_LANGUAGE_ISO639_1_LOOKUP'][target_lang]
    current_app.logger.debug(f"Target language: {target_language}")

    prompt_params = {
        "text_to_translate": text_to_translate,
        "target_language": target_language,
    }
    if context:
        template, llm = get_template("translation_with_context")
        prompt_params["context"] = context
    else:
        template, llm = get_template("translation_without_context")

    # Add a metrics handler to capture usage. ``llm.callbacks`` may be None
    # (LangChain's default), so treat that as an empty list rather than
    # failing on ``None + [...]``.
    # NOTE(review): if get_template returns a shared llm instance, handlers
    # accumulate across calls - confirm and consider per-invoke callbacks.
    metrics_handler = PersistentLLMMetricsHandler()
    llm.callbacks = list(llm.callbacks or []) + [metrics_handler]

    translation_prompt = ChatPromptTemplate.from_template(template)
    chain = (RunnablePassthrough() | translation_prompt | llm | StrOutputParser())

    translation = chain.invoke(prompt_params)

    # Remove double square brackets from translation, keeping the enclosed text
    translation = re.sub(r'\[\[(.*?)\]\]', r'\1', translation)

    metrics = metrics_handler.get_metrics()

    return translation, metrics
|
||||
|
||||
def register_translation_cache_handlers(cache_manager) -> None:
|
||||
"""Register translation cache handlers with cache manager"""
|
||||
cache_manager.register_handler(
|
||||
|
||||
@@ -31,7 +31,6 @@ def get_default_chat_customisation(tenant_customisation=None):
|
||||
'markdown_background_color': 'transparent',
|
||||
'markdown_text_color': '#ffffff',
|
||||
'sidebar_markdown': '',
|
||||
'welcome_message': 'Hello! How can I help you today?',
|
||||
}
|
||||
|
||||
# If no tenant customization is provided, return the defaults
|
||||
|
||||
@@ -12,7 +12,6 @@ from datetime import datetime as dt, timezone as tz
|
||||
def set_tenant_session_data(sender, user, **kwargs):
|
||||
tenant = Tenant.query.filter_by(id=user.tenant_id).first()
|
||||
session['tenant'] = tenant.to_dict()
|
||||
session['default_language'] = tenant.default_language
|
||||
partner = Partner.query.filter_by(tenant_id=user.tenant_id).first()
|
||||
if partner:
|
||||
session['partner'] = partner.to_dict()
|
||||
|
||||
Reference in New Issue
Block a user