- Introduction of the Automatic HTML Processor

- Translation Service improvement
- Enable activation / deactivation of Processors
- Renew API-keys for Mistral (leading to workspaces)
- Align all Document views to use of a session catalog
- Allow for different processors for the same file type
This commit is contained in:
Josako
2025-06-26 14:38:40 +02:00
parent f5c9542a49
commit fda267b479
35 changed files with 551 additions and 356 deletions

156
common/utils/cache/translation_cache.py vendored Normal file
View File

@@ -0,0 +1,156 @@
import json
from typing import Dict, Any, Optional
from datetime import datetime as dt, timezone as tz
import xxhash
from flask import current_app
from sqlalchemy import and_
from sqlalchemy.inspection import inspect
from common.utils.cache.base import CacheHandler, T
from common.extensions import db
from common.models.user import TranslationCache
from common.services.utils.translation_services import TranslationService
from flask_security import current_user
class TranslationCacheHandler(CacheHandler[TranslationCache]):
"""Handles caching of translations with fallback to database and external translation service"""
handler_name = 'translation_cache'
def __init__(self, region):
super().__init__(region, 'translation')
self.configure_keys('hash_key')
def _to_cache_data(self, instance: TranslationCache) -> Dict[str, Any]:
"""Convert TranslationCache instance to cache data using SQLAlchemy inspection"""
if not instance:
return {}
mapper = inspect(TranslationCache)
data = {}
for column in mapper.columns:
value = getattr(instance, column.name)
# Handle date serialization
if isinstance(value, dt):
data[column.name] = value.isoformat()
else:
data[column.name] = value
return data
def _from_cache_data(self, data: Dict[str, Any], **kwargs) -> TranslationCache:
if not data:
return None
# Create a new TranslationCache instance
translation = TranslationCache()
mapper = inspect(TranslationCache)
# Set all attributes dynamically
for column in mapper.columns:
if column.name in data:
value = data[column.name]
# Handle date deserialization
if column.name.endswith('_date') and value:
if isinstance(value, str):
value = dt.fromisoformat(value).date()
setattr(translation, column.name, value)
return translation
def _should_cache(self, value: TranslationCache) -> bool:
"""Validate if the translation should be cached"""
return value is not None and value.cache_key is not None
def get_translation(self, text: str, target_lang: str, source_lang:str=None, context: str=None) -> Optional[TranslationCache]:
"""
Get the translation for a text in a specific language
Args:
text: The text to be translated
target_lang: The target language for the translation
source_lang: The source language of the text to be translated
context: Optional context for the translation
Returns:
TranslationCache instance if found, None otherwise
"""
def creator_func(text: str, target_lang: str, source_lang: str=None, context: str=None) -> Optional[TranslationCache]:
# Generate cache key based on inputs
cache_key = self._generate_cache_key(text, target_lang, source_lang, context)
# Check if translation already exists in database
existing_translation = db.session.query(TranslationCache).filter_by(cache_key=cache_key).first()
if existing_translation:
# Update last used timestamp
existing_translation.last_used_at = dt.now(tz=tz.utc)
db.session.commit()
return existing_translation
# Translation not found in DB, need to create it
# Initialize translation service
translation_service = TranslationService(getattr(current_app, 'tenant_id', None))
# Get the translation and metrics
translated_text, metrics = translation_service.translate_text(
text_to_translate=text,
target_lang=target_lang,
source_lang=source_lang,
context=context
)
# Create new translation cache record
new_translation = TranslationCache(
cache_key=cache_key,
source_text=text,
translated_text=translated_text,
source_language=source_lang or 'auto',
target_language=target_lang,
context=context,
prompt_tokens=metrics.get('prompt_tokens', 0),
completion_tokens=metrics.get('completion_tokens', 0),
created_at=dt.now(tz=tz.utc),
created_by=getattr(current_user, 'id', None) if 'current_user' in globals() else None,
updated_at=dt.now(tz=tz.utc),
updated_by=getattr(current_user, 'id', None) if 'current_user' in globals() else None,
last_used_at=dt.now(tz=tz.utc)
)
# Save to database
db.session.add(new_translation)
db.session.commit()
return new_translation
return self.get(creator_func, text=text, target_lang=target_lang, source_lang=source_lang, context=context)
def invalidate_tenant_translations(self, tenant_id: int):
"""Invalidate cached translations for specific tenant"""
self.invalidate(tenant_id=tenant_id)
def _generate_cache_key(self, text: str, target_lang: str, source_lang: str = None, context: str = None) -> str:
"""Generate cache key for a translation"""
cache_data = {
"text": text.strip(),
"target_lang": target_lang.lower(),
"source_lang": source_lang.lower() if source_lang else None,
"context": context.strip() if context else None,
}
cache_string = json.dumps(cache_data, sort_keys=True, ensure_ascii=False)
return xxhash.xxh64(cache_string.encode('utf-8')).hexdigest()
def register_translation_cache_handlers(cache_manager) -> None:
"""Register translation cache handlers with cache manager"""
cache_manager.register_handler(
TranslationCacheHandler,
'eveai_model' # Use existing eveai_model region
)