diff --git a/common/langchain/EveAIHistoryRetriever.py b/common/langchain/eveai_history_retriever.py
similarity index 67%
rename from common/langchain/EveAIHistoryRetriever.py
rename to common/langchain/eveai_history_retriever.py
index 1810169..45875af 100644
--- a/common/langchain/EveAIHistoryRetriever.py
+++ b/common/langchain/eveai_history_retriever.py
@@ -1,23 +1,31 @@
 from langchain_core.retrievers import BaseRetriever
 from sqlalchemy import asc
 from sqlalchemy.exc import SQLAlchemyError
-from pydantic import BaseModel, Field
+from pydantic import Field, BaseModel, PrivateAttr
 from typing import Any, Dict
 from flask import current_app
 
 from common.extensions import db
 from common.models.interaction import ChatSession, Interaction
-from common.utils.datetime_utils import get_date_in_timezone
+from common.utils.model_utils import ModelVariables
 
 
-class EveAIHistoryRetriever(BaseRetriever):
-    model_variables: Dict[str, Any] = Field(...)
-    session_id: str = Field(...)
+class EveAIHistoryRetriever(BaseRetriever, BaseModel):
+    _model_variables: ModelVariables = PrivateAttr()
+    _session_id: str = PrivateAttr()
 
-    def __init__(self, model_variables: Dict[str, Any], session_id: str):
+    def __init__(self, model_variables: ModelVariables, session_id: str):
         super().__init__()
-        self.model_variables = model_variables
-        self.session_id = session_id
+        self._model_variables = model_variables
+        self._session_id = session_id
+
+    @property
+    def model_variables(self) -> ModelVariables:
+        return self._model_variables
+
+    @property
+    def session_id(self) -> str:
+        return self._session_id
 
     def _get_relevant_documents(self, query: str):
         current_app.logger.debug(f'Retrieving history of interactions for query: {query}')
diff --git a/common/langchain/EveAIRetriever.py b/common/langchain/eveai_retriever.py
similarity index 87%
rename from common/langchain/EveAIRetriever.py
rename to common/langchain/eveai_retriever.py
index 5496398..7c4b684 100644
--- a/common/langchain/EveAIRetriever.py
+++ b/common/langchain/eveai_retriever.py
@@ -1,30 +1,39 @@
 from langchain_core.retrievers import BaseRetriever
 from sqlalchemy import func, and_, or_, desc
 from sqlalchemy.exc import SQLAlchemyError
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, PrivateAttr
 from typing import Any, Dict
 from flask import current_app
 
 from common.extensions import db
 from common.models.document import Document, DocumentVersion
 from common.utils.datetime_utils import get_date_in_timezone
+from common.utils.model_utils import ModelVariables
 
 
-class EveAIRetriever(BaseRetriever):
-    model_variables: Dict[str, Any] = Field(...)
-    tenant_info: Dict[str, Any] = Field(...)
+class EveAIRetriever(BaseRetriever, BaseModel):
+    _model_variables: ModelVariables = PrivateAttr()
+    _tenant_info: Dict[str, Any] = PrivateAttr()
 
-    def __init__(self, model_variables: Dict[str, Any], tenant_info: Dict[str, Any]):
+    def __init__(self, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
         super().__init__()
-        self.model_variables = model_variables
-        self.tenant_info = tenant_info
+        current_app.logger.debug(f'Model variables type: {type(model_variables)}')
+        self._model_variables = model_variables
+        self._tenant_info = tenant_info
+
+    @property
+    def model_variables(self) -> ModelVariables:
+        return self._model_variables
+
+    @property
+    def tenant_info(self) -> Dict[str, Any]:
+        return self._tenant_info
 
     def _get_relevant_documents(self, query: str):
-
-
-
         current_app.logger.debug(f'Retrieving relevant documents for query: {query}')
         query_embedding = self._get_query_embedding(query)
+        current_app.logger.debug(f'Model Variables Private: {type(self._model_variables)}')
+        current_app.logger.debug(f'Model Variables Property: {type(self.model_variables)}')
         db_class = self.model_variables['embedding_db_model']
         similarity_threshold = self.model_variables['similarity_threshold']
         k = self.model_variables['k']
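A note on the retriever rework above: in langchain-core 0.3, `BaseRetriever` is a pydantic v2 model, so assigning undeclared attributes in `__init__` raises a validation error. Declaring the state with `PrivateAttr` keeps pydantic from trying to validate the `ModelVariables` mapping, and the read-only properties preserve the old `retriever.model_variables` attribute access. A minimal usage sketch follows; the tenant lookup and the `tenant_info` shape are illustrative assumptions, not part of this diff:

```python
# Hedged sketch: constructing the reworked retriever (tenant_info keys are assumed).
from common.langchain.eveai_retriever import EveAIRetriever
from common.models.user import Tenant
from common.utils.model_utils import ModelVariables

tenant = Tenant.query.get(1)                      # placeholder tenant lookup
model_variables = ModelVariables(tenant=tenant)   # lazy container introduced by this PR
retriever = EveAIRetriever(
    model_variables=model_variables,              # stored via PrivateAttr, not a pydantic field
    tenant_info={'id': tenant.id},                # assumed shape; adapt to the real dict
)
docs = retriever.invoke("example query")          # BaseRetriever's public entry point
```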
diff --git a/common/models/monitoring.py b/common/models/monitoring.py
new file mode 100644
index 0000000..de4045f
--- /dev/null
+++ b/common/models/monitoring.py
@@ -0,0 +1,28 @@
+from common.extensions import db
+from sqlalchemy.dialects.postgresql import JSONB
+import sqlalchemy as sa
+
+
+class LLMUsageMetric(db.Model):
+    __bind_key__ = 'public'
+    __table_args__ = {'schema': 'public'}
+
+    id = db.Column(db.Integer, primary_key=True)
+    tenant_id = db.Column(db.Integer, nullable=False)
+    environment = db.Column(db.String(20), nullable=False)
+    activity = db.Column(db.String(20), nullable=False)
+    sub_activity = db.Column(db.String(20), nullable=False)
+    activity_detail = db.Column(db.String(50), nullable=True)
+    session_id = db.Column(db.String(50), nullable=True)  # Chat Session ID
+    interaction_id = db.Column(db.Integer, nullable=True)  # Chat Interaction ID
+    document_version_id = db.Column(db.Integer, nullable=True)
+    prompt_tokens = db.Column(db.Integer, nullable=True)
+    completion_tokens = db.Column(db.Integer, nullable=True)
+    total_tokens = db.Column(db.Integer, nullable=True)
+    cost = db.Column(db.Float, nullable=True)
+    latency = db.Column(db.Float, nullable=True)
+    model_name = db.Column(db.String(50), nullable=False)
+    timestamp = db.Column(db.DateTime, nullable=False)
+    additional_info = db.Column(JSONB, nullable=True)
+
+    # Add any additional fields or methods as needed
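Nothing in this diff populates the new table yet; the migration below only creates it. A sketch of how a row might be recorded once instrumentation lands; the activity taxonomy and all numeric values are placeholders:

```python
# Hedged sketch: recording one LLM call (field values are illustrative only).
from datetime import datetime, timezone

from common.extensions import db
from common.models.monitoring import LLMUsageMetric

metric = LLMUsageMetric(
    tenant_id=1,
    environment='development',
    activity='chat',              # the diff does not define the allowed values
    sub_activity='rag',
    model_name='gpt-4o-mini',
    prompt_tokens=512,
    completion_tokens=128,
    total_tokens=640,
    cost=0.0021,                  # presumably USD
    latency=1.8,                  # presumably seconds
    timestamp=datetime.now(timezone.utc),
    additional_info={'provider': 'openai'},
)
db.session.add(metric)
db.session.commit()
```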
diff --git a/common/models/user.py b/common/models/user.py
index d7a28eb..e9d79f3 100644
--- a/common/models/user.py
+++ b/common/models/user.py
@@ -2,7 +2,6 @@ from common.extensions import db
 from flask_security import UserMixin, RoleMixin
 from sqlalchemy.dialects.postgresql import ARRAY
 import sqlalchemy as sa
-from sqlalchemy import CheckConstraint
 
 
 class Tenant(db.Model):
diff --git a/common/utils/model_utils.py b/common/utils/model_utils.py
index a3b9246..b964482 100644
--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -5,14 +5,14 @@ from flask import current_app
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_anthropic import ChatAnthropic
 from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.prompts import ChatPromptTemplate
-import ast
-from typing import List
+from typing import List, Any, Iterator
+from collections.abc import MutableMapping
 from openai import OpenAI
-# from groq import Groq
 from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
 
 from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
+from common.models.user import Tenant
+from config.model_config import MODEL_CONFIG
 
 
 class CitedAnswer(BaseModel):
@@ -36,180 +36,221 @@ def set_language_prompt_template(cls, language_prompt):
     cls.__doc__ = language_prompt
 
 
-def select_model_variables(tenant):
-    embedding_provider = tenant.embedding_model.rsplit('.', 1)[0]
-    embedding_model = tenant.embedding_model.rsplit('.', 1)[1]
+class ModelVariables(MutableMapping):
+    def __init__(self, tenant: Tenant):
+        self.tenant = tenant
+        self._variables = self._initialize_variables()
+        self._embedding_model = None
+        self._llm = None
+        self._llm_no_rag = None
+        self._transcription_client = None
+        self._prompt_templates = {}
+        self._embedding_db_model = None
 
-    llm_provider = tenant.llm_model.rsplit('.', 1)[0]
-    llm_model = tenant.llm_model.rsplit('.', 1)[1]
+    def _initialize_variables(self):
+        variables = {}
 
-    # Set model variables
-    model_variables = {}
-    if tenant.es_k:
-        model_variables['k'] = tenant.es_k
-    else:
-        model_variables['k'] = 5
+        # We initialize the variables that are available from the tenant; for the others, we apply 'lazy loading'.
+        variables['k'] = self.tenant.es_k or 5
+        variables['similarity_threshold'] = self.tenant.es_similarity_threshold or 0.7
+        variables['RAG_temperature'] = self.tenant.chat_RAG_temperature or 0.3
+        variables['no_RAG_temperature'] = self.tenant.chat_no_RAG_temperature or 0.5
+        variables['embed_tuning'] = self.tenant.embed_tuning or False
+        variables['rag_tuning'] = self.tenant.rag_tuning or False
+        variables['rag_context'] = self.tenant.rag_context or " "
 
-    if tenant.es_similarity_threshold:
-        model_variables['similarity_threshold'] = tenant.es_similarity_threshold
-    else:
-        model_variables['similarity_threshold'] = 0.7
+        # Set HTML Chunking Variables
+        variables['html_tags'] = self.tenant.html_tags
+        variables['html_end_tags'] = self.tenant.html_end_tags
+        variables['html_included_elements'] = self.tenant.html_included_elements
+        variables['html_excluded_elements'] = self.tenant.html_excluded_elements
+        variables['html_excluded_classes'] = self.tenant.html_excluded_classes
 
-    if tenant.chat_RAG_temperature:
-        model_variables['RAG_temperature'] = tenant.chat_RAG_temperature
-    else:
-        model_variables['RAG_temperature'] = 0.3
+        # Set Chunk Size variables
+        variables['min_chunk_size'] = self.tenant.min_chunk_size
+        variables['max_chunk_size'] = self.tenant.max_chunk_size
 
-    if tenant.chat_no_RAG_temperature:
-        model_variables['no_RAG_temperature'] = tenant.chat_no_RAG_temperature
-    else:
-        model_variables['no_RAG_temperature'] = 0.5
+        # Set model providers
+        variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
+        variables['llm_provider'], variables['llm_model'] = self.tenant.llm_model.rsplit('.', 1)
+        variables["templates"] = current_app.config['PROMPT_TEMPLATES'][(f"{variables['llm_provider']}."
+                                                                         f"{variables['llm_model']}")]
+        current_app.logger.info("Loaded prompt templates:")
+        current_app.logger.info(f"{variables['templates']}")
 
-    # Set Tuning variables
-    if tenant.embed_tuning:
-        model_variables['embed_tuning'] = tenant.embed_tuning
-    else:
-        model_variables['embed_tuning'] = False
+        # Set model-specific configurations
+        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
+        variables.update(model_config)
 
-    if tenant.rag_tuning:
-        model_variables['rag_tuning'] = tenant.rag_tuning
-    else:
-        model_variables['rag_tuning'] = False
+        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][self.tenant.llm_model]
 
-    if tenant.rag_context:
-        model_variables['rag_context'] = tenant.rag_context
-    else:
-        model_variables['rag_context'] = " "
+        if variables.get('tool_calling_supported'):
+            variables['cited_answer_cls'] = CitedAnswer
 
-    # Set HTML Chunking Variables
-    model_variables['html_tags'] = tenant.html_tags
-    model_variables['html_end_tags'] = tenant.html_end_tags
-    model_variables['html_included_elements'] = tenant.html_included_elements
-    model_variables['html_excluded_elements'] = tenant.html_excluded_elements
-    model_variables['html_excluded_classes'] = tenant.html_excluded_classes
+        return variables
 
-    # Set Chunk Size variables
-    model_variables['min_chunk_size'] = tenant.min_chunk_size
-    model_variables['max_chunk_size'] = tenant.max_chunk_size
+    @property
+    def embedding_model(self):
+        if self._embedding_model is None:
+            environment = os.getenv('FLASK_ENV', 'development')
+            portkey_metadata = {'tenant_id': str(self.tenant.id), 'environment': environment}
 
-    environment = os.getenv('FLASK_ENV', 'development')
-    portkey_metadata = {'tenant_id': str(tenant.id), 'environment': environment}
+            if self._variables['embedding_provider'] == 'openai':
+                portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
+                                                provider='openai',
+                                                metadata=portkey_metadata)
+                api_key = os.getenv('OPENAI_API_KEY')
+                model = self._variables['embedding_model']
+                self._embedding_model = OpenAIEmbeddings(api_key=api_key,
+                                                         model=model,
+                                                         base_url=PORTKEY_GATEWAY_URL,
+                                                         default_headers=portkey_headers)
+                self._embedding_db_model = EmbeddingSmallOpenAI \
+                    if model == 'text-embedding-3-small' \
+                    else EmbeddingLargeOpenAI
+            else:
+                raise ValueError(f"Invalid embedding provider: {self._variables['embedding_provider']}")
 
-    # Set Embedding variables
-    match embedding_provider:
-        case 'openai':
-            portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
-                                            provider='openai',
-                                            metadata=portkey_metadata)
-            match embedding_model:
-                case 'text-embedding-3-small':
-                    api_key = current_app.config.get('OPENAI_API_KEY')
-                    model_variables['embedding_model'] = OpenAIEmbeddings(api_key=api_key,
-                                                                          model='text-embedding-3-small',
-                                                                          base_url=PORTKEY_GATEWAY_URL,
-                                                                          default_headers=portkey_headers
-                                                                          )
-                    model_variables['embedding_db_model'] = EmbeddingSmallOpenAI
-                case 'text-embedding-3-large':
-                    api_key = current_app.config.get('OPENAI_API_KEY')
-                    model_variables['embedding_model'] = OpenAIEmbeddings(api_key=api_key,
-                                                                          model='text-embedding-3-large',
-                                                                          base_url=PORTKEY_GATEWAY_URL,
-                                                                          default_headers=portkey_headers
-                                                                          )
-                    model_variables['embedding_db_model'] = EmbeddingLargeOpenAI
-                case _:
-                    raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                                    f'error: Invalid embedding model')
-        case _:
-            raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                            f'error: Invalid embedding provider')
+
+        return self._embedding_model
 
-    # Set Chat model variables
-    match llm_provider:
-        case 'openai':
-            portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
+    @property
+    def llm(self):
+        if self._llm is None:
+            self._initialize_llm()
+        return self._llm
+
+    @property
+    def llm_no_rag(self):
+        if self._llm_no_rag is None:
+            self._initialize_llm()
+        return self._llm_no_rag
+
+    def _initialize_llm(self):
+        environment = os.getenv('FLASK_ENV', 'development')
+        portkey_metadata = {'tenant_id': str(self.tenant.id), 'environment': environment}
+
+        if self._variables['llm_provider'] == 'openai':
+            portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
                                             metadata=portkey_metadata,
                                             provider='openai')
-            tool_calling_supported = False
-            api_key = current_app.config.get('OPENAI_API_KEY')
-            model_variables['llm'] = ChatOpenAI(api_key=api_key,
-                                                model=llm_model,
-                                                temperature=model_variables['RAG_temperature'],
+            api_key = os.getenv('OPENAI_API_KEY')
+            self._llm = ChatOpenAI(api_key=api_key,
+                                   model=self._variables['llm_model'],
+                                   temperature=self._variables['RAG_temperature'],
+                                   base_url=PORTKEY_GATEWAY_URL,
+                                   default_headers=portkey_headers)
+            self._llm_no_rag = ChatOpenAI(api_key=api_key,
+                                          model=self._variables['llm_model'],
+                                          temperature=self._variables['no_RAG_temperature'],
+                                          base_url=PORTKEY_GATEWAY_URL,
+                                          default_headers=portkey_headers)
+            self._variables['tool_calling_supported'] = self._variables['llm_model'] in ['gpt-4o', 'gpt-4o-mini']
+        elif self._variables['llm_provider'] == 'anthropic':
+            api_key = os.getenv('ANTHROPIC_API_KEY')
+            llm_model_ext = current_app.config.get('ANTHROPIC_LLM_VERSIONS', {}).get(self._variables['llm_model'])
+            self._llm = ChatAnthropic(api_key=api_key,
+                                      model=llm_model_ext,
+                                      temperature=self._variables['RAG_temperature'])
+            self._llm_no_rag = ChatAnthropic(api_key=api_key,
+                                             model=llm_model_ext,
+                                             temperature=self._variables['no_RAG_temperature'])
+            self._variables['tool_calling_supported'] = True
+        else:
+            raise ValueError(f"Invalid chat provider: {self._variables['llm_provider']}")
+
+    @property
+    def transcription_client(self):
+        if self._transcription_client is None:
+            environment = os.getenv('FLASK_ENV', 'development')
+            portkey_metadata = {'tenant_id': str(self.tenant.id), 'environment': environment}
+            portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
+                                            metadata=portkey_metadata,
+                                            provider='openai')
+            api_key = os.getenv('OPENAI_API_KEY')
+            self._transcription_client = OpenAI(api_key=api_key,
                                                 base_url=PORTKEY_GATEWAY_URL,
                                                 default_headers=portkey_headers)
-            model_variables['llm_no_rag'] = ChatOpenAI(api_key=api_key,
-                                                       model=llm_model,
-                                                       temperature=model_variables['no_RAG_temperature'],
-                                                       base_url=PORTKEY_GATEWAY_URL,
-                                                       default_headers=portkey_headers)
-            tool_calling_supported = False
-            match llm_model:
-                case 'gpt-4o' | 'gpt-4o-mini':
-                    tool_calling_supported = True
-                    processing_chunk_size = 10000
-                    processing_chunk_overlap = 200
-                    processing_min_chunk_size = 8000
-                    processing_max_chunk_size = 12000
-                case _:
-                    raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                                    f'error: Invalid chat model')
-        case 'anthropic':
-            api_key = current_app.config.get('ANTHROPIC_API_KEY')
-            # Anthropic does not have the same 'generic' model names as OpenAI
-            llm_model_ext = current_app.config.get('ANTHROPIC_LLM_VERSIONS').get(llm_model)
-            model_variables['llm'] = ChatAnthropic(api_key=api_key,
-                                                   model=llm_model_ext,
-                                                   temperature=model_variables['RAG_temperature'])
-            model_variables['llm_no_rag'] = ChatAnthropic(api_key=api_key,
-                                                          model=llm_model_ext,
-                                                          temperature=model_variables['RAG_temperature'])
-            tool_calling_supported = True
-            processing_chunk_size = 10000
-            processing_chunk_overlap = 200
-            processing_min_chunk_size = 8000
-            processing_max_chunk_size = 12000
-        case _:
-            raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                            f'error: Invalid chat provider')
+            self._variables['transcription_model'] = 'whisper-1'
 
-    model_variables['processing_chunk_size'] = processing_chunk_size
-    model_variables['processing_chunk_overlap'] = processing_chunk_overlap
-    model_variables['processing_min_chunk_size'] = processing_min_chunk_size
-    model_variables['processing_max_chunk_size'] = processing_max_chunk_size
+        return self._transcription_client
 
-    if tool_calling_supported:
-        model_variables['cited_answer_cls'] = CitedAnswer
+    @property
+    def embedding_db_model(self):
+        if self._embedding_db_model is None:
+            self._embedding_db_model = self.get_embedding_db_model()
+        return self._embedding_db_model
 
-    templates = current_app.config['PROMPT_TEMPLATES'][f'{llm_provider}.{llm_model}']
-    model_variables['summary_template'] = templates['summary']
-    model_variables['rag_template'] = templates['rag']
-    model_variables['history_template'] = templates['history']
-    model_variables['encyclopedia_template'] = templates['encyclopedia']
-    model_variables['transcript_template'] = templates['transcript']
-    model_variables['html_parse_template'] = templates['html_parse']
-    model_variables['pdf_parse_template'] = templates['pdf_parse']
+    def get_embedding_db_model(self):
+        current_app.logger.debug("In get_embedding_db_model")
+        if self._embedding_db_model is None:
+            self._embedding_db_model = EmbeddingSmallOpenAI \
+                if self._variables['embedding_model'] == 'text-embedding-3-small' \
+                else EmbeddingLargeOpenAI
+        current_app.logger.debug(f"Embedding DB Model: {self._embedding_db_model}")
+        return self._embedding_db_model
 
-    model_variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
+    def get_prompt_template(self, template_name: str) -> str:
+        current_app.logger.info(f"Getting prompt template for {template_name}")
+        if template_name not in self._prompt_templates:
+            self._prompt_templates[template_name] = self._load_prompt_template(template_name)
+        return self._prompt_templates[template_name]
 
-    # Transcription Client Variables.
-    # Using Groq
-    # api_key = current_app.config.get('GROQ_API_KEY')
-    # model_variables['transcription_client'] = Groq(api_key=api_key)
-    # model_variables['transcription_model'] = 'whisper-large-v3'
+    def _load_prompt_template(self, template_name: str) -> str:
+        # In the future, this method will make an API call to Portkey
+        # For now, we'll simulate it with a placeholder implementation
+        # You can replace this with your current prompt loading logic
+        return self._variables['templates'][template_name]
 
-    # Using OpenAI for transcriptions
-    portkey_metadata = {'tenant_id': str(tenant.id)}
-    portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
-                                    metadata=portkey_metadata,
-                                    provider='openai'
-                                    )
-    api_key = current_app.config.get('OPENAI_API_KEY')
-    model_variables['transcription_client'] = OpenAI(api_key=api_key,
-                                                     base_url=PORTKEY_GATEWAY_URL,
-                                                     default_headers=portkey_headers)
-    model_variables['transcription_model'] = 'whisper-1'
+    def __getitem__(self, key: str) -> Any:
+        current_app.logger.debug(f"ModelVariables: Getting {key}")
+        # Support older template names (suffix = _template)
+        if key.endswith('_template'):
+            key = key[:-len('_template')]
+            current_app.logger.debug(f"ModelVariables: Getting modified {key}")
+        if key == 'embedding_model':
+            return self.embedding_model
+        elif key == 'embedding_db_model':
+            return self.embedding_db_model
+        elif key == 'llm':
+            return self.llm
+        elif key == 'llm_no_rag':
+            return self.llm_no_rag
+        elif key == 'transcription_client':
+            return self.transcription_client
+        elif key in self._variables.get('prompt_templates', []):
+            return self.get_prompt_template(key)
+        return self._variables.get(key)
 
+    def __setitem__(self, key: str, value: Any) -> None:
+        self._variables[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        del self._variables[key]
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._variables)
+
+    def __len__(self):
+        return len(self._variables)
+
+    def get(self, key: str, default: Any = None) -> Any:
+        value = self.__getitem__(key)
+        return default if value is None else value
+
+    def update(self, *args, **kwargs) -> None:
+        self._variables.update(*args, **kwargs)
+
+    def items(self):
+        return self._variables.items()
+
+    def keys(self):
+        return self._variables.keys()
+
+    def values(self):
+        return self._variables.values()
+
+
+def select_model_variables(tenant):
+    model_variables = ModelVariables(tenant=tenant)
 
     return model_variables
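The `MutableMapping` base is what keeps this refactor drop-in: existing call sites keep doing dict-style lookups, `select_model_variables` stays a thin wrapper, and the heavyweight clients are only built on first access. A small sketch of the contract, assuming an application context and a loaded `tenant` row:

```python
# Hedged sketch of the lazy-loading behaviour (requires a Flask app context).
from common.utils.model_utils import select_model_variables

model_variables = select_model_variables(tenant)  # now a ModelVariables, not a plain dict

k = model_variables['k']                    # plain value, resolved eagerly from the tenant
llm = model_variables['llm']                # ChatOpenAI/ChatAnthropic built here, then cached
llm_again = model_variables['llm']          # same cached instance; no second client build
template = model_variables['rag_template']  # legacy key: '_template' is stripped and 'rag'
                                            # is served via get_prompt_template()
```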
diff --git a/config/config.py b/config/config.py
index 04f0771..289efe3 100644
--- a/config/config.py
+++ b/config/config.py
@@ -137,6 +137,12 @@ class Config(object):
     MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
     MAIL_DEFAULT_SENDER = ('eveAI Admin', MAIL_USERNAME)
 
+    # Langsmith settings
+    LANGCHAIN_TRACING_V2 = True
+    LANGCHAIN_ENDPOINT = 'https://api.smith.langchain.com'
+    LANGCHAIN_PROJECT = "eveai"
+
+
     SUPPORTED_FILE_TYPES = ['pdf', 'html', 'md', 'txt', 'mp3', 'mp4', 'ogg', 'srt']
 
     TENANT_TYPES = ['Active', 'Demo', 'Inactive', 'Test']
diff --git a/config/model_config.py b/config/model_config.py
new file mode 100644
index 0000000..a685079
--- /dev/null
+++ b/config/model_config.py
@@ -0,0 +1,41 @@
+MODEL_CONFIG = {
+    "openai": {
+        "gpt-4o": {
+            "tool_calling_supported": True,
+            "processing_chunk_size": 10000,
+            "processing_chunk_overlap": 200,
+            "processing_min_chunk_size": 8000,
+            "processing_max_chunk_size": 12000,
+            "prompt_templates": [
+                "summary", "rag", "history", "encyclopedia",
+                "transcript", "html_parse", "pdf_parse"
+            ]
+        },
+        "gpt-4o-mini": {
+            "tool_calling_supported": True,
+            "processing_chunk_size": 10000,
+            "processing_chunk_overlap": 200,
+            "processing_min_chunk_size": 8000,
+            "processing_max_chunk_size": 12000,
+            "prompt_templates": [
+                "summary", "rag", "history", "encyclopedia",
+                "transcript", "html_parse", "pdf_parse"
+            ]
+        },
+        # Add other OpenAI models here
+    },
+    "anthropic": {
+        "claude-3-5-sonnet": {
+            "tool_calling_supported": True,
+            "processing_chunk_size": 10000,
+            "processing_chunk_overlap": 200,
+            "processing_min_chunk_size": 8000,
+            "processing_max_chunk_size": 12000,
+            "prompt_templates": [
+                "summary", "rag", "history", "encyclopedia",
+                "transcript", "html_parse", "pdf_parse"
+            ]
+        },
+        # Add other Anthropic models here
+    },
+}
\ No newline at end of file
diff --git a/docker/compose_dev.yaml b/docker/compose_dev.yaml
index 6470638..1c60433 100644
--- a/docker/compose_dev.yaml
+++ b/docker/compose_dev.yaml
@@ -32,6 +32,7 @@ x-common-variables: &common-variables
   MINIO_ACCESS_KEY: minioadmin
   MINIO_SECRET_KEY: minioadmin
   NGINX_SERVER_NAME: 'localhost http://macstudio.ask-eve-ai-local.com/'
+  LANGCHAIN_API_KEY: "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"
 
 
 networks:
diff --git a/docker/compose_stackhero.yaml b/docker/compose_stackhero.yaml
index 9a6734b..6e5020c 100644
--- a/docker/compose_stackhero.yaml
+++ b/docker/compose_stackhero.yaml
@@ -38,6 +38,7 @@ x-common-variables: &common-variables
   MINIO_ACCESS_KEY: 04JKmQln8PQpyTmMiCPc
   MINIO_SECRET_KEY: 2PEZAD1nlpAmOyDV0TUTuJTQw1qVuYLF3A7GMs0D
   NGINX_SERVER_NAME: 'evie.askeveai.com mxz536.stackhero-network.com'
+  LANGCHAIN_API_KEY: "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"
 
 networks:
   eveai-network:
diff --git a/eveai_app/__init__.py b/eveai_app/__init__.py
index 35cd1c6..58983c6 100644
--- a/eveai_app/__init__.py
+++ b/eveai_app/__init__.py
@@ -10,6 +10,8 @@ from common.extensions import (db, migrate, bootstrap, security, mail, login_man
                                minio_client, simple_encryption, metrics)
 from common.models.user import User, Role, Tenant, TenantDomain
 import common.models.interaction
+import common.models.monitoring
+import common.models.document
 from common.utils.nginx_utils import prefixed_url_for
 from config.logging_config import LOGGING
 from common.utils.security import set_tenant_session_data
diff --git a/eveai_app/views/healthz_views.py b/eveai_app/views/healthz_views.py
index bdb6e84..aef0527 100644
--- a/eveai_app/views/healthz_views.py
+++ b/eveai_app/views/healthz_views.py
@@ -48,7 +48,7 @@ def check_database():
 def check_celery():
     try:
         # Send a simple task to Celery
-        result = current_celery.send_task('tasks.ping', queue='embeddings')
+        result = current_celery.send_task('ping', queue='embeddings')
         response = result.get(timeout=10)  # Wait for up to 10 seconds for a response
         return response == 'pong'
     except CeleryTimeoutError:
diff --git a/eveai_chat/views/healthz_views.py b/eveai_chat/views/healthz_views.py
index 5babd5f..eabaf6d 100644
--- a/eveai_chat/views/healthz_views.py
+++ b/eveai_chat/views/healthz_views.py
@@ -20,7 +20,7 @@ def liveness():
 def readiness():
     checks = {
         "database": check_database(),
-        # "celery": check_celery(),
+        "celery": check_celery(),
         # Add more checks as needed
     }
 
@@ -41,7 +41,7 @@ def check_database():
 def check_celery():
     try:
         # Send a simple task to Celery
-        result = current_celery.send_task('tasks.ping', queue='llm_interactions')
+        result = current_celery.send_task('ping', queue='llm_interactions')
         response = result.get(timeout=10)  # Wait for up to 10 seconds for a response
         return response == 'pong'
     except CeleryTimeoutError:
diff --git a/eveai_chat_workers/tasks.py b/eveai_chat_workers/tasks.py
index 722a5c1..da9fd2f 100644
--- a/eveai_chat_workers/tasks.py
+++ b/eveai_chat_workers/tasks.py
@@ -22,8 +22,8 @@ from common.models.interaction import ChatSession, Interaction, InteractionEmbed
 from common.extensions import db
 from common.utils.celery_utils import current_celery
 from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
-from common.langchain.EveAIRetriever import EveAIRetriever
-from common.langchain.EveAIHistoryRetriever import EveAIHistoryRetriever
+from common.langchain.eveai_retriever import EveAIRetriever
+from common.langchain.eveai_history_retriever import EveAIHistoryRetriever
 
 
 # Healthcheck task
@@ -33,7 +33,10 @@ def ping():
 
 
 def detail_question(question, language, model_variables, session_id):
-    retriever = EveAIHistoryRetriever(model_variables, session_id)
+    current_app.logger.debug(f'Detail question: {question}')
+    current_app.logger.debug(f'model_variables: {model_variables}')
+    current_app.logger.debug(f'session_id: {session_id}')
+    retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
     llm = model_variables['llm']
     template = model_variables['history_template']
     language_template = create_language_template(template, language)
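The health checks and the worker have to agree on the task name: `send_task` dispatches by name alone, so the rename from `'tasks.ping'` to `'ping'` in the healthz views implies the task is registered under the bare name. A round-trip sketch; the decorator is an assumption, since the diff only shows the function body:

```python
# Worker side (assumed registration; the diff shows only `def ping(): return 'pong'`):
from common.utils.celery_utils import current_celery

@current_celery.task(name='ping')
def ping():
    return 'pong'

# Caller side, as used by the readiness probes:
result = current_celery.send_task('ping', queue='llm_interactions')
assert result.get(timeout=10) == 'pong'
```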
diff --git a/migrations/public/versions/25588210dab2_llm_metrics_added.py b/migrations/public/versions/25588210dab2_llm_metrics_added.py
new file mode 100644
index 0000000..fece0c6
--- /dev/null
+++ b/migrations/public/versions/25588210dab2_llm_metrics_added.py
@@ -0,0 +1,49 @@
+"""LLM Metrics Added
+
+Revision ID: 25588210dab2
+Revises: 083ccd8206ea
+Create Date: 2024-09-17 12:44:12.242990
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '25588210dab2'
+down_revision = '083ccd8206ea'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('llm_usage_metric',
+                    sa.Column('id', sa.Integer(), nullable=False),
+                    sa.Column('tenant_id', sa.Integer(), nullable=False),
+                    sa.Column('environment', sa.String(length=20), nullable=False),
+                    sa.Column('activity', sa.String(length=20), nullable=False),
+                    sa.Column('sub_activity', sa.String(length=20), nullable=False),
+                    sa.Column('activity_detail', sa.String(length=50), nullable=True),
+                    sa.Column('session_id', sa.String(length=50), nullable=True),
+                    sa.Column('interaction_id', sa.Integer(), nullable=True),
+                    sa.Column('document_version_id', sa.Integer(), nullable=True),
+                    sa.Column('prompt_tokens', sa.Integer(), nullable=True),
+                    sa.Column('completion_tokens', sa.Integer(), nullable=True),
+                    sa.Column('total_tokens', sa.Integer(), nullable=True),
+                    sa.Column('cost', sa.Float(), nullable=True),
+                    sa.Column('latency', sa.Float(), nullable=True),
+                    sa.Column('model_name', sa.String(length=50), nullable=False),
+                    sa.Column('timestamp', sa.DateTime(), nullable=False),
+                    sa.Column('additional_info', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+                    sa.PrimaryKeyConstraint('id'),
+                    schema='public'
+                    )
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('llm_usage_metric', schema='public')
+    # ### end Alembic commands ###
diff --git a/requirements.txt b/requirements.txt
index e7322cb..9fcec88 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,22 +26,22 @@ greenlet~=3.0.3
 gunicorn~=22.0.0
 Jinja2~=3.1.4
 kombu~=5.3.7
-langchain~=0.2.7
-langchain-anthropic~=0.1.19
-langchain-community~=0.2.7
-langchain-core~=0.2.16
-langchain-mistralai~=0.1.9
-langchain-openai~=0.1.15
-langchain-postgres~=0.0.9
-langchain-text-splitters~=0.2.2
+langchain~=0.3.0
+langchain-anthropic~=0.2.0
+langchain-community~=0.3.0
+langchain-core~=0.3.0
+langchain-mistralai~=0.2.0
+langchain-openai~=0.2.0
+langchain-postgres~=0.0.12
+langchain-text-splitters~=0.3.0
 langcodes~=3.4.0
 langdetect~=1.0.9
-langsmith~=0.1.81
-openai~=1.35.13
+langsmith~=0.1.121
+openai~=1.45.1
 pg8000~=1.31.2
 pgvector~=0.2.5
 pycryptodome~=3.20.0
-pydantic~=2.7.4
+pydantic~=2.9.1
 PyJWT~=2.8.0
 PySocks~=1.7.1
 python-dateutil~=2.9.0.post0
@@ -53,7 +53,7 @@ pytz~=2024.1
 PyYAML~=6.0.2rc1
 redis~=5.0.4
 requests~=2.32.3
-SQLAlchemy~=2.0.31
+SQLAlchemy~=2.0.35
 tiktoken~=0.7.0
 tzdata~=2024.1
 urllib3~=2.2.2
@@ -76,4 +76,6 @@
 PyPDF2~=3.0.1
 flask-restx~=1.3.0
 prometheus-flask-exporter~=0.23.1
 flask-healthz~=1.0.1
+anthropic~=0.34.2
+prometheus-client~=0.20.0
diff --git a/scripts/start_eveai_workers.sh b/scripts/start_eveai_workers.sh
index 374db96..d03a45f 100755
--- a/scripts/start_eveai_workers.sh
+++ b/scripts/start_eveai_workers.sh
@@ -8,7 +8,7 @@ export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR"
 # In
 chown -R appuser:appuser /app/logs
 
 # Start a worker for the 'embeddings' queue with higher concurrency
-celery -A eveai_workers.celery worker --loglevel=info -Q embeddings --autoscale=2,8 --hostname=embeddings_worker@%h &
+celery -A eveai_workers.celery worker --loglevel=debug -Q embeddings --autoscale=2,8 --hostname=embeddings_worker@%h &
 
 # Start a worker for the 'llm_interactions' queue with auto-scaling - not necessary, in eveai_chat_workers
 # celery -A eveai_workers.celery worker --loglevel=info - Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h &
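One caveat on the LangSmith settings: LangChain reads `LANGCHAIN_TRACING_V2`, `LANGCHAIN_ENDPOINT`, and `LANGCHAIN_PROJECT` from process environment variables, not from Flask config, so the new `Config` entries only take effect if they are mirrored into `os.environ` (the compose files already inject `LANGCHAIN_API_KEY` as a real environment variable). A minimal sketch, assuming it runs during app start-up:

```python
# Hedged sketch: mirror the Flask config into os.environ so LangChain sees it.
import os

def enable_langsmith_tracing(app):
    os.environ.setdefault('LANGCHAIN_TRACING_V2', str(app.config['LANGCHAIN_TRACING_V2']).lower())
    os.environ.setdefault('LANGCHAIN_ENDPOINT', app.config['LANGCHAIN_ENDPOINT'])
    os.environ.setdefault('LANGCHAIN_PROJECT', app.config['LANGCHAIN_PROJECT'])
    # LANGCHAIN_API_KEY is already provided by docker compose as an env var.
```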