- turned model_variables into a class with lazy loading

- some improvements to health checks
Josako
2024-09-24 10:48:52 +02:00
parent 67bdeac434
commit a740c96630
16 changed files with 382 additions and 191 deletions


@@ -1,23 +1,31 @@
 from langchain_core.retrievers import BaseRetriever
 from sqlalchemy import asc
 from sqlalchemy.exc import SQLAlchemyError
-from pydantic import BaseModel, Field
+from pydantic import Field, BaseModel, PrivateAttr
 from typing import Any, Dict
 from flask import current_app
 from common.extensions import db
 from common.models.interaction import ChatSession, Interaction
-from common.utils.datetime_utils import get_date_in_timezone
+from common.utils.model_utils import ModelVariables


-class EveAIHistoryRetriever(BaseRetriever):
-    model_variables: Dict[str, Any] = Field(...)
-    session_id: str = Field(...)
+class EveAIHistoryRetriever(BaseRetriever, BaseModel):
+    _model_variables: ModelVariables = PrivateAttr()
+    _session_id: str = PrivateAttr()

-    def __init__(self, model_variables: Dict[str, Any], session_id: str):
+    def __init__(self, model_variables: ModelVariables, session_id: str):
         super().__init__()
-        self.model_variables = model_variables
-        self.session_id = session_id
+        self._model_variables = model_variables
+        self._session_id = session_id
+
+    @property
+    def model_variables(self) -> ModelVariables:
+        return self._model_variables
+
+    @property
+    def session_id(self) -> str:
+        return self._session_id

     def _get_relevant_documents(self, query: str):
         current_app.logger.debug(f'Retrieving history of interactions for query: {query}')
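
Why the switch to PrivateAttr: with the langchain/pydantic upgrades later in this commit, BaseRetriever is a pydantic model, and pydantic validates (and may reject) arbitrary attribute assignment. Leading-underscore private attributes bypass field validation, so a rich object like ModelVariables can be attached without a serializable schema. A minimal sketch of the pattern, assuming pydantic v2 semantics (Holder and payload are illustrative names, not part of this commit):

from pydantic import BaseModel, PrivateAttr

class Holder(BaseModel):
    # Private attrs are excluded from validation and serialization
    _payload: dict = PrivateAttr()

    def __init__(self, payload: dict):
        super().__init__()
        self._payload = payload

    @property
    def payload(self) -> dict:
        # Read-only access, mirroring the properties above
        return self._payload

h = Holder({'k': 5})
print(h.payload['k'])  # -> 5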


@@ -1,30 +1,39 @@
 from langchain_core.retrievers import BaseRetriever
 from sqlalchemy import func, and_, or_, desc
 from sqlalchemy.exc import SQLAlchemyError
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, PrivateAttr
 from typing import Any, Dict
 from flask import current_app
 from common.extensions import db
 from common.models.document import Document, DocumentVersion
 from common.utils.datetime_utils import get_date_in_timezone
+from common.utils.model_utils import ModelVariables


-class EveAIRetriever(BaseRetriever):
-    model_variables: Dict[str, Any] = Field(...)
-    tenant_info: Dict[str, Any] = Field(...)
+class EveAIRetriever(BaseRetriever, BaseModel):
+    _model_variables: ModelVariables = PrivateAttr()
+    _tenant_info: Dict[str, Any] = PrivateAttr()

-    def __init__(self, model_variables: Dict[str, Any], tenant_info: Dict[str, Any]):
+    def __init__(self, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
         super().__init__()
-        self.model_variables = model_variables
-        self.tenant_info = tenant_info
+        current_app.logger.debug(f'Model variables type: {type(model_variables)}')
+        self._model_variables = model_variables
+        self._tenant_info = tenant_info
+
+    @property
+    def model_variables(self) -> ModelVariables:
+        return self._model_variables
+
+    @property
+    def tenant_info(self) -> Dict[str, Any]:
+        return self._tenant_info

     def _get_relevant_documents(self, query: str):
         current_app.logger.debug(f'Retrieving relevant documents for query: {query}')
         query_embedding = self._get_query_embedding(query)
+        current_app.logger.debug(f'Model Variables Private: {type(self._model_variables)}')
+        current_app.logger.debug(f'Model Variables Property: {type(self.model_variables)}')
         db_class = self.model_variables['embedding_db_model']
         similarity_threshold = self.model_variables['similarity_threshold']
         k = self.model_variables['k']


@@ -0,0 +1,28 @@
from common.extensions import db
from sqlalchemy.dialects.postgresql import JSONB
import sqlalchemy as sa


class LLMUsageMetric(db.Model):
    __bind_key__ = 'public'
    __table_args__ = {'schema': 'public'}

    id = db.Column(db.Integer, primary_key=True)
    tenant_id = db.Column(db.Integer, nullable=False)
    environment = db.Column(db.String(20), nullable=False)
    activity = db.Column(db.String(20), nullable=False)
    sub_activity = db.Column(db.String(20), nullable=False)
    activity_detail = db.Column(db.String(50), nullable=True)
    session_id = db.Column(db.String(50), nullable=True)  # Chat Session ID
    interaction_id = db.Column(db.Integer, nullable=True)  # Chat Interaction ID
    document_version_id = db.Column(db.Integer, nullable=True)
    prompt_tokens = db.Column(db.Integer, nullable=True)
    completion_tokens = db.Column(db.Integer, nullable=True)
    total_tokens = db.Column(db.Integer, nullable=True)
    cost = db.Column(db.Float, nullable=True)
    latency = db.Column(db.Float, nullable=True)
    model_name = db.Column(db.String(50), nullable=False)
    timestamp = db.Column(db.DateTime, nullable=False)
    additional_info = db.Column(JSONB, nullable=True)

    # Add any additional fields or methods as needed
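
How a worker might record one of these rows (hypothetical write path, not part of this commit; only the column names come from the model above, the values are illustrative):

from datetime import datetime, timezone
from common.extensions import db
from common.models.monitoring import LLMUsageMetric

metric = LLMUsageMetric(
    tenant_id=1,
    environment='development',
    activity='chat',
    sub_activity='rag',
    model_name='gpt-4o-mini',
    prompt_tokens=812,
    completion_tokens=145,
    total_tokens=957,
    timestamp=datetime.now(timezone.utc),
)
db.session.add(metric)
db.session.commit()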


@@ -2,7 +2,6 @@ from common.extensions import db
 from flask_security import UserMixin, RoleMixin
 from sqlalchemy.dialects.postgresql import ARRAY
 import sqlalchemy as sa
-from sqlalchemy import CheckConstraint


 class Tenant(db.Model):


@@ -5,14 +5,14 @@ from flask import current_app
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_anthropic import ChatAnthropic
 from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain.prompts import ChatPromptTemplate
-import ast
-from typing import List
+from typing import List, Any, Iterator
+from collections.abc import MutableMapping
 from openai import OpenAI
-# from groq import Groq
 from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
 from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
+from common.models.user import Tenant
+from config.model_config import MODEL_CONFIG


 class CitedAnswer(BaseModel):
@@ -36,180 +36,221 @@ def set_language_prompt_template(cls, language_prompt):
     cls.__doc__ = language_prompt


-def select_model_variables(tenant):
-    embedding_provider = tenant.embedding_model.rsplit('.', 1)[0]
-    embedding_model = tenant.embedding_model.rsplit('.', 1)[1]
-
-    llm_provider = tenant.llm_model.rsplit('.', 1)[0]
-    llm_model = tenant.llm_model.rsplit('.', 1)[1]
-
-    # Set model variables
-    model_variables = {}
-    if tenant.es_k:
-        model_variables['k'] = tenant.es_k
-    else:
-        model_variables['k'] = 5
-
-    if tenant.es_similarity_threshold:
-        model_variables['similarity_threshold'] = tenant.es_similarity_threshold
-    else:
-        model_variables['similarity_threshold'] = 0.7
-
-    if tenant.chat_RAG_temperature:
-        model_variables['RAG_temperature'] = tenant.chat_RAG_temperature
-    else:
-        model_variables['RAG_temperature'] = 0.3
-
-    if tenant.chat_no_RAG_temperature:
-        model_variables['no_RAG_temperature'] = tenant.chat_no_RAG_temperature
-    else:
-        model_variables['no_RAG_temperature'] = 0.5
-
-    # Set Tuning variables
-    if tenant.embed_tuning:
-        model_variables['embed_tuning'] = tenant.embed_tuning
-    else:
-        model_variables['embed_tuning'] = False
-
-    if tenant.rag_tuning:
-        model_variables['rag_tuning'] = tenant.rag_tuning
-    else:
-        model_variables['rag_tuning'] = False
-
-    if tenant.rag_context:
-        model_variables['rag_context'] = tenant.rag_context
-    else:
-        model_variables['rag_context'] = " "
-
-    # Set HTML Chunking Variables
-    model_variables['html_tags'] = tenant.html_tags
-    model_variables['html_end_tags'] = tenant.html_end_tags
-    model_variables['html_included_elements'] = tenant.html_included_elements
-    model_variables['html_excluded_elements'] = tenant.html_excluded_elements
-    model_variables['html_excluded_classes'] = tenant.html_excluded_classes
-
-    # Set Chunk Size variables
-    model_variables['min_chunk_size'] = tenant.min_chunk_size
-    model_variables['max_chunk_size'] = tenant.max_chunk_size
-
-    environment = os.getenv('FLASK_ENV', 'development')
-    portkey_metadata = {'tenant_id': str(tenant.id), 'environment': environment}
-    # Set Embedding variables
-    match embedding_provider:
-        case 'openai':
-            portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
-                                            provider='openai',
-                                            metadata=portkey_metadata)
-            match embedding_model:
-                case 'text-embedding-3-small':
-                    api_key = current_app.config.get('OPENAI_API_KEY')
-                    model_variables['embedding_model'] = OpenAIEmbeddings(api_key=api_key,
-                                                                          model='text-embedding-3-small',
-                                                                          base_url=PORTKEY_GATEWAY_URL,
-                                                                          default_headers=portkey_headers
-                                                                          )
-                    model_variables['embedding_db_model'] = EmbeddingSmallOpenAI
-                case 'text-embedding-3-large':
-                    api_key = current_app.config.get('OPENAI_API_KEY')
-                    model_variables['embedding_model'] = OpenAIEmbeddings(api_key=api_key,
-                                                                          model='text-embedding-3-large',
-                                                                          base_url=PORTKEY_GATEWAY_URL,
-                                                                          default_headers=portkey_headers
-                                                                          )
-                    model_variables['embedding_db_model'] = EmbeddingLargeOpenAI
-                case _:
-                    raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                                    f'error: Invalid embedding model')
-        case _:
-            raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                            f'error: Invalid embedding provider')
-
-    # Set Chat model variables
-    match llm_provider:
-        case 'openai':
-            portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
-                                            metadata=portkey_metadata,
-                                            provider='openai')
-            tool_calling_supported = False
-            api_key = current_app.config.get('OPENAI_API_KEY')
-            model_variables['llm'] = ChatOpenAI(api_key=api_key,
-                                                model=llm_model,
-                                                temperature=model_variables['RAG_temperature'],
-                                                base_url=PORTKEY_GATEWAY_URL,
-                                                default_headers=portkey_headers)
-            model_variables['llm_no_rag'] = ChatOpenAI(api_key=api_key,
-                                                       model=llm_model,
-                                                       temperature=model_variables['no_RAG_temperature'],
-                                                       base_url=PORTKEY_GATEWAY_URL,
-                                                       default_headers=portkey_headers)
-            tool_calling_supported = False
-            match llm_model:
-                case 'gpt-4o' | 'gpt-4o-mini':
-                    tool_calling_supported = True
-                    processing_chunk_size = 10000
-                    processing_chunk_overlap = 200
-                    processing_min_chunk_size = 8000
-                    processing_max_chunk_size = 12000
-                case _:
-                    raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                                    f'error: Invalid chat model')
-        case 'anthropic':
-            api_key = current_app.config.get('ANTHROPIC_API_KEY')
-            # Anthropic does not have the same 'generic' model names as OpenAI
-            llm_model_ext = current_app.config.get('ANTHROPIC_LLM_VERSIONS').get(llm_model)
-            model_variables['llm'] = ChatAnthropic(api_key=api_key,
-                                                   model=llm_model_ext,
-                                                   temperature=model_variables['RAG_temperature'])
-            model_variables['llm_no_rag'] = ChatAnthropic(api_key=api_key,
-                                                          model=llm_model_ext,
-                                                          temperature=model_variables['RAG_temperature'])
-            tool_calling_supported = True
-            processing_chunk_size = 10000
-            processing_chunk_overlap = 200
-            processing_min_chunk_size = 8000
-            processing_max_chunk_size = 12000
-        case _:
-            raise Exception(f'Error setting model variables for tenant {tenant.id} '
-                            f'error: Invalid chat provider')
-
-    model_variables['processing_chunk_size'] = processing_chunk_size
-    model_variables['processing_chunk_overlap'] = processing_chunk_overlap
-    model_variables['processing_min_chunk_size'] = processing_min_chunk_size
-    model_variables['processing_max_chunk_size'] = processing_max_chunk_size
-
-    if tool_calling_supported:
-        model_variables['cited_answer_cls'] = CitedAnswer
-
-    templates = current_app.config['PROMPT_TEMPLATES'][f'{llm_provider}.{llm_model}']
-    model_variables['summary_template'] = templates['summary']
-    model_variables['rag_template'] = templates['rag']
-    model_variables['history_template'] = templates['history']
-    model_variables['encyclopedia_template'] = templates['encyclopedia']
-    model_variables['transcript_template'] = templates['transcript']
-    model_variables['html_parse_template'] = templates['html_parse']
-    model_variables['pdf_parse_template'] = templates['pdf_parse']
-
-    model_variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
-
-    # Transcription Client Variables.
-    # Using Groq
-    # api_key = current_app.config.get('GROQ_API_KEY')
-    # model_variables['transcription_client'] = Groq(api_key=api_key)
-    # model_variables['transcription_model'] = 'whisper-large-v3'
-    # Using OpenAI for transcriptions
-    portkey_metadata = {'tenant_id': str(tenant.id)}
-    portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
-                                    metadata=portkey_metadata,
-                                    provider='openai'
-                                    )
-    api_key = current_app.config.get('OPENAI_API_KEY')
-    model_variables['transcription_client'] = OpenAI(api_key=api_key,
-                                                     base_url=PORTKEY_GATEWAY_URL,
-                                                     default_headers=portkey_headers)
-    model_variables['transcription_model'] = 'whisper-1'
+class ModelVariables(MutableMapping):
+    def __init__(self, tenant: Tenant):
+        self.tenant = tenant
+        self._variables = self._initialize_variables()
+        self._embedding_model = None
+        self._llm = None
+        self._llm_no_rag = None
+        self._transcription_client = None
+        self._prompt_templates = {}
+        self._embedding_db_model = None
+
+    def _initialize_variables(self):
+        variables = {}
+        # We initialize the variables that are available knowing the tenant. For the other, we will apply 'lazy loading'
+        variables['k'] = self.tenant.es_k or 5
+        variables['similarity_threshold'] = self.tenant.es_similarity_threshold or 0.7
+        variables['RAG_temperature'] = self.tenant.chat_RAG_temperature or 0.3
+        variables['no_RAG_temperature'] = self.tenant.chat_no_RAG_temperature or 0.5
+        variables['embed_tuning'] = self.tenant.embed_tuning or False
+        variables['rag_tuning'] = self.tenant.rag_tuning or False
+        variables['rag_context'] = self.tenant.rag_context or " "
+
+        # Set HTML Chunking Variables
+        variables['html_tags'] = self.tenant.html_tags
+        variables['html_end_tags'] = self.tenant.html_end_tags
+        variables['html_included_elements'] = self.tenant.html_included_elements
+        variables['html_excluded_elements'] = self.tenant.html_excluded_elements
+        variables['html_excluded_classes'] = self.tenant.html_excluded_classes
+
+        # Set Chunk Size variables
+        variables['min_chunk_size'] = self.tenant.min_chunk_size
+        variables['max_chunk_size'] = self.tenant.max_chunk_size
+
+        # Set model providers
+        variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
+        variables['llm_provider'], variables['llm_model'] = self.tenant.llm_model.rsplit('.', 1)
+
+        variables["templates"] = current_app.config['PROMPT_TEMPLATES'][(f"{variables['llm_provider']}."
+                                                                         f"{variables['llm_model']}")]
+        current_app.logger.info(f"Loaded prompt templates: \n")
+        current_app.logger.info(f"{variables['templates']}")
+
+        # Set model-specific configurations
+        model_config = MODEL_CONFIG.get(variables['llm_provider'], {}).get(variables['llm_model'], {})
+        variables.update(model_config)
+
+        variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][self.tenant.llm_model]
+
+        if variables['tool_calling_supported']:
+            variables['cited_answer_cls'] = CitedAnswer
+
+        return variables
+
+    @property
+    def embedding_model(self):
+        if self._embedding_model is None:
+            environment = os.getenv('FLASK_ENV', 'development')
+            portkey_metadata = {'tenant_id': str(self.tenant.id), 'environment': environment}
+            if self._variables['embedding_provider'] == 'openai':
+                portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
+                                                provider='openai',
+                                                metadata=portkey_metadata)
+                api_key = os.getenv('OPENAI_API_KEY')
+                model = self._variables['embedding_model']
+                self._embedding_model = OpenAIEmbeddings(api_key=api_key,
+                                                         model=model,
+                                                         base_url=PORTKEY_GATEWAY_URL,
+                                                         default_headers=portkey_headers)
+                self._embedding_db_model = EmbeddingSmallOpenAI \
+                    if model == 'text-embedding-3-small' \
+                    else EmbeddingLargeOpenAI
+            else:
+                raise ValueError(f"Invalid embedding provider: {self._variables['embedding_provider']}")
+        return self._embedding_model
+
+    @property
+    def llm(self):
+        if self._llm is None:
+            self._initialize_llm()
+        return self._llm
+
+    @property
+    def llm_no_rag(self):
+        if self._llm_no_rag is None:
+            self._initialize_llm()
+        return self._llm_no_rag
+
+    def _initialize_llm(self):
+        environment = os.getenv('FLASK_ENV', 'development')
+        portkey_metadata = {'tenant_id': str(self.tenant.id), 'environment': environment}
+        if self._variables['llm_provider'] == 'openai':
+            portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
+                                            metadata=portkey_metadata,
+                                            provider='openai')
+            api_key = os.getenv('OPENAI_API_KEY')
+            self._llm = ChatOpenAI(api_key=api_key,
+                                   model=self._variables['llm_model'],
+                                   temperature=self._variables['RAG_temperature'],
+                                   base_url=PORTKEY_GATEWAY_URL,
+                                   default_headers=portkey_headers)
+            self._llm_no_rag = ChatOpenAI(api_key=api_key,
+                                          model=self._variables['llm_model'],
+                                          temperature=self._variables['no_RAG_temperature'],
+                                          base_url=PORTKEY_GATEWAY_URL,
+                                          default_headers=portkey_headers)
+            self._variables['tool_calling_supported'] = self._variables['llm_model'] in ['gpt-4o', 'gpt-4o-mini']
+        elif self._variables['llm_provider'] == 'anthropic':
+            api_key = os.getenv('ANTHROPIC_API_KEY')
+            llm_model_ext = os.getenv('ANTHROPIC_LLM_VERSIONS', {}).get(self._variables['llm_model'])
+            self._llm = ChatAnthropic(api_key=api_key,
+                                      model=llm_model_ext,
+                                      temperature=self._variables['RAG_temperature'])
+            self._llm_no_rag = ChatAnthropic(api_key=api_key,
+                                             model=llm_model_ext,
+                                             temperature=self._variables['RAG_temperature'])
+            self._variables['tool_calling_supported'] = True
+        else:
+            raise ValueError(f"Invalid chat provider: {self._variables['llm_provider']}")
+
+    @property
+    def transcription_client(self):
+        if self._transcription_client is None:
+            environment = os.getenv('FLASK_ENV', 'development')
+            portkey_metadata = {'tenant_id': str(self.tenant.id), 'environment': environment}
+            portkey_headers = createHeaders(api_key=os.getenv('PORTKEY_API_KEY'),
+                                            metadata=portkey_metadata,
+                                            provider='openai')
+            api_key = os.getenv('OPENAI_API_KEY')
+            self._transcription_client = OpenAI(api_key=api_key,
+                                                base_url=PORTKEY_GATEWAY_URL,
+                                                default_headers=portkey_headers)
+            self._variables['transcription_model'] = 'whisper-1'
+        return self._transcription_client
+
+    @property
+    def embedding_db_model(self):
+        if self._embedding_db_model is None:
+            self._embedding_db_model = self.get_embedding_db_model()
+        return self._embedding_db_model
+
+    def get_embedding_db_model(self):
+        current_app.logger.debug("In get_embedding_db_model")
+        if self._embedding_db_model is None:
+            self._embedding_db_model = EmbeddingSmallOpenAI \
+                if self._variables['embedding_model'] == 'text-embedding-3-small' \
+                else EmbeddingLargeOpenAI
+        current_app.logger.debug(f"Embedding DB Model: {self._embedding_db_model}")
+        return self._embedding_db_model
+
+    def get_prompt_template(self, template_name: str) -> str:
+        current_app.logger.info(f"Getting prompt template for {template_name}")
+        if template_name not in self._prompt_templates:
+            self._prompt_templates[template_name] = self._load_prompt_template(template_name)
+        return self._prompt_templates[template_name]
+
+    def _load_prompt_template(self, template_name: str) -> str:
+        # In the future, this method will make an API call to Portkey
+        # For now, we'll simulate it with a placeholder implementation
+        # You can replace this with your current prompt loading logic
+        return self._variables['templates'][template_name]
+
+    def __getitem__(self, key: str) -> Any:
+        current_app.logger.debug(f"ModelVariables: Getting {key}")
+        # Support older template names (suffix = _template)
+        if key.endswith('_template'):
+            key = key[:-len('_template')]
+            current_app.logger.debug(f"ModelVariables: Getting modified {key}")
+        if key == 'embedding_model':
+            return self.embedding_model
+        elif key == 'embedding_db_model':
+            return self.embedding_db_model
+        elif key == 'llm':
+            return self.llm
+        elif key == 'llm_no_rag':
+            return self.llm_no_rag
+        elif key == 'transcription_client':
+            return self.transcription_client
+        elif key in self._variables.get('prompt_templates', []):
+            return self.get_prompt_template(key)
+        return self._variables.get(key)
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        self._variables[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        del self._variables[key]
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._variables)
+
+    def __len__(self):
+        return len(self._variables)
+
+    def get(self, key: str, default: Any = None) -> Any:
+        return self.__getitem__(key) or default
+
+    def update(self, **kwargs) -> None:
+        self._variables.update(kwargs)
+
+    def items(self):
+        return self._variables.items()
+
+    def keys(self):
+        return self._variables.keys()
+
+    def values(self):
+        return self._variables.values()
+
+
+def select_model_variables(tenant):
+    model_variables = ModelVariables(tenant=tenant)
     return model_variables
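
The MutableMapping base keeps existing dict-style call sites working while deferring client construction to first access. A sketch of the intended behavior (requires a Flask app context and a configured tenant, so not runnable standalone):

model_variables = select_model_variables(tenant)

# Cheap: resolved from the dict built eagerly in _initialize_variables()
k = model_variables['k']

# Lazy: first access runs _initialize_llm() and builds the ChatOpenAI /
# ChatAnthropic client; subsequent accesses return the cached object.
llm = model_variables['llm']
assert llm is model_variables['llm']

# Legacy '*_template' keys are rewritten in __getitem__, so older call sites
# such as model_variables['history_template'] resolve via get_prompt_template('history').
template = model_variables['history_template']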


@@ -137,6 +137,12 @@ class Config(object):
     MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
     MAIL_DEFAULT_SENDER = ('eveAI Admin', MAIL_USERNAME)

+    # Langsmith settings
+    LANGCHAIN_TRACING_V2 = True
+    LANGCHAIN_ENDPOINT = 'https://api.smith.langchain.com'
+    LANGCHAIN_PROJECT = "eveai"
+
     SUPPORTED_FILE_TYPES = ['pdf', 'html', 'md', 'txt', 'mp3', 'mp4', 'ogg', 'srt']
     TENANT_TYPES = ['Active', 'Demo', 'Inactive', 'Test']
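
One caveat worth noting: LangChain's tracer reads LANGCHAIN_TRACING_V2, LANGCHAIN_ENDPOINT and LANGCHAIN_PROJECT from process environment variables, not from the Flask config object, so these values may also need to be exported. A sketch, assuming this class lives at config.config.Config:

import os
from config.config import Config  # assumed module path

os.environ.setdefault('LANGCHAIN_TRACING_V2', 'true')  # env vars must be strings
os.environ.setdefault('LANGCHAIN_ENDPOINT', Config.LANGCHAIN_ENDPOINT)
os.environ.setdefault('LANGCHAIN_PROJECT', Config.LANGCHAIN_PROJECT)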

config/model_config.py (new file, 41 lines)

@@ -0,0 +1,41 @@
MODEL_CONFIG = {
    "openai": {
        "gpt-4o": {
            "tool_calling_supported": True,
            "processing_chunk_size": 10000,
            "processing_chunk_overlap": 200,
            "processing_min_chunk_size": 8000,
            "processing_max_chunk_size": 12000,
            "prompt_templates": [
                "summary", "rag", "history", "encyclopedia",
                "transcript", "html_parse", "pdf_parse"
            ]
        },
        "gpt-4o-mini": {
            "tool_calling_supported": True,
            "processing_chunk_size": 10000,
            "processing_chunk_overlap": 200,
            "processing_min_chunk_size": 8000,
            "processing_max_chunk_size": 12000,
            "prompt_templates": [
                "summary", "rag", "history", "encyclopedia",
                "transcript", "html_parse", "pdf_parse"
            ]
        },
        # Add other OpenAI models here
    },
    "anthropic": {
        "claude-3-5-sonnet": {
            "tool_calling_supported": True,
            "processing_chunk_size": 10000,
            "processing_chunk_overlap": 200,
            "processing_min_chunk_size": 8000,
            "processing_max_chunk_size": 12000,
            "prompt_templates": [
                "summary", "rag", "history", "encyclopedia",
                "transcript", "html_parse", "pdf_parse"
            ]
        },
        # Add other Anthropic models here
    },
}
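
This table is consumed in ModelVariables._initialize_variables() via a chained lookup with empty-dict fallbacks. Note the failure mode: a provider/model pair missing from MODEL_CONFIG yields an empty config, and the subsequent variables['tool_calling_supported'] access would raise KeyError rather than a descriptive error. Illustrative lookup:

from config.model_config import MODEL_CONFIG

model_config = MODEL_CONFIG.get('openai', {}).get('gpt-4o-mini', {})
print(model_config['tool_calling_supported'])  # True
print(model_config['prompt_templates'][:2])    # ['summary', 'rag']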


@@ -32,6 +32,7 @@ x-common-variables: &common-variables
   MINIO_ACCESS_KEY: minioadmin
   MINIO_SECRET_KEY: minioadmin
   NGINX_SERVER_NAME: 'localhost http://macstudio.ask-eve-ai-local.com/'
+  LANGCHAIN_API_KEY: "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"

 networks:


@@ -38,6 +38,7 @@ x-common-variables: &common-variables
   MINIO_ACCESS_KEY: 04JKmQln8PQpyTmMiCPc
   MINIO_SECRET_KEY: 2PEZAD1nlpAmOyDV0TUTuJTQw1qVuYLF3A7GMs0D
   NGINX_SERVER_NAME: 'evie.askeveai.com mxz536.stackhero-network.com'
+  LANGCHAIN_API_KEY: "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"

 networks:
   eveai-network:


@@ -10,6 +10,8 @@ from common.extensions import (db, migrate, bootstrap, security, mail, login_man
                               minio_client, simple_encryption, metrics)
 from common.models.user import User, Role, Tenant, TenantDomain
 import common.models.interaction
+import common.models.monitoring
+import common.models.document
 from common.utils.nginx_utils import prefixed_url_for
 from config.logging_config import LOGGING
 from common.utils.security import set_tenant_session_data


@@ -48,7 +48,7 @@ def check_database():
 def check_celery():
     try:
         # Send a simple task to Celery
-        result = current_celery.send_task('tasks.ping', queue='embeddings')
+        result = current_celery.send_task('ping', queue='embeddings')
         response = result.get(timeout=10)  # Wait for up to 10 seconds for a response
         return response == 'pong'
     except CeleryTimeoutError:


@@ -20,7 +20,7 @@ def liveness():
 def readiness():
     checks = {
         "database": check_database(),
-        # "celery": check_celery(),
+        "celery": check_celery(),
         # Add more checks as needed
     }
@@ -41,7 +41,7 @@ def check_database():
 def check_celery():
     try:
         # Send a simple task to Celery
-        result = current_celery.send_task('tasks.ping', queue='llm_interactions')
+        result = current_celery.send_task('ping', queue='llm_interactions')
         response = result.get(timeout=10)  # Wait for up to 10 seconds for a response
         return response == 'pong'
     except CeleryTimeoutError:
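
The rename from 'tasks.ping' to 'ping' implies the worker registers the task under an explicit bare name; otherwise Celery derives the name from the module path and send_task('ping') would fail with NotRegistered. A sketch of the expected worker-side registration (assuming current_celery exposes the Celery app's task decorator):

from common.utils.celery_utils import current_celery

@current_celery.task(name='ping')
def ping():
    return 'pong'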


@@ -22,8 +22,8 @@ from common.models.interaction import ChatSession, Interaction, InteractionEmbed
 from common.extensions import db
 from common.utils.celery_utils import current_celery
 from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
-from common.langchain.EveAIRetriever import EveAIRetriever
-from common.langchain.EveAIHistoryRetriever import EveAIHistoryRetriever
+from common.langchain.eveai_retriever import EveAIRetriever
+from common.langchain.eveai_history_retriever import EveAIHistoryRetriever


 # Healthcheck task
@@ -33,7 +33,10 @@ def ping():
 def detail_question(question, language, model_variables, session_id):
-    retriever = EveAIHistoryRetriever(model_variables, session_id)
+    current_app.logger.debug(f'Detail question: {question}')
+    current_app.logger.debug(f'model_variables: {model_variables}')
+    current_app.logger.debug(f'session_id: {session_id}')
+    retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
     llm = model_variables['llm']
     template = model_variables['history_template']
     language_template = create_language_template(template, language)


@@ -0,0 +1,49 @@
"""LLM Metrics Added
Revision ID: 25588210dab2
Revises: 083ccd8206ea
Create Date: 2024-09-17 12:44:12.242990
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '25588210dab2'
down_revision = '083ccd8206ea'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('llm_usage_metric',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tenant_id', sa.Integer(), nullable=False),
sa.Column('environment', sa.String(length=20), nullable=False),
sa.Column('activity', sa.String(length=20), nullable=False),
sa.Column('sub_activity', sa.String(length=20), nullable=False),
sa.Column('activity_detail', sa.String(length=50), nullable=True),
sa.Column('session_id', sa.String(length=50), nullable=True),
sa.Column('interaction_id', sa.Integer(), nullable=True),
sa.Column('document_version_id', sa.Integer(), nullable=True),
sa.Column('prompt_tokens', sa.Integer(), nullable=True),
sa.Column('completion_tokens', sa.Integer(), nullable=True),
sa.Column('total_tokens', sa.Integer(), nullable=True),
sa.Column('cost', sa.Float(), nullable=True),
sa.Column('latency', sa.Float(), nullable=True),
sa.Column('model_name', sa.String(length=50), nullable=False),
sa.Column('timestamp', sa.DateTime(), nullable=False),
sa.Column('additional_info', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.PrimaryKeyConstraint('id'),
schema='public'
)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('llm_usage_metric', schema='public')
# ### end Alembic commands ###


@@ -26,22 +26,22 @@ greenlet~=3.0.3
 gunicorn~=22.0.0
 Jinja2~=3.1.4
 kombu~=5.3.7
-langchain~=0.2.7
+langchain~=0.3.0
-langchain-anthropic~=0.1.19
+langchain-anthropic~=0.2.0
-langchain-community~=0.2.7
+langchain-community~=0.3.0
-langchain-core~=0.2.16
+langchain-core~=0.3.0
-langchain-mistralai~=0.1.9
+langchain-mistralai~=0.2.0
-langchain-openai~=0.1.15
+langchain-openai~=0.2.0
-langchain-postgres~=0.0.9
+langchain-postgres~=0.0.12
-langchain-text-splitters~=0.2.2
+langchain-text-splitters~=0.3.0
 langcodes~=3.4.0
 langdetect~=1.0.9
 langsmith~=0.1.81
-openai~=1.35.13
+openai~=1.45.1
 pg8000~=1.31.2
 pgvector~=0.2.5
 pycryptodome~=3.20.0
-pydantic~=2.7.4
+pydantic~=2.9.1
 PyJWT~=2.8.0
 PySocks~=1.7.1
 python-dateutil~=2.9.0.post0
@@ -53,7 +53,7 @@ pytz~=2024.1
 PyYAML~=6.0.2rc1
 redis~=5.0.4
 requests~=2.32.3
-SQLAlchemy~=2.0.31
+SQLAlchemy~=2.0.35
 tiktoken~=0.7.0
 tzdata~=2024.1
 urllib3~=2.2.2
@@ -76,4 +76,7 @@ PyPDF2~=3.0.1
 flask-restx~=1.3.0
 prometheus-flask-exporter~=0.23.1
 flask-healthz~=1.0.1
+langsmith~=0.1.121
+anthropic~=0.34.2
+prometheus-client~=0.20.0


@@ -8,7 +8,7 @@ export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # In
 chown -R appuser:appuser /app/logs

 # Start a worker for the 'embeddings' queue with higher concurrency
-celery -A eveai_workers.celery worker --loglevel=info -Q embeddings --autoscale=2,8 --hostname=embeddings_worker@%h &
+celery -A eveai_workers.celery worker --loglevel=debug -Q embeddings --autoscale=2,8 --hostname=embeddings_worker@%h &

 # Start a worker for the 'llm_interactions' queue with auto-scaling - not necessary, in eveai_chat_workers
 # celery -A eveai_workers.celery worker --loglevel=info -Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h &