- Adding a Tenant Type

- Allow filtering on Tenant Types & searching for parts of Tenant names
- Implement health checks
- Start Prometheus monitoring (needs to be finalized)
- Refine audio_processor and srt_processor to reduce duplicate code and add support for larger files
- Introduce repopack so that LLMs can reason about the code
This commit is contained in:
Josako
2024-09-13 15:43:40 +02:00
parent 9e14824249
commit 6cf660e622
41 changed files with 687 additions and 579 deletions

View File

@@ -10,8 +10,8 @@ from flask_jwt_extended import JWTManager
from flask_session import Session
from flask_wtf import CSRFProtect
from flask_restx import Api
from prometheus_flask_exporter import PrometheusMetrics
from .utils.nginx_utils import prefixed_url_for
from .utils.simple_encryption import SimpleEncryption
from .utils.minio_utils import MinioClient
@@ -31,3 +31,4 @@ session = Session()
api_rest = Api()
simple_encryption = SimpleEncryption()
minio_client = MinioClient()
metrics = PrometheusMetrics.for_app_factory()

View File

@@ -21,6 +21,7 @@ class Tenant(db.Model):
website = db.Column(db.String(255), nullable=True)
timezone = db.Column(db.String(50), nullable=True, default='UTC')
rag_context = db.Column(db.Text, nullable=True)
type = db.Column(db.String(20), nullable=True, server_default='Active')
# language information
default_language = db.Column(db.String(2), nullable=True)
@@ -56,7 +57,6 @@ class Tenant(db.Model):
encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
encrypted_api_key = db.Column(db.String(500), nullable=True)
# Tuning enablers
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
@@ -75,6 +75,7 @@ class Tenant(db.Model):
'website': self.website,
'timezone': self.timezone,
'rag_context': self.rag_context,
'type': self.type,
'default_language': self.default_language,
'allowed_languages': self.allowed_languages,
'embedding_model': self.embedding_model,

View File

@@ -147,10 +147,10 @@ def select_model_variables(tenant):
match llm_model:
case 'gpt-4o' | 'gpt-4o-mini':
tool_calling_supported = True
PDF_chunk_size = 10000
PDF_chunk_overlap = 200
PDF_min_chunk_size = 8000
PDF_max_chunk_size = 12000
processing_chunk_size = 10000
processing_chunk_overlap = 200
processing_min_chunk_size = 8000
processing_max_chunk_size = 12000
case _:
raise Exception(f'Error setting model variables for tenant {tenant.id} '
f'error: Invalid chat model')
@@ -165,18 +165,18 @@ def select_model_variables(tenant):
model=llm_model_ext,
temperature=model_variables['RAG_temperature'])
tool_calling_supported = True
PDF_chunk_size = 10000
PDF_chunk_overlap = 200
PDF_min_chunk_size = 8000
PDF_max_chunk_size = 12000
processing_chunk_size = 10000
processing_chunk_overlap = 200
processing_min_chunk_size = 8000
processing_max_chunk_size = 12000
case _:
raise Exception(f'Error setting model variables for tenant {tenant.id} '
f'error: Invalid chat provider')
model_variables['PDF_chunk_size'] = PDF_chunk_size
model_variables['PDF_chunk_overlap'] = PDF_chunk_overlap
model_variables['PDF_min_chunk_size'] = PDF_min_chunk_size
model_variables['PDF_max_chunk_size'] = PDF_max_chunk_size
model_variables['processing_chunk_size'] = processing_chunk_size
model_variables['processing_chunk_overlap'] = processing_chunk_overlap
model_variables['processing_min_chunk_size'] = processing_min_chunk_size
model_variables['processing_max_chunk_size'] = processing_max_chunk_size
if tool_calling_supported:
model_variables['cited_answer_cls'] = CitedAnswer