- Adding a Tenant Type

- Allow filtering on Tenant Types and searching on parts of Tenant names
- Implement health checks
- Start Prometheus monitoring (still needs to be finalized)
- Refine audio_processor and srt_processor to reduce duplicate code and support larger files
- Introduce repopack so that LLMs can reason about the code
2024-09-13 15:43:40 +02:00
parent 9e14824249
commit 6cf660e622
41 changed files with 687 additions and 579 deletions
--- a/common/extensions.py
+++ b/common/extensions.py
@@ -10,8 +10,8 @@ from flask_jwt_extended import JWTManager
 from flask_session import Session
 from flask_wtf import CSRFProtect
 from flask_restx import Api
+from prometheus_flask_exporter import PrometheusMetrics

-from .utils.nginx_utils import prefixed_url_for
 from .utils.simple_encryption import SimpleEncryption
 from .utils.minio_utils import MinioClient

@@ -31,3 +31,4 @@ session = Session()
 api_rest = Api()
 simple_encryption = SimpleEncryption()
 minio_client = MinioClient()
+metrics = PrometheusMetrics.for_app_factory()
--- a/common/models/user.py
+++ b/common/models/user.py
@@ -21,6 +21,7 @@ class Tenant(db.Model):
    website = db.Column(db.String(255), nullable=True)
    timezone = db.Column(db.String(50), nullable=True, default='UTC')
    rag_context = db.Column(db.Text, nullable=True)
+    type = db.Column(db.String(20), nullable=True, server_default='Active')

    # language information
    default_language = db.Column(db.String(2), nullable=True)
@@ -56,7 +57,6 @@ class Tenant(db.Model):
    encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
    encrypted_api_key = db.Column(db.String(500), nullable=True)

-
    # Tuning enablers
    embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
    rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
@@ -75,6 +75,7 @@ class Tenant(db.Model):
            'website': self.website,
            'timezone': self.timezone,
            'rag_context': self.rag_context,
+            'type': self.type,
            'default_language': self.default_language,
            'allowed_languages': self.allowed_languages,
            'embedding_model': self.embedding_model,
--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -147,10 +147,10 @@ def select_model_variables(tenant):
            match llm_model:
                case 'gpt-4o' | 'gpt-4o-mini':
                    tool_calling_supported = True
-                    PDF_chunk_size = 10000
-                    PDF_chunk_overlap = 200
-                    PDF_min_chunk_size = 8000
-                    PDF_max_chunk_size = 12000
+                    processing_chunk_size = 10000
+                    processing_chunk_overlap = 200
+                    processing_min_chunk_size = 8000
+                    processing_max_chunk_size = 12000
                case _:
                    raise Exception(f'Error setting model variables for tenant {tenant.id} '
                                    f'error: Invalid chat model')
@@ -165,18 +165,18 @@ def select_model_variables(tenant):
                                                          model=llm_model_ext,
                                                          temperature=model_variables['RAG_temperature'])
            tool_calling_supported = True
-            PDF_chunk_size = 10000
-            PDF_chunk_overlap = 200
-            PDF_min_chunk_size = 8000
-            PDF_max_chunk_size = 12000
+            processing_chunk_size = 10000
+            processing_chunk_overlap = 200
+            processing_min_chunk_size = 8000
+            processing_max_chunk_size = 12000
        case _:
            raise Exception(f'Error setting model variables for tenant {tenant.id} '
                            f'error: Invalid chat provider')

-    model_variables['PDF_chunk_size'] = PDF_chunk_size
-    model_variables['PDF_chunk_overlap'] = PDF_chunk_overlap
-    model_variables['PDF_min_chunk_size'] = PDF_min_chunk_size
-    model_variables['PDF_max_chunk_size'] = PDF_max_chunk_size
+    model_variables['processing_chunk_size'] = processing_chunk_size
+    model_variables['processing_chunk_overlap'] = processing_chunk_overlap
+    model_variables['processing_min_chunk_size'] = processing_min_chunk_size
+    model_variables['processing_max_chunk_size'] = processing_max_chunk_size

    if tool_calling_supported:
        model_variables['cited_answer_cls'] = CitedAnswer