- Improvements to enable deployment in the cloud, mainly changing file access to Minio
- Improvements on RAG logging, and some debugging in that area
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,3 +2,4 @@ docker/db/postgresql/
|
||||
docker/db/redis/
|
||||
docker/logs/
|
||||
docker/tenant_files/
|
||||
/docker/minio/
|
||||
|
||||
@@ -11,6 +11,7 @@ from flask_session import Session
|
||||
from flask_wtf import CSRFProtect
|
||||
|
||||
from .utils.key_encryption import JosKMSClient
|
||||
from .utils.minio_utils import MinioClient
|
||||
|
||||
# Create extensions
|
||||
db = SQLAlchemy()
|
||||
@@ -26,3 +27,4 @@ jwt = JWTManager()
|
||||
session = Session()
|
||||
|
||||
kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
|
||||
minio_client = MinioClient()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
from sqlalchemy import func, and_, or_
|
||||
from sqlalchemy import func, and_, or_, desc
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Any, Dict
|
||||
@@ -20,12 +20,56 @@ class EveAIRetriever(BaseRetriever):
|
||||
self.tenant_info = tenant_info
|
||||
|
||||
def _get_relevant_documents(self, query: str):
|
||||
|
||||
|
||||
|
||||
current_app.logger.debug(f'Retrieving relevant documents for query: {query}')
|
||||
query_embedding = self._get_query_embedding(query)
|
||||
db_class = self.model_variables['embedding_db_model']
|
||||
similarity_threshold = self.model_variables['similarity_threshold']
|
||||
k = self.model_variables['k']
|
||||
|
||||
if self.tenant_info['rag_tuning']:
|
||||
try:
|
||||
current_date = get_date_in_timezone(self.tenant_info['timezone'])
|
||||
current_app.rag_tuning_logger.debug(f'Current date: {current_date}\n')
|
||||
|
||||
# Debug query to show similarity for all valid documents (without chunk text)
|
||||
debug_query = (
|
||||
db.session.query(
|
||||
Document.id.label('document_id'),
|
||||
DocumentVersion.id.label('version_id'),
|
||||
db_class.id.label('embedding_id'),
|
||||
(1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity')
|
||||
)
|
||||
.join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
|
||||
.join(Document, DocumentVersion.doc_id == Document.id)
|
||||
.filter(
|
||||
or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
|
||||
or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date)
|
||||
)
|
||||
.order_by(desc('similarity'))
|
||||
)
|
||||
|
||||
debug_results = debug_query.all()
|
||||
|
||||
current_app.logger.debug("Debug: Similarity for all valid documents:")
|
||||
for row in debug_results:
|
||||
current_app.rag_tuning_logger.debug(f"Doc ID: {row.document_id}, "
|
||||
f"Version ID: {row.version_id}, "
|
||||
f"Embedding ID: {row.embedding_id}, "
|
||||
f"Similarity: {row.similarity}")
|
||||
current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'Error generating overview: {e}')
|
||||
db.session.rollback()
|
||||
|
||||
if self.tenant_info['rag_tuning']:
|
||||
current_app.rag_tuning_logger.debug(f'Parameters for Retrieval of documents: \n')
|
||||
current_app.rag_tuning_logger.debug(f'Similarity Threshold: {similarity_threshold}\n')
|
||||
current_app.rag_tuning_logger.debug(f'K: {k}\n')
|
||||
current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
|
||||
|
||||
try:
|
||||
current_date = get_date_in_timezone(self.tenant_info['timezone'])
|
||||
# Subquery to find the latest version of each document
|
||||
@@ -40,24 +84,31 @@ class EveAIRetriever(BaseRetriever):
|
||||
# Main query to filter embeddings
|
||||
query_obj = (
|
||||
db.session.query(db_class,
|
||||
db_class.embedding.cosine_distance(query_embedding).label('distance'))
|
||||
(1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity'))
|
||||
.join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
|
||||
.join(Document, DocumentVersion.doc_id == Document.id)
|
||||
.join(subquery, DocumentVersion.id == subquery.c.latest_version_id)
|
||||
.filter(
|
||||
or_(Document.valid_from.is_(None), Document.valid_from <= current_date),
|
||||
or_(Document.valid_to.is_(None), Document.valid_to >= current_date),
|
||||
db_class.embedding.cosine_distance(query_embedding) < similarity_threshold
|
||||
or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
|
||||
or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
|
||||
(1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold
|
||||
)
|
||||
.order_by('distance')
|
||||
.order_by(desc('similarity'))
|
||||
.limit(k)
|
||||
)
|
||||
|
||||
if self.tenant_info['rag_tuning']:
|
||||
current_app.rag_tuning_logger.debug(f'Query executed for Retrieval of documents: \n')
|
||||
current_app.rag_tuning_logger.debug(f'{query_obj.statement}\n')
|
||||
current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
|
||||
|
||||
res = query_obj.all()
|
||||
|
||||
if self.tenant_info['rag_tuning']:
|
||||
current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents')
|
||||
current_app.rag_tuning_logger.debug(f'---------------------------------------')
|
||||
current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents \n')
|
||||
current_app.rag_tuning_logger.debug(f'Data retrieved: \n')
|
||||
current_app.rag_tuning_logger.debug(f'{res}\n')
|
||||
current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
|
||||
|
||||
result = []
|
||||
for doc in res:
|
||||
|
||||
@@ -82,7 +82,6 @@ class Tenant(db.Model):
|
||||
'html_excluded_elements': self.html_excluded_elements,
|
||||
'min_chunk_size': self.min_chunk_size,
|
||||
'max_chunk_size': self.max_chunk_size,
|
||||
'es_k'
|
||||
'es_k': self.es_k,
|
||||
'es_similarity_threshold': self.es_similarity_threshold,
|
||||
'chat_RAG_temperature': self.chat_RAG_temperature,
|
||||
|
||||
86
common/utils/minio_utils.py
Normal file
86
common/utils/minio_utils.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
from flask import Flask
|
||||
import io
|
||||
from werkzeug.datastructures import FileStorage
|
||||
|
||||
class MinioClient:
    """Thin wrapper around the MinIO SDK for per-tenant object storage.

    Follows the Flask extension pattern: instantiate once at module level,
    then call ``init_app(app)`` during application setup. Documents are
    stored one bucket per tenant, with object keys laid out as
    ``<document_id>/<language>/<version_id>/<filename>`` so that prefix
    listings by document, language or version are possible.
    """

    def __init__(self):
        # Deferred initialisation: the real SDK client is created in init_app().
        self.client = None

    def init_app(self, app: "Flask"):
        """Create the underlying Minio client from the app configuration.

        Reads ``MINIO_ENDPOINT``, ``MINIO_ACCESS_KEY``, ``MINIO_SECRET_KEY``
        and the optional ``MINIO_USE_HTTPS`` flag (defaults to False).
        """
        self.client = Minio(
            app.config['MINIO_ENDPOINT'],
            access_key=app.config['MINIO_ACCESS_KEY'],
            secret_key=app.config['MINIO_SECRET_KEY'],
            secure=app.config.get('MINIO_USE_HTTPS', False)
        )
        app.logger.info(f"MinIO client initialized with endpoint: {app.config['MINIO_ENDPOINT']}")

    def generate_bucket_name(self, tenant_id):
        """Return the canonical bucket name for a tenant."""
        return f"tenant-{tenant_id}-bucket"

    def create_tenant_bucket(self, tenant_id):
        """Ensure the tenant's bucket exists and return its name (idempotent).

        Raises:
            Exception: wrapping any S3Error from the MinIO server.
        """
        bucket_name = self.generate_bucket_name(tenant_id)
        try:
            if not self.client.bucket_exists(bucket_name):
                self.client.make_bucket(bucket_name)
            # Single exit point; the original duplicated this return inside
            # the if-branch for no benefit.
            return bucket_name
        except S3Error as err:
            raise Exception(f"Error occurred while creating bucket: {err}") from err

    def generate_object_name(self, document_id, language, version_id, filename):
        """Return the canonical object key for a document-version file.

        BUG FIX: the filename was previously omitted from the key, so every
        file of the same document version mapped to one colliding object and
        per-filename download/delete could not address uploads correctly.
        """
        return f"{document_id}/{language}/{version_id}/{filename}"

    def upload_document_file(self, tenant_id, document_id, language, version_id, filename, file_data):
        """Store ``file_data`` in the tenant bucket; return True on success.

        Accepts FileStorage, BytesIO, str (encoded as UTF-8) or raw bytes.

        Raises:
            TypeError: for any other payload type.
            Exception: wrapping any S3Error from the MinIO server.
        """
        bucket_name = self.generate_bucket_name(tenant_id)
        object_name = self.generate_object_name(document_id, language, version_id, filename)

        try:
            # Normalise every supported payload type down to bytes.
            if isinstance(file_data, FileStorage):
                file_data = file_data.read()
            elif isinstance(file_data, io.BytesIO):
                file_data = file_data.getvalue()
            elif isinstance(file_data, str):
                file_data = file_data.encode('utf-8')
            elif not isinstance(file_data, bytes):
                raise TypeError('Unsupported file type. Expected FileStorage, BytesIO, str, or bytes.')

            self.client.put_object(
                bucket_name, object_name, io.BytesIO(file_data), len(file_data)
            )
            return True
        except S3Error as err:
            raise Exception(f"Error occurred while uploading file: {err}") from err

    def download_document_file(self, tenant_id, document_id, language, version_id, filename):
        """Fetch and return the object's content as bytes.

        Raises:
            Exception: wrapping any S3Error (e.g. missing object/bucket).
        """
        bucket_name = self.generate_bucket_name(tenant_id)
        object_name = self.generate_object_name(document_id, language, version_id, filename)
        try:
            response = self.client.get_object(bucket_name, object_name)
            return response.read()
        except S3Error as err:
            raise Exception(f"Error occurred while downloading file: {err}") from err

    def list_document_files(self, tenant_id, document_id, language=None, version_id=None):
        """List object keys for a document, optionally narrowed by language/version.

        Returns:
            list[str]: object names under the computed prefix.

        Raises:
            Exception: wrapping any S3Error from the MinIO server.
        """
        bucket_name = self.generate_bucket_name(tenant_id)
        # Build the narrowest prefix the caller's filters allow.
        prefix = f"{document_id}/"
        if language:
            prefix += f"{language}/"
        if version_id:
            prefix += f"{version_id}/"
        try:
            objects = self.client.list_objects(bucket_name, prefix=prefix, recursive=True)
            return [obj.object_name for obj in objects]
        except S3Error as err:
            raise Exception(f"Error occurred while listing files: {err}") from err

    def delete_document_file(self, tenant_id, document_id, language, version_id, filename):
        """Remove one document-version object; return True on success.

        Raises:
            Exception: wrapping any S3Error from the MinIO server.
        """
        bucket_name = self.generate_bucket_name(tenant_id)
        object_name = self.generate_object_name(document_id, language, version_id, filename)
        try:
            self.client.remove_object(bucket_name, object_name)
            return True
        except S3Error as err:
            raise Exception(f"Error occurred while deleting file: {err}") from err
|
||||
@@ -141,7 +141,7 @@ def select_model_variables(tenant):
|
||||
default_headers=portkey_headers)
|
||||
tool_calling_supported = False
|
||||
match llm_model:
|
||||
case 'gpt-4-turbo' | 'gpt-4o':
|
||||
case 'gpt-4-turbo' | 'gpt-4o' | 'gpt-4o-mini':
|
||||
tool_calling_supported = True
|
||||
case _:
|
||||
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
||||
|
||||
@@ -61,7 +61,7 @@ class Config(object):
|
||||
|
||||
# supported LLMs
|
||||
SUPPORTED_EMBEDDINGS = ['openai.text-embedding-3-small', 'openai.text-embedding-3-large', 'mistral.mistral-embed']
|
||||
SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet']
|
||||
SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet', 'openai.gpt-4o-mini']
|
||||
|
||||
ANTHROPIC_LLM_VERSIONS = {'claude-3-5-sonnet': 'claude-3-5-sonnet-20240620', }
|
||||
|
||||
@@ -71,6 +71,7 @@ class Config(object):
|
||||
# Annotation text chunk length
|
||||
ANNOTATION_TEXT_CHUNK_LENGTH = {
|
||||
'openai.gpt-4o': 10000,
|
||||
'openai.gpt-4o-mini': 10000,
|
||||
'anthropic.claude-3-5-sonnet': 8000
|
||||
}
|
||||
|
||||
@@ -184,12 +185,95 @@ class DevConfig(Config):
|
||||
# PATH settings
|
||||
ffmpeg_path = '/usr/bin/ffmpeg'
|
||||
|
||||
# MINIO
|
||||
MINIO_ENDPOINT = 'minio:9000'
|
||||
MINIO_ACCESS_KEY = 'minioadmin'
|
||||
MINIO_SECRET_KEY = 'minioadmin'
|
||||
|
||||
|
||||
class ProdConfig(Config):
|
||||
DEVELOPMENT = False
|
||||
DEBUG = False
|
||||
# SQLALCHEMY_DATABASE_URI = environ.get('SQLALCHEMY_DATABASE_URI') or \
|
||||
# 'sqlite:///' + os.path.join(basedir, 'db.sqlite')
|
||||
DEVELOPMENT = True
|
||||
DEBUG = True
|
||||
FLASK_DEBUG = True
|
||||
PYCHARM_DEBUG = False
|
||||
DB_HOST = environ.get('DB_HOST', 'bswnz4.stackhero-network.com')
|
||||
DB_USER = environ.get('DB_USER', 'luke_skywalker')
|
||||
DB_PASS = environ.get('DB_PASS', '2MK&1rHmWEydE2rFuJLq*ls%tdkPAk2')
|
||||
DB_NAME = environ.get('DB_NAME', 'eveai')
|
||||
DB_PORT = environ.get('DB_PORT', '5945')
|
||||
|
||||
SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
|
||||
SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
|
||||
EXPLAIN_TEMPLATE_LOADING = False
|
||||
|
||||
# Define the nginx prefix used for the specific apps
|
||||
EVEAI_APP_LOCATION_PREFIX = '/admin'
|
||||
EVEAI_CHAT_LOCATION_PREFIX = '/chat'
|
||||
|
||||
# flask-mailman settings
|
||||
MAIL_USERNAME = 'eveai_super@flow-it.net'
|
||||
MAIL_PASSWORD = '$6xsWGbNtx$CFMQZqc*'
|
||||
|
||||
# file upload settings
|
||||
UPLOAD_FOLDER = '/app/tenant_files'
|
||||
|
||||
REDIS_USER = 'admin'
|
||||
REDIS_PASS = 'b32vtDtLriSY1fL2zGrZg8IZKI0g9ucsLtVNanRFAras6oZ51wjVNB1Y05uG7uEw'
|
||||
REDIS_URL = '8bciqc.stackhero-network.com'
|
||||
REDIS_PORT = '9961'
|
||||
REDIS_BASE_URI = f'redis://{REDIS_USER}:{REDIS_PASS}@{REDIS_URL}:{REDIS_PORT}'
|
||||
|
||||
# Celery settings
|
||||
# eveai_app Redis Settings
|
||||
CELERY_BROKER_URL = f'{REDIS_BASE_URI}/0'
|
||||
CELERY_RESULT_BACKEND = f'{REDIS_BASE_URI}/0'
|
||||
# eveai_chat Redis Settings
|
||||
CELERY_BROKER_URL_CHAT = f'{REDIS_BASE_URI}/3'
|
||||
CELERY_RESULT_BACKEND_CHAT = f'{REDIS_BASE_URI}/3'
|
||||
|
||||
# Session settings
|
||||
SESSION_REDIS = redis.from_url(f'{REDIS_BASE_URI}/2')
|
||||
|
||||
# OpenAI API Keys
|
||||
OPENAI_API_KEY = 'sk-proj-8R0jWzwjL7PeoPyMhJTZT3BlbkFJLb6HfRB2Hr9cEVFWEhU7'
|
||||
|
||||
# Groq API Keys
|
||||
GROQ_API_KEY = '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'
|
||||
|
||||
# Anthropic API Keys
|
||||
ANTHROPIC_API_KEY = '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'
|
||||
|
||||
# Portkey API Keys
|
||||
PORTKEY_API_KEY = 'T2Dt4QTpgCvWxa1OftYCJtj7NcDZ'
|
||||
|
||||
# Unstructured settings
|
||||
UNSTRUCTURED_API_KEY = 'pDgCrXumYhM3CNvjvwV8msMldXC3uw'
|
||||
UNSTRUCTURED_BASE_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io'
|
||||
UNSTRUCTURED_FULL_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io/general/v0/general'
|
||||
|
||||
# SocketIO settings
|
||||
SOCKETIO_MESSAGE_QUEUE = f'{REDIS_BASE_URI}/1'
|
||||
SOCKETIO_CORS_ALLOWED_ORIGINS = '*'
|
||||
SOCKETIO_LOGGER = True
|
||||
SOCKETIO_ENGINEIO_LOGGER = True
|
||||
SOCKETIO_PING_TIMEOUT = 20000
|
||||
SOCKETIO_PING_INTERVAL = 25000
|
||||
SOCKETIO_MAX_IDLE_TIME = timedelta(minutes=60) # Changing this value ==> change maxConnectionDuration value in
|
||||
# eveai-chat-widget.js
|
||||
|
||||
# Google Cloud settings
|
||||
GC_PROJECT_NAME = 'eveai-420711'
|
||||
GC_LOCATION = 'europe-west1'
|
||||
GC_KEY_RING = 'eveai-chat'
|
||||
GC_CRYPTO_KEY = 'envelope-encryption-key'
|
||||
|
||||
# JWT settings
|
||||
JWT_SECRET_KEY = 'bsdMkmQ8ObfMD52yAFg4trrvjgjMhuIqg2fjDpD/JqvgY0ccCcmlsEnVFmR79WPiLKEA3i8a5zmejwLZKl4v9Q=='
|
||||
|
||||
# PATH settings
|
||||
ffmpeg_path = '/usr/bin/ffmpeg'
|
||||
|
||||
|
||||
config = {
|
||||
|
||||
79
config/prompts/openai/gpt-4o-mini.yaml
Normal file
79
config/prompts/openai/gpt-4o-mini.yaml
Normal file
@@ -0,0 +1,79 @@
|
||||
html_parse: |
|
||||
You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
|
||||
|
||||
# Best practices are:
|
||||
- Respect wordings and language(s) used in the HTML.
|
||||
- The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
|
||||
- Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.
|
||||
- Be careful of encoding of the text. Everything needs to be human readable.
|
||||
|
||||
Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
|
||||
|
||||
HTML is between triple backquotes.
|
||||
|
||||
```{html}```
|
||||
|
||||
pdf_parse: |
|
||||
  You are a top administrative aide specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
|
||||
|
||||
# Best practices are:
|
||||
- Respect wordings and language(s) used in the PDF.
|
||||
- The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
|
||||
- When headings are numbered, show the numbering and define the header level.
|
||||
- A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
|
||||
- Paragraphs are to be stripped of newlines so they become easily readable.
|
||||
- Be careful of encoding of the text. Everything needs to be human readable.
|
||||
|
||||
Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
|
||||
|
||||
PDF content is between triple backquotes.
|
||||
|
||||
```{pdf_content}```
|
||||
|
||||
summary: |
|
||||
Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
|
||||
```{text}```
|
||||
|
||||
rag: |
|
||||
Answer the question based on the following context, delimited between triple backquotes.
|
||||
{tenant_context}
|
||||
Use the following {language} in your communication, and cite the sources used.
|
||||
If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
|
||||
Context:
|
||||
```{context}```
|
||||
Question:
|
||||
{question}
|
||||
|
||||
history: |
|
||||
You are a helpful assistant that details a question based on a previous context,
|
||||
in such a way that the question is understandable without the previous context.
|
||||
The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
|
||||
The history is delimited between triple backquotes.
|
||||
You answer by stating the question in {language}.
|
||||
History:
|
||||
```{history}```
|
||||
Question to be detailed:
|
||||
{question}
|
||||
|
||||
encyclopedia: |
|
||||
You have a lot of background knowledge, and as such you are some kind of
|
||||
'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
|
||||
If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
|
||||
Question:
|
||||
{question}
|
||||
|
||||
transcript: |
|
||||
You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
|
||||
|
||||
# Best practices and steps are:
|
||||
- Respect wordings and language(s) used in the transcription. Main language is {language}.
|
||||
- Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.
|
||||
- Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
|
||||
- annotate the text to identify these logical parts using headings in {language}.
|
||||
- improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
|
||||
|
||||
Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
|
||||
|
||||
The transcript is between triple backquotes.
|
||||
|
||||
```{transcript}```
|
||||
@@ -15,6 +15,9 @@ x-common-variables: &common-variables
|
||||
DB_NAME: eveai
|
||||
FLASK_ENV: development
|
||||
FLASK_DEBUG: 1
|
||||
MINIO_ENDPOINT: minio:9000
|
||||
MINIO_ACCESS_KEY: minioadmin
|
||||
MINIO_SECRET_KEY: minioadmin
|
||||
|
||||
services:
|
||||
nginx:
|
||||
@@ -48,12 +51,13 @@ services:
|
||||
- ../scripts:/app/scripts
|
||||
- ../patched_packages:/app/patched_packages
|
||||
- ./logs:/app/logs
|
||||
- ./tenant_files:/app/tenant_files
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
minio:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:5001/health"]
|
||||
interval: 10s
|
||||
@@ -76,12 +80,13 @@ services:
|
||||
- ../scripts:/app/scripts
|
||||
- ../patched_packages:/app/patched_packages
|
||||
- ./logs:/app/logs
|
||||
- ./tenant_files:/app/tenant_files
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
minio:
|
||||
condition: service_healthy
|
||||
# healthcheck:
|
||||
# test: [ "CMD", "curl", "-f", "http://localhost:5001/health" ]
|
||||
# interval: 10s
|
||||
@@ -174,7 +179,30 @@ services:
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
#volumes:
|
||||
|
||||
minio:
|
||||
image: minio/minio
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
expose:
|
||||
- 9000
|
||||
volumes:
|
||||
- ./minio/data:/data
|
||||
- ./minio/config:/root/.minio
|
||||
environment:
|
||||
MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
|
||||
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
|
||||
command: server /data --console-address ":9001"
|
||||
healthcheck:
|
||||
test: [ "CMD", "mc", "ready", "local" ]
|
||||
interval: 30s
|
||||
timeout: 20s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
volumes:
|
||||
minio_data:
|
||||
# db-data:
|
||||
# redis-data:
|
||||
# tenant-files:
|
||||
|
||||
@@ -6,7 +6,8 @@ from flask_security.signals import user_authenticated
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
import logging.config
|
||||
|
||||
from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session
|
||||
from common.extensions import (db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session,
|
||||
minio_client)
|
||||
from common.models.user import User, Role, Tenant, TenantDomain
|
||||
import common.models.interaction
|
||||
from config.logging_config import LOGGING
|
||||
@@ -102,6 +103,7 @@ def register_extensions(app):
|
||||
cors.init_app(app)
|
||||
kms_client.init_app(app)
|
||||
session.init_app(app)
|
||||
minio_client.init_app(app)
|
||||
|
||||
|
||||
# Register Blueprints
|
||||
|
||||
@@ -14,9 +14,10 @@ import requests
|
||||
from requests.exceptions import SSLError
|
||||
from urllib.parse import urlparse
|
||||
import io
|
||||
from minio.error import S3Error
|
||||
|
||||
from common.models.document import Document, DocumentVersion
|
||||
from common.extensions import db
|
||||
from common.extensions import db, minio_client
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
|
||||
AddURLsForm
|
||||
from common.utils.middleware import mw_before_request
|
||||
@@ -558,33 +559,34 @@ def upload_file_for_version(doc_vers, file, extension):
|
||||
doc_vers.file_name = doc_vers.calc_file_name()
|
||||
doc_vers.file_location = doc_vers.calc_file_location()
|
||||
|
||||
upload_path = os.path.join(current_app.config['UPLOAD_FOLDER'], doc_vers.file_location)
|
||||
if not os.path.exists(upload_path):
|
||||
os.makedirs(upload_path, exist_ok=True)
|
||||
if isinstance(file, FileStorage):
|
||||
file.save(os.path.join(upload_path, doc_vers.file_name))
|
||||
elif isinstance(file, io.BytesIO):
|
||||
# It's a BytesIO object, handle accordingly
|
||||
# Example: write content to a file manually
|
||||
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
|
||||
f.write(file.getvalue())
|
||||
elif isinstance(file, str):
|
||||
# It's a string, handle accordingly
|
||||
with open(os.path.join(upload_path, doc_vers.file_name), 'w') as f:
|
||||
f.write(file)
|
||||
else:
|
||||
raise TypeError('Unsupported file type.')
|
||||
# Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
|
||||
tenant_id = session['tenant']['id']
|
||||
minio_client.create_tenant_bucket(tenant_id)
|
||||
|
||||
try:
|
||||
minio_client.upload_document_file(
|
||||
tenant_id,
|
||||
doc_vers.doc_id,
|
||||
doc_vers.language,
|
||||
doc_vers.id,
|
||||
doc_vers.file_name,
|
||||
file
|
||||
)
|
||||
db.session.commit()
|
||||
current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
|
||||
f'document version {doc_vers.id} while uploading file.')
|
||||
except S3Error as e:
|
||||
db.session.rollback()
|
||||
flash('Error saving document to MinIO.', 'error')
|
||||
current_app.logger.error(
|
||||
f'Error saving document to MinIO for tenant {tenant_id}: {e}')
|
||||
raise
|
||||
except SQLAlchemyError as e:
|
||||
db.session.rollback()
|
||||
flash('Error saving document.', 'error')
|
||||
flash('Error saving document metadata.', 'error')
|
||||
current_app.logger.error(
|
||||
f'Error saving document for tenant {session["tenant"]["id"]} while uploading file: {e}')
|
||||
|
||||
current_app.logger.info(f'Succesfully saved document for tenant {session['tenant']['id']} for '
|
||||
f'document version {doc_vers.id} while uploading file.')
|
||||
f'Error saving document metadata for tenant {tenant_id}: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def fetch_html(url):
|
||||
|
||||
@@ -8,7 +8,7 @@ from sqlalchemy.exc import SQLAlchemyError
|
||||
import ast
|
||||
|
||||
from common.models.user import User, Tenant, Role, TenantDomain
|
||||
from common.extensions import db, kms_client, security
|
||||
from common.extensions import db, kms_client, security, minio_client
|
||||
from common.utils.security_utils import send_confirmation_email, send_reset_email
|
||||
from .user_forms import TenantForm, CreateUserForm, EditUserForm, TenantDomainForm
|
||||
from common.utils.database import Database
|
||||
@@ -61,12 +61,13 @@ def tenant():
|
||||
# rag_tuning=form.rag_tuning.data)
|
||||
|
||||
# Handle Embedding Variables
|
||||
new_tenant.html_tags = form.html_tags.data.split(',') if form.html_tags.data else []
|
||||
new_tenant.html_end_tags = form.html_end_tags.data.split(',') if form.html_end_tags.data else []
|
||||
new_tenant.html_included_elements = form.html_included_elements.data.split(
|
||||
',') if form.html_included_elements.data else []
|
||||
new_tenant.html_excluded_elements = form.html_excluded_elements.data.split(
|
||||
',') if form.html_excluded_elements.data else []
|
||||
new_tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
|
||||
new_tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
|
||||
if form.html_end_tags.data else []
|
||||
new_tenant.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
|
||||
if form.html_included_elements.data else []
|
||||
new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
|
||||
if form.html_excluded_elements.data else []
|
||||
|
||||
current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
|
||||
f'html_end_tags: {new_tenant.html_end_tags},'
|
||||
@@ -87,11 +88,17 @@ def tenant():
|
||||
flash(f'Failed to add tenant to database. Error: {str(e)}')
|
||||
return render_template('user/tenant.html', form=form)
|
||||
|
||||
# Create schema for new tenant
|
||||
current_app.logger.info(f"Successfully created tenant {new_tenant.id} in Database")
|
||||
flash(f"Successfully created tenant {new_tenant.id} in Database")
|
||||
|
||||
# Create schema for new tenant
|
||||
current_app.logger.info(f"Creating schema for tenant {new_tenant.id}")
|
||||
Database(new_tenant.id).create_tenant_schema()
|
||||
|
||||
# Create MinIO bucket for new tenant
|
||||
current_app.logger.info(f"Creating MinIO bucket for tenant {new_tenant.id}")
|
||||
minio_client.create_tenant_bucket(new_tenant.id)
|
||||
|
||||
return redirect(prefixed_url_for('basic_bp.index'))
|
||||
else:
|
||||
form_validation_failed(request, form)
|
||||
|
||||
@@ -81,6 +81,12 @@ def ask_question(tenant_id, question, language, session_id, user_timezone):
|
||||
current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
|
||||
raise
|
||||
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Received question for tenant {tenant_id}:\n{question}. Processing...')
|
||||
current_app.rag_tuning_logger.debug(f'Tenant Information: \n{tenant.to_dict()}')
|
||||
current_app.rag_tuning_logger.debug(f'===================================================================')
|
||||
current_app.rag_tuning_logger.debug(f'===================================================================')
|
||||
|
||||
result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
|
||||
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
|
||||
result['interaction_id'] = interaction.id
|
||||
@@ -116,6 +122,9 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
|
||||
|
||||
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
|
||||
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
new_interaction.detailed_question = detailed_question
|
||||
new_interaction.detailed_question_at = dt.now(tz.utc)
|
||||
|
||||
@@ -126,6 +135,9 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
|
||||
full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
|
||||
rag_prompt = ChatPromptTemplate.from_template(full_template)
|
||||
setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
|
||||
new_interaction_embeddings = []
|
||||
if not model_variables['cited_answer_cls']: # The model doesn't support structured feedback
|
||||
@@ -151,6 +163,11 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
|
||||
current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
|
||||
current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
|
||||
current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
|
||||
current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
new_interaction.answer = result['answer']
|
||||
|
||||
# Filter out the existing Embedding IDs
|
||||
@@ -161,7 +178,11 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
|
||||
.all()
|
||||
)
|
||||
existing_embedding_ids = [emb.id for emb in embeddings]
|
||||
urls = [emb.document_version.url for emb in embeddings]
|
||||
urls = list(set(emb.document_version.url for emb in embeddings))
|
||||
if tenant.rag_tuning:
|
||||
current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
|
||||
current_app.rag_tuning_logger.debug(f'{urls}')
|
||||
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
|
||||
|
||||
for emb_id in existing_embedding_ids:
|
||||
new_interaction_embedding = InteractionEmbedding(embedding_id=emb_id)
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging.config
|
||||
from flask import Flask
|
||||
|
||||
from common.utils.celery_utils import make_celery, init_celery
|
||||
from common.extensions import db
|
||||
from common.extensions import db, minio_client
|
||||
from config.logging_config import LOGGING
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@ def create_app(config_file=None):
|
||||
|
||||
def register_extensions(app):
|
||||
db.init_app(app)
|
||||
minio_client.init_app(app)
|
||||
|
||||
|
||||
app, celery = create_app()
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import io
|
||||
import os
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
import subprocess
|
||||
@@ -21,7 +22,7 @@ import PyPDF2
|
||||
from pydub import AudioSegment
|
||||
import tempfile
|
||||
|
||||
from common.extensions import db
|
||||
from common.extensions import db, minio_client
|
||||
from common.models.document import DocumentVersion, Embedding
|
||||
from common.models.user import Tenant
|
||||
from common.utils.celery_utils import current_celery
|
||||
@@ -32,11 +33,6 @@ from common.utils.os_utils import safe_remove, sync_folder
|
||||
|
||||
@current_celery.task(name='create_embeddings', queue='embeddings')
|
||||
def create_embeddings(tenant_id, document_version_id):
|
||||
# Setup Remote Debugging only if PYCHARM_DEBUG=True
|
||||
if current_app.config['PYCHARM_DEBUG']:
|
||||
import pydevd_pycharm
|
||||
pydevd_pycharm.settrace('localhost', port=50170, stdoutToServer=True, stderrToServer=True)
|
||||
|
||||
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id}.')
|
||||
|
||||
try:
|
||||
@@ -50,6 +46,7 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
|
||||
# Select variables to work with depending on tenant and model
|
||||
model_variables = select_model_variables(tenant)
|
||||
current_app.logger.debug(f'Model variables: {model_variables}')
|
||||
|
||||
# Retrieve document version to process
|
||||
document_version = DocumentVersion.query.get(document_version_id)
|
||||
@@ -107,33 +104,20 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
|
||||
|
||||
def process_pdf(tenant, model_variables, document_version):
|
||||
base_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location)
|
||||
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location,
|
||||
document_version.file_name)
|
||||
if os.path.exists(file_path):
|
||||
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, document_version.file_name)
|
||||
|
||||
pdf_text = ''
|
||||
# Function to extract text from PDF and return as string
|
||||
with open(file_path, 'rb') as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
for page_num in range(len(reader.pages)):
|
||||
page = reader.pages[page_num]
|
||||
pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data))
|
||||
for page in pdf_reader.pages:
|
||||
pdf_text += page.extract_text()
|
||||
else:
|
||||
current_app.logger.error(f'The physical file for document version {document_version.id} '
|
||||
f'for tenant {tenant.id} '
|
||||
f'at {file_path} does not exist')
|
||||
create_embeddings.update_state(state=states.FAILURE)
|
||||
raise
|
||||
|
||||
markdown = generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_text)
|
||||
markdown_file_name = f'{document_version.id}.md'
|
||||
output_file = os.path.join(base_path, markdown_file_name)
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(markdown)
|
||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
||||
markdown_file_name, markdown.encode())
|
||||
|
||||
potential_chunks = create_potential_chunks_for_markdown(base_path, markdown_file_name, tenant)
|
||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||
model_variables['max_chunk_size'])
|
||||
|
||||
@@ -175,43 +159,29 @@ def delete_embeddings_for_document_version(document_version):
|
||||
|
||||
|
||||
def process_html(tenant, model_variables, document_version):
|
||||
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, document_version.file_name)
|
||||
html_content = file_data.decode('utf-8')
|
||||
|
||||
# The tags to be considered can be dependent on the tenant
|
||||
html_tags = model_variables['html_tags']
|
||||
html_end_tags = model_variables['html_end_tags']
|
||||
html_included_elements = model_variables['html_included_elements']
|
||||
html_excluded_elements = model_variables['html_excluded_elements']
|
||||
|
||||
base_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location)
|
||||
|
||||
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location,
|
||||
document_version.file_name)
|
||||
|
||||
if os.path.exists(file_path):
|
||||
with open(file_path, 'rb') as f:
|
||||
html_content = f.read()
|
||||
else:
|
||||
current_app.logger.error(f'The physical file for document version {document_version.id} '
|
||||
f'for tenant {tenant.id} '
|
||||
f'at {file_path} does not exist')
|
||||
create_embeddings.update_state(state=states.FAILURE)
|
||||
raise
|
||||
|
||||
extracted_html, title = parse_html(tenant, html_content, html_tags, included_elements=html_included_elements,
|
||||
excluded_elements=html_excluded_elements)
|
||||
|
||||
extracted_file_name = f'{document_version.id}-extracted.html'
|
||||
output_file = os.path.join(base_path, extracted_file_name)
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(extracted_html)
|
||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
||||
extracted_file_name, extracted_html.encode())
|
||||
|
||||
markdown = generate_markdown_from_html(tenant, model_variables, document_version, extracted_html)
|
||||
markdown_file_name = f'{document_version.id}.md'
|
||||
output_file = os.path.join(base_path, markdown_file_name)
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(markdown)
|
||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
||||
markdown_file_name, markdown.encode())
|
||||
|
||||
potential_chunks = create_potential_chunks_for_markdown(base_path, markdown_file_name, tenant)
|
||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||
model_variables['max_chunk_size'])
|
||||
|
||||
@@ -222,7 +192,7 @@ def process_html(tenant, model_variables, document_version):
|
||||
else:
|
||||
document_version.system_context = (f'Title: {title}\n')
|
||||
|
||||
enriched_chunks = enrich_chunks(tenant, document_version, chunks)
|
||||
enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)
|
||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
|
||||
try:
|
||||
@@ -241,16 +211,17 @@ def process_html(tenant, model_variables, document_version):
|
||||
f'on document version {document_version.id} :-)')
|
||||
|
||||
|
||||
def enrich_chunks(tenant, document_version, chunks):
|
||||
def enrich_chunks(tenant, document_version, title, chunks):
|
||||
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}')
|
||||
current_app.logger.debug(f'Nr of chunks: {len(chunks)}')
|
||||
chunk_total_context = (f'Filename: {document_version.file_name}\n'
|
||||
f'User Context:{document_version.user_context}\n'
|
||||
f'User Context:\n{document_version.user_context}\n\n'
|
||||
f'{document_version.system_context}\n\n')
|
||||
enriched_chunks = []
|
||||
initial_chunk = (f'Filename: {document_version.file_name}\n'
|
||||
f'User Context:\n{document_version.user_context}\n\n'
|
||||
f'Title: {title}\n'
|
||||
f'{chunks[0]}')
|
||||
|
||||
enriched_chunks.append(initial_chunk)
|
||||
@@ -311,7 +282,7 @@ def summarize_chunk(tenant, model_variables, document_version, chunk):
|
||||
text_to_summarize = doc_creator.create_documents(chunk)
|
||||
|
||||
try:
|
||||
summary = chain.run(text_to_summarize)
|
||||
summary = chain.invoke({"text": text_to_summarize})
|
||||
current_app.logger.debug(f'Finished summarizing chunk for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}.')
|
||||
return summary
|
||||
@@ -391,23 +362,26 @@ def process_youtube(tenant, model_variables, document_version):
|
||||
markdown_file_name = f'{document_version.id}.md'
|
||||
|
||||
# Remove existing files (in case of a re-processing of the file
|
||||
safe_remove(os.path.join(base_path, download_file_name))
|
||||
safe_remove(os.path.join(base_path, compressed_file_name))
|
||||
safe_remove(os.path.join(base_path, transcription_file_name))
|
||||
safe_remove(os.path.join(base_path, markdown_file_name))
|
||||
sync_folder(base_path)
|
||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, download_file_name)
|
||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, compressed_file_name)
|
||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, transcription_file_name)
|
||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, markdown_file_name)
|
||||
|
||||
of, title, description, author = download_youtube(document_version.url, base_path, download_file_name, tenant)
|
||||
of, title, description, author = download_youtube(document_version.url, tenant.id, document_version,
|
||||
download_file_name)
|
||||
document_version.system_context = f'Title: {title}\nDescription: {description}\nAuthor: {author}'
|
||||
compress_audio(base_path, download_file_name, compressed_file_name, tenant)
|
||||
transcribe_audio(base_path, compressed_file_name, transcription_file_name,
|
||||
document_version.language, tenant, model_variables)
|
||||
annotate_transcription(base_path, transcription_file_name, markdown_file_name,
|
||||
document_version.language, tenant, model_variables)
|
||||
compress_audio(tenant.id, document_version, download_file_name, compressed_file_name)
|
||||
transcribe_audio(tenant.id, document_version, compressed_file_name, transcription_file_name, model_variables)
|
||||
annotate_transcription(tenant, document_version, transcription_file_name, markdown_file_name, model_variables)
|
||||
|
||||
potential_chunks = create_potential_chunks_for_markdown(base_path, markdown_file_name, tenant)
|
||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||
actual_chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||
model_variables['max_chunk_size'])
|
||||
|
||||
enriched_chunks = enrich_chunks(tenant, document_version, actual_chunks)
|
||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
|
||||
@@ -427,27 +401,41 @@ def process_youtube(tenant, model_variables, document_version):
|
||||
f'on Youtube document version {document_version.id} :-)')
|
||||
|
||||
|
||||
def download_youtube(url, file_location, file_name, tenant):
|
||||
def download_youtube(url, tenant_id, document_version, file_name):
|
||||
try:
|
||||
current_app.logger.info(f'Downloading YouTube video: {url} on location {file_location} for tenant: {tenant.id}')
|
||||
current_app.logger.info(f'Downloading YouTube video: {url} for tenant: {tenant_id}')
|
||||
yt = YouTube(url)
|
||||
stream = yt.streams.get_audio_only()
|
||||
output_file = stream.download(output_path=file_location, filename=file_name)
|
||||
current_app.logger.info(f'Downloaded YouTube video: {url} on location {file_location} for tenant: {tenant.id}')
|
||||
return output_file, yt.title, yt.description, yt.author
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
||||
stream.download(output_path=temp_file.name)
|
||||
with open(temp_file.name, 'rb') as f:
|
||||
file_data = f.read()
|
||||
|
||||
minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id,
|
||||
file_name, file_data)
|
||||
|
||||
current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}')
|
||||
return file_name, yt.title, yt.description, yt.author
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error downloading YouTube video: {url} on location {file_location} for '
|
||||
f'tenant: {tenant.id} with error: {e}')
|
||||
current_app.logger.error(f'Error downloading YouTube video: {url} for tenant: {tenant_id} with error: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def compress_audio(file_location, input_file, output_file, tenant):
|
||||
def compress_audio(tenant_id, document_version, input_file, output_file):
|
||||
try:
|
||||
current_app.logger.info(f'Compressing audio on {file_location} for tenant: {tenant.id}')
|
||||
current_app.logger.info(f'Compressing audio for tenant: {tenant_id}')
|
||||
|
||||
# Run the compression script
|
||||
input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||
document_version.id, input_file)
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
|
||||
temp_input.write(input_data)
|
||||
temp_input.flush()
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_output:
|
||||
result = subprocess.run(
|
||||
['scripts/compress.sh', '-d', file_location, '-i', input_file, '-o', output_file],
|
||||
['ffmpeg', '-i', temp_input.name, '-b:a', '64k', '-f', 'mp3', temp_output.name],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
@@ -455,55 +443,30 @@ def compress_audio(file_location, input_file, output_file, tenant):
|
||||
if result.returncode != 0:
|
||||
raise Exception(f"Compression failed: {result.stderr}")
|
||||
|
||||
output_file_path = os.path.join(file_location, output_file)
|
||||
with open(temp_output.name, 'rb') as f:
|
||||
compressed_data = f.read()
|
||||
|
||||
# Additional check for file stability
|
||||
previous_size = -1
|
||||
stable_count = 0
|
||||
max_attempts = 12 # 1 minute total wait time
|
||||
minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id,
|
||||
output_file, compressed_data)
|
||||
|
||||
for _ in range(max_attempts):
|
||||
if os.path.exists(output_file_path):
|
||||
current_size = os.path.getsize(output_file_path)
|
||||
if current_size == previous_size:
|
||||
stable_count += 1
|
||||
if stable_count >= 3: # File size hasn't changed for 3 checks
|
||||
break
|
||||
else:
|
||||
stable_count = 0
|
||||
previous_size = current_size
|
||||
gevent.sleep(5)
|
||||
|
||||
if stable_count < 3:
|
||||
raise Exception("File size did not stabilize within the expected time")
|
||||
|
||||
current_app.logger.info(f'Compressed audio for {file_location} for tenant: {tenant.id}')
|
||||
return output_file_path
|
||||
current_app.logger.info(f'Compressed audio for tenant: {tenant_id}')
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error compressing audio on {file_location} for tenant: {tenant.id} with error: {e}')
|
||||
current_app.logger.error(f'Error compressing audio for tenant: {tenant_id} with error: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def transcribe_audio(file_location, input_file, output_file, language, tenant, model_variables):
|
||||
def transcribe_audio(tenant_id, document_version, input_file, output_file, model_variables):
|
||||
try:
|
||||
current_app.logger.info(f'Transcribing audio on {file_location} for tenant: {tenant.id}')
|
||||
current_app.logger.info(f'Transcribing audio for tenant: {tenant_id}')
|
||||
client = model_variables['transcription_client']
|
||||
model = model_variables['transcription_model']
|
||||
input_file_path = os.path.join(file_location, input_file)
|
||||
output_file_path = os.path.join(file_location, output_file)
|
||||
|
||||
# Wait for the input file to exist
|
||||
count = 0
|
||||
while not os.path.exists(input_file_path) and count < 10:
|
||||
gevent.sleep(1)
|
||||
current_app.logger.debug(f'Waiting for {input_file_path} to exist... Count: {count}')
|
||||
count += 1
|
||||
# Download the audio file from MinIO
|
||||
audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||
document_version.id, input_file)
|
||||
|
||||
if not os.path.exists(input_file_path):
|
||||
raise FileNotFoundError(f"Input file {input_file_path} not found after waiting.")
|
||||
|
||||
# Load the audio file
|
||||
audio = AudioSegment.from_file(input_file_path)
|
||||
# Load the audio data into pydub
|
||||
audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
||||
|
||||
# Define segment length (e.g., 10 minutes)
|
||||
segment_length = 10 * 60 * 1000 # 10 minutes in milliseconds
|
||||
@@ -512,14 +475,16 @@ def transcribe_audio(file_location, input_file, output_file, language, tenant, m
|
||||
|
||||
# Split audio into segments and transcribe each
|
||||
for i, chunk in enumerate(audio[::segment_length]):
|
||||
current_app.logger.debug(f'Transcribing chunk {i} of {len(audio) // segment_length} ')
|
||||
current_app.logger.debug(f'Transcribing chunk {i + 1} of {len(audio) // segment_length + 1}')
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
|
||||
chunk.export(temp_audio.name, format="mp3")
|
||||
|
||||
with open(temp_audio.name, 'rb') as audio_segment:
|
||||
transcription = client.audio.transcriptions.create(
|
||||
file=audio_segment,
|
||||
model=model,
|
||||
language=language,
|
||||
language=document_version.language,
|
||||
response_format='verbose_json',
|
||||
)
|
||||
|
||||
@@ -530,20 +495,25 @@ def transcribe_audio(file_location, input_file, output_file, language, tenant, m
|
||||
# Combine all transcriptions
|
||||
full_transcription = " ".join(transcriptions)
|
||||
|
||||
# Write the full transcription to the output file
|
||||
with open(output_file_path, 'w') as f:
|
||||
f.write(full_transcription)
|
||||
# Upload the full transcription to MinIO
|
||||
minio_client.upload_document_file(
|
||||
tenant_id,
|
||||
document_version.doc_id,
|
||||
document_version.language,
|
||||
document_version.id,
|
||||
output_file,
|
||||
full_transcription.encode('utf-8')
|
||||
)
|
||||
|
||||
current_app.logger.info(f'Transcribed audio for {file_location} for tenant: {tenant.id}')
|
||||
current_app.logger.info(f'Transcribed audio for tenant: {tenant_id}')
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error transcribing audio for {file_location} for tenant: {tenant.id}, '
|
||||
f'with error: {e}')
|
||||
current_app.logger.error(f'Error transcribing audio for tenant: {tenant_id}, with error: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def annotate_transcription(file_location, input_file, output_file, language, tenant, model_variables):
|
||||
def annotate_transcription(tenant, document_version, input_file, output_file, model_variables):
|
||||
try:
|
||||
current_app.logger.debug(f'Annotating transcription on {file_location} for tenant {tenant.id}')
|
||||
current_app.logger.debug(f'Annotating transcription for tenant {tenant.id}')
|
||||
|
||||
char_splitter = CharacterTextSplitter(separator='.',
|
||||
chunk_size=model_variables['annotation_chunk_length'],
|
||||
@@ -552,18 +522,21 @@ def annotate_transcription(file_location, input_file, output_file, language, ten
|
||||
headers_to_split_on = [
|
||||
("#", "Header 1"),
|
||||
("##", "Header 2"),
|
||||
# ("###", "Header 3"),
|
||||
]
|
||||
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
|
||||
|
||||
llm = model_variables['llm']
|
||||
template = model_variables['transcript_template']
|
||||
language_template = create_language_template(template, language)
|
||||
language_template = create_language_template(template, document_version.language)
|
||||
transcript_prompt = ChatPromptTemplate.from_template(language_template)
|
||||
setup = RunnablePassthrough()
|
||||
output_parser = StrOutputParser()
|
||||
with open(os.path.join(file_location, input_file), 'r') as f:
|
||||
transcript = f.read()
|
||||
|
||||
# Download the transcription file from MinIO
|
||||
transcript_data = minio_client.download_document_file(tenant.id, document_version.doc_id,
|
||||
document_version.language, document_version.id,
|
||||
input_file)
|
||||
transcript = transcript_data.decode('utf-8')
|
||||
|
||||
chain = setup | transcript_prompt | llm | output_parser
|
||||
|
||||
@@ -598,38 +571,53 @@ def annotate_transcription(file_location, input_file, output_file, language, ten
|
||||
markdown_chunks.pop()
|
||||
all_markdown_chunks += markdown_chunks
|
||||
|
||||
|
||||
all_markdown_chunks += [last_markdown_chunk]
|
||||
|
||||
annotated_transcript = '\n'.join(all_markdown_chunks)
|
||||
|
||||
with open(os.path.join(file_location, output_file), 'w') as f:
|
||||
f.write(annotated_transcript)
|
||||
# Upload the annotated transcript to MinIO
|
||||
minio_client.upload_document_file(
|
||||
tenant.id,
|
||||
document_version.doc_id,
|
||||
document_version.language,
|
||||
document_version.id,
|
||||
output_file,
|
||||
annotated_transcript.encode('utf-8')
|
||||
)
|
||||
|
||||
current_app.logger.info(f'Annotated transcription for {file_location} for tenant {tenant.id}')
|
||||
current_app.logger.info(f'Annotated transcription for tenant {tenant.id}')
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error annotating transcription for {file_location} for tenant {tenant.id}, '
|
||||
f'with error: {e}')
|
||||
current_app.logger.error(f'Error annotating transcription for tenant {tenant.id}, with error: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def create_potential_chunks_for_markdown(base_path, input_file, tenant):
|
||||
current_app.logger.info(f'Creating potential chunks for {base_path} for tenant {tenant.id}')
|
||||
markdown = ''
|
||||
with open(os.path.join(base_path, input_file), 'r') as f:
|
||||
markdown = f.read()
|
||||
def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
|
||||
try:
|
||||
current_app.logger.info(f'Creating potential chunks for tenant {tenant_id}')
|
||||
|
||||
# Download the markdown file from MinIO
|
||||
markdown_data = minio_client.download_document_file(tenant_id,
|
||||
document_version.doc_id,
|
||||
document_version.language,
|
||||
document_version.id,
|
||||
input_file
|
||||
)
|
||||
markdown = markdown_data.decode('utf-8')
|
||||
|
||||
headers_to_split_on = [
|
||||
("#", "Header 1"),
|
||||
("##", "Header 2"),
|
||||
# ("###", "Header 3"),
|
||||
]
|
||||
|
||||
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
|
||||
md_header_splits = markdown_splitter.split_text(markdown)
|
||||
potential_chunks = [doc.page_content for doc in md_header_splits]
|
||||
|
||||
current_app.logger.debug(f'Created {len(potential_chunks)} potential chunks for tenant {tenant_id}')
|
||||
return potential_chunks
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error creating potential chunks for tenant {tenant_id}, with error: {e}')
|
||||
raise
|
||||
|
||||
|
||||
def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars):
|
||||
|
||||
28
nginx/public/chat_eveai_mini.html
Normal file
28
nginx/public/chat_eveai_mini.html
Normal file
@@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Chat Client EveAI Mini</title>
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<script src="https://cdn.socket.io/4.0.1/socket.io.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script src="/static/js/eveai-sdk.js" defer></script>
|
||||
<script src="/static/js/eveai-chat-widget.js" defer></script>
|
||||
<link rel="stylesheet" href="/static/css/eveai-chat-style.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="chat-container"></div>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const eveAI = new EveAI(
|
||||
'6',
|
||||
'EveAI-CHAT-3622-2083-4559-6024-8786',
|
||||
'http://macstudio.ask-eve-ai-local.com',
|
||||
'en'
|
||||
);
|
||||
eveAI.initializeChat('chat-container');
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
28
nginx/public/chat_flow.html
Normal file
28
nginx/public/chat_flow.html
Normal file
@@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Chat Client AE</title>
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<script src="https://cdn.socket.io/4.0.1/socket.io.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script src="/static/js/eveai-sdk.js" defer></script>
|
||||
<script src="/static/js/eveai-chat-widget.js" defer></script>
|
||||
<link rel="stylesheet" href="/static/css/eveai-chat-style.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="chat-container"></div>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const eveAI = new EveAI(
|
||||
'2',
|
||||
'EveAI-CHAT-8716-3188-4285-8044-9932',
|
||||
'http://macstudio.ask-eve-ai-local.com',
|
||||
'en'
|
||||
);
|
||||
eveAI.initializeChat('chat-container');
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -76,3 +76,7 @@ groq~=0.9.0
|
||||
pydub~=0.25.1
|
||||
argparse~=1.4.0
|
||||
portkey_ai~=1.7.0
|
||||
|
||||
minio~=7.2.7
|
||||
Werkzeug~=3.0.3
|
||||
itsdangerous~=2.2.0
|
||||
@@ -7,7 +7,7 @@ export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # In
|
||||
# Set flask environment variables
|
||||
#export FLASK_ENV=development # Use 'production' as appropriate
|
||||
#export FLASK_DEBUG=1 # Use 0 for production
|
||||
print "Starting EveAI Chat"
|
||||
echo "Starting EveAI Chat"
|
||||
|
||||
# Start Flask app
|
||||
gunicorn -w 4 -k geventwebsocket.gunicorn.workers.GeventWebSocketWorker -b 0.0.0.0:5002 scripts.run_eveai_chat:app
|
||||
|
||||
Reference in New Issue
Block a user