- Improvements to enable deployment in the cloud, mainly changing file access from the local filesystem to MinIO (a minimal usage sketch follows below)
- Improvements to RAG logging, plus some debugging in that area
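As orientation for the MinIO change: tenant files are no longer written under a local UPLOAD_FOLDER but stored in per-tenant MinIO buckets through the new MinioClient wrapper (added in common/utils/minio_utils.py in this commit). A minimal sketch of the resulting call pattern, assuming the DevConfig values this commit introduces; the tenant and document IDs are hypothetical examples, not values from the diff:

    # Sketch only: mirrors the MinioClient API introduced by this commit.
    # Tenant/document IDs below are hypothetical, not values from the diff.
    from flask import Flask
    from common.extensions import minio_client

    app = Flask(__name__)
    app.config.update(
        MINIO_ENDPOINT='minio:9000',       # DevConfig values added by this commit
        MINIO_ACCESS_KEY='minioadmin',
        MINIO_SECRET_KEY='minioadmin',
    )
    minio_client.init_app(app)

    # One bucket per tenant; object keys encode document, language, version, filename.
    minio_client.create_tenant_bucket(42)                            # -> 'tenant-42-bucket'
    minio_client.upload_document_file(42, 7, 'en', 3, '3.md', b'# hello')
    data = minio_client.download_document_file(42, 7, 'en', 3, '3.md')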
.gitignore (vendored, 1 addition)

@@ -2,3 +2,4 @@ docker/db/postgresql/
 docker/db/redis/
 docker/logs/
 docker/tenant_files/
+/docker/minio/
@@ -11,6 +11,7 @@ from flask_session import Session
 from flask_wtf import CSRFProtect
 
 from .utils.key_encryption import JosKMSClient
+from .utils.minio_utils import MinioClient
 
 # Create extensions
 db = SQLAlchemy()
@@ -26,3 +27,4 @@ jwt = JWTManager()
 session = Session()
 
 kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
+minio_client = MinioClient()
@@ -1,5 +1,5 @@
 from langchain_core.retrievers import BaseRetriever
-from sqlalchemy import func, and_, or_
+from sqlalchemy import func, and_, or_, desc
 from sqlalchemy.exc import SQLAlchemyError
 from pydantic import BaseModel, Field
 from typing import Any, Dict
@@ -20,12 +20,56 @@ class EveAIRetriever(BaseRetriever):
         self.tenant_info = tenant_info
 
     def _get_relevant_documents(self, query: str):
         current_app.logger.debug(f'Retrieving relevant documents for query: {query}')
         query_embedding = self._get_query_embedding(query)
         db_class = self.model_variables['embedding_db_model']
         similarity_threshold = self.model_variables['similarity_threshold']
         k = self.model_variables['k']
 
+        if self.tenant_info['rag_tuning']:
+            try:
+                current_date = get_date_in_timezone(self.tenant_info['timezone'])
+                current_app.rag_tuning_logger.debug(f'Current date: {current_date}\n')
+
+                # Debug query to show similarity for all valid documents (without chunk text)
+                debug_query = (
+                    db.session.query(
+                        Document.id.label('document_id'),
+                        DocumentVersion.id.label('version_id'),
+                        db_class.id.label('embedding_id'),
+                        (1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity')
+                    )
+                    .join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
+                    .join(Document, DocumentVersion.doc_id == Document.id)
+                    .filter(
+                        or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
+                        or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date)
+                    )
+                    .order_by(desc('similarity'))
+                )
+
+                debug_results = debug_query.all()
+
+                current_app.logger.debug("Debug: Similarity for all valid documents:")
+                for row in debug_results:
+                    current_app.rag_tuning_logger.debug(f"Doc ID: {row.document_id}, "
+                                                        f"Version ID: {row.version_id}, "
+                                                        f"Embedding ID: {row.embedding_id}, "
+                                                        f"Similarity: {row.similarity}")
+                current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
+            except SQLAlchemyError as e:
+                current_app.logger.error(f'Error generating overview: {e}')
+                db.session.rollback()
+
+        if self.tenant_info['rag_tuning']:
+            current_app.rag_tuning_logger.debug(f'Parameters for Retrieval of documents: \n')
+            current_app.rag_tuning_logger.debug(f'Similarity Threshold: {similarity_threshold}\n')
+            current_app.rag_tuning_logger.debug(f'K: {k}\n')
+            current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
 
         try:
             current_date = get_date_in_timezone(self.tenant_info['timezone'])
             # Subquery to find the latest version of each document
@@ -40,24 +84,31 @@ class EveAIRetriever(BaseRetriever):
             # Main query to filter embeddings
             query_obj = (
                 db.session.query(db_class,
-                                 db_class.embedding.cosine_distance(query_embedding).label('distance'))
+                                 (1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity'))
                 .join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
                 .join(Document, DocumentVersion.doc_id == Document.id)
                 .join(subquery, DocumentVersion.id == subquery.c.latest_version_id)
                 .filter(
-                    or_(Document.valid_from.is_(None), Document.valid_from <= current_date),
-                    or_(Document.valid_to.is_(None), Document.valid_to >= current_date),
-                    db_class.embedding.cosine_distance(query_embedding) < similarity_threshold
+                    or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
+                    or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
+                    (1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold
                 )
-                .order_by('distance')
+                .order_by(desc('similarity'))
                 .limit(k)
             )
 
+            if self.tenant_info['rag_tuning']:
+                current_app.rag_tuning_logger.debug(f'Query executed for Retrieval of documents: \n')
+                current_app.rag_tuning_logger.debug(f'{query_obj.statement}\n')
+                current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
+
             res = query_obj.all()
 
             if self.tenant_info['rag_tuning']:
-                current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents')
-                current_app.rag_tuning_logger.debug(f'---------------------------------------')
+                current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents \n')
+                current_app.rag_tuning_logger.debug(f'Data retrieved: \n')
+                current_app.rag_tuning_logger.debug(f'{res}\n')
+                current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
 
             result = []
             for doc in res:
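A note on the retrieval change above: pgvector's cosine_distance returns a distance (0 = identical, larger = less similar), while the tenant's similarity_threshold is expressed as a cosine similarity. The old filter compared the raw distance against that threshold, which selects in the wrong direction; the new query converts to similarity (1 - distance), filters with > and sorts descending. A small illustrative check (the values are made up, not from the diff):

    # Illustrative values only, showing why the filter direction flips.
    distance = 0.15                    # pgvector: embedding.cosine_distance(query)
    similarity = 1 - distance          # 0.85, what the new query labels 'similarity'

    similarity_threshold = 0.75
    # Old filter: distance < similarity_threshold  -> 0.15 < 0.75 is True, but so is
    #             0.70 < 0.75 (a chunk with similarity only 0.30): weak matches pass.
    # New filter: similarity > similarity_threshold -> keeps only genuinely close chunks.
    assert similarity > similarity_threshold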
@@ -82,7 +82,6 @@ class Tenant(db.Model):
             'html_excluded_elements': self.html_excluded_elements,
             'min_chunk_size': self.min_chunk_size,
             'max_chunk_size': self.max_chunk_size,
-            'es_k'
             'es_k': self.es_k,
             'es_similarity_threshold': self.es_similarity_threshold,
             'chat_RAG_temperature': self.chat_RAG_temperature,
common/utils/minio_utils.py (new file, 86 lines)

@@ -0,0 +1,86 @@
+from minio import Minio
+from minio.error import S3Error
+from flask import Flask
+import io
+from werkzeug.datastructures import FileStorage
+
+
+class MinioClient:
+    def __init__(self):
+        self.client = None
+
+    def init_app(self, app: Flask):
+        self.client = Minio(
+            app.config['MINIO_ENDPOINT'],
+            access_key=app.config['MINIO_ACCESS_KEY'],
+            secret_key=app.config['MINIO_SECRET_KEY'],
+            secure=app.config.get('MINIO_USE_HTTPS', False)
+        )
+        app.logger.info(f"MinIO client initialized with endpoint: {app.config['MINIO_ENDPOINT']}")
+
+    def generate_bucket_name(self, tenant_id):
+        return f"tenant-{tenant_id}-bucket"
+
+    def create_tenant_bucket(self, tenant_id):
+        bucket_name = self.generate_bucket_name(tenant_id)
+        try:
+            if not self.client.bucket_exists(bucket_name):
+                self.client.make_bucket(bucket_name)
+                return bucket_name
+            return bucket_name
+        except S3Error as err:
+            raise Exception(f"Error occurred while creating bucket: {err}")
+
+    def generate_object_name(self, document_id, language, version_id, filename):
+        return f"{document_id}/{language}/{version_id}/{filename}"
+
+    def upload_document_file(self, tenant_id, document_id, language, version_id, filename, file_data):
+        bucket_name = self.generate_bucket_name(tenant_id)
+        object_name = self.generate_object_name(document_id, language, version_id, filename)
+
+        try:
+            if isinstance(file_data, FileStorage):
+                file_data = file_data.read()
+            elif isinstance(file_data, io.BytesIO):
+                file_data = file_data.getvalue()
+            elif isinstance(file_data, str):
+                file_data = file_data.encode('utf-8')
+            elif not isinstance(file_data, bytes):
+                raise TypeError('Unsupported file type. Expected FileStorage, BytesIO, str, or bytes.')
+
+            self.client.put_object(
+                bucket_name, object_name, io.BytesIO(file_data), len(file_data)
+            )
+            return True
+        except S3Error as err:
+            raise Exception(f"Error occurred while uploading file: {err}")
+
+    def download_document_file(self, tenant_id, document_id, language, version_id, filename):
+        bucket_name = self.generate_bucket_name(tenant_id)
+        object_name = self.generate_object_name(document_id, language, version_id, filename)
+        try:
+            response = self.client.get_object(bucket_name, object_name)
+            return response.read()
+        except S3Error as err:
+            raise Exception(f"Error occurred while downloading file: {err}")
+
+    def list_document_files(self, tenant_id, document_id, language=None, version_id=None):
+        bucket_name = self.generate_bucket_name(tenant_id)
+        prefix = f"{document_id}/"
+        if language:
+            prefix += f"{language}/"
+        if version_id:
+            prefix += f"{version_id}/"
+        try:
+            objects = self.client.list_objects(bucket_name, prefix=prefix, recursive=True)
+            return [obj.object_name for obj in objects]
+        except S3Error as err:
+            raise Exception(f"Error occurred while listing files: {err}")
+
+    def delete_document_file(self, tenant_id, document_id, language, version_id, filename):
+        bucket_name = self.generate_bucket_name(tenant_id)
+        object_name = self.generate_object_name(document_id, language, version_id, filename)
+        try:
+            self.client.remove_object(bucket_name, object_name)
+            return True
+        except S3Error as err:
+            raise Exception(f"Error occurred while deleting file: {err}")
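For reference, the storage layout the new class produces: one bucket per tenant, with object keys that encode document, language, and version. The IDs here are hypothetical, chosen only to show the naming scheme:

    # Hypothetical IDs, illustrating MinioClient's naming scheme.
    client = MinioClient()
    client.generate_bucket_name(42)                   # -> 'tenant-42-bucket'
    client.generate_object_name(7, 'en', 3, '3.md')   # -> '7/en/3/3.md'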
@@ -141,7 +141,7 @@ def select_model_variables(tenant):
                                default_headers=portkey_headers)
     tool_calling_supported = False
     match llm_model:
-        case 'gpt-4-turbo' | 'gpt-4o':
+        case 'gpt-4-turbo' | 'gpt-4o' | 'gpt-4o-mini':
             tool_calling_supported = True
         case _:
             raise Exception(f'Error setting model variables for tenant {tenant.id} '
@@ -61,7 +61,7 @@ class Config(object):
 
     # supported LLMs
     SUPPORTED_EMBEDDINGS = ['openai.text-embedding-3-small', 'openai.text-embedding-3-large', 'mistral.mistral-embed']
-    SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet']
+    SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet', 'openai.gpt-4o-mini']
 
     ANTHROPIC_LLM_VERSIONS = {'claude-3-5-sonnet': 'claude-3-5-sonnet-20240620', }
 
@@ -71,6 +71,7 @@ class Config(object):
     # Annotation text chunk length
     ANNOTATION_TEXT_CHUNK_LENGTH = {
         'openai.gpt-4o': 10000,
+        'openai.gpt-4o-mini': 10000,
         'anthropic.claude-3-5-sonnet': 8000
     }
 
@@ -184,12 +185,95 @@ class DevConfig(Config):
     # PATH settings
     ffmpeg_path = '/usr/bin/ffmpeg'
 
+    # MINIO
+    MINIO_ENDPOINT = 'minio:9000'
+    MINIO_ACCESS_KEY = 'minioadmin'
+    MINIO_SECRET_KEY = 'minioadmin'
+
 
 class ProdConfig(Config):
     DEVELOPMENT = False
     DEBUG = False
-    # SQLALCHEMY_DATABASE_URI = environ.get('SQLALCHEMY_DATABASE_URI') or \
-    #                           'sqlite:///' + os.path.join(basedir, 'db.sqlite')
+    DEVELOPMENT = True
+    DEBUG = True
+    FLASK_DEBUG = True
+    PYCHARM_DEBUG = False
+    DB_HOST = environ.get('DB_HOST', 'bswnz4.stackhero-network.com')
+    DB_USER = environ.get('DB_USER', 'luke_skywalker')
+    DB_PASS = environ.get('DB_PASS', '2MK&1rHmWEydE2rFuJLq*ls%tdkPAk2')
+    DB_NAME = environ.get('DB_NAME', 'eveai')
+    DB_PORT = environ.get('DB_PORT', '5945')
+
+    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
+    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
+    EXPLAIN_TEMPLATE_LOADING = False
+
+    # Define the nginx prefix used for the specific apps
+    EVEAI_APP_LOCATION_PREFIX = '/admin'
+    EVEAI_CHAT_LOCATION_PREFIX = '/chat'
+
+    # flask-mailman settings
+    MAIL_USERNAME = 'eveai_super@flow-it.net'
+    MAIL_PASSWORD = '$6xsWGbNtx$CFMQZqc*'
+
+    # file upload settings
+    UPLOAD_FOLDER = '/app/tenant_files'
+
+    REDIS_USER = 'admin'
+    REDIS_PASS = 'b32vtDtLriSY1fL2zGrZg8IZKI0g9ucsLtVNanRFAras6oZ51wjVNB1Y05uG7uEw'
+    REDIS_URL = '8bciqc.stackhero-network.com'
+    REDIS_PORT = '9961'
+    REDIS_BASE_URI = f'redis://{REDIS_USER}:{REDIS_PASS}@{REDIS_URL}:{REDIS_PORT}'
+
+    # Celery settings
+    # eveai_app Redis Settings
+    CELERY_BROKER_URL = f'{REDIS_BASE_URI}/0'
+    CELERY_RESULT_BACKEND = f'{REDIS_BASE_URI}/0'
+    # eveai_chat Redis Settings
+    CELERY_BROKER_URL_CHAT = f'{REDIS_BASE_URI}/3'
+    CELERY_RESULT_BACKEND_CHAT = f'{REDIS_BASE_URI}/3'
+
+    # Session settings
+    SESSION_REDIS = redis.from_url(f'{REDIS_BASE_URI}/2')
+
+    # OpenAI API Keys
+    OPENAI_API_KEY = 'sk-proj-8R0jWzwjL7PeoPyMhJTZT3BlbkFJLb6HfRB2Hr9cEVFWEhU7'
+
+    # Groq API Keys
+    GROQ_API_KEY = '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'
+
+    # Anthropic API Keys
+    ANTHROPIC_API_KEY = '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'
+
+    # Portkey API Keys
+    PORTKEY_API_KEY = 'T2Dt4QTpgCvWxa1OftYCJtj7NcDZ'
+
+    # Unstructured settings
+    UNSTRUCTURED_API_KEY = 'pDgCrXumYhM3CNvjvwV8msMldXC3uw'
+    UNSTRUCTURED_BASE_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io'
+    UNSTRUCTURED_FULL_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io/general/v0/general'
+
+    # SocketIO settings
+    SOCKETIO_MESSAGE_QUEUE = f'{REDIS_BASE_URI}/1'
+    SOCKETIO_CORS_ALLOWED_ORIGINS = '*'
+    SOCKETIO_LOGGER = True
+    SOCKETIO_ENGINEIO_LOGGER = True
+    SOCKETIO_PING_TIMEOUT = 20000
+    SOCKETIO_PING_INTERVAL = 25000
+    SOCKETIO_MAX_IDLE_TIME = timedelta(minutes=60)  # Changing this value ==> change maxConnectionDuration value in
+    # eveai-chat-widget.js
+
+    # Google Cloud settings
+    GC_PROJECT_NAME = 'eveai-420711'
+    GC_LOCATION = 'europe-west1'
+    GC_KEY_RING = 'eveai-chat'
+    GC_CRYPTO_KEY = 'envelope-encryption-key'
+
+    # JWT settings
+    JWT_SECRET_KEY = 'bsdMkmQ8ObfMD52yAFg4trrvjgjMhuIqg2fjDpD/JqvgY0ccCcmlsEnVFmR79WPiLKEA3i8a5zmejwLZKl4v9Q=='
+
+    # PATH settings
+    ffmpeg_path = '/usr/bin/ffmpeg'
+
 
 config = {
config/prompts/openai/gpt-4o-mini.yaml (new file, 79 lines)

@@ -0,0 +1,79 @@
+html_parse: |
+  You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+  # Best practices are:
+  - Respect wordings and language(s) used in the HTML.
+  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+  - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.
+  - Be careful of encoding of the text. Everything needs to be human readable.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
+
+  HTML is between triple backquotes.
+
+  ```{html}```
+
+pdf_parse: |
+  You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+  # Best practices are:
+  - Respect wordings and language(s) used in the PDF.
+  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+  - When headings are numbered, show the numbering and define the header level.
+  - A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
+  - Paragraphs are to be stripped of newlines so they become easily readable.
+  - Be careful of encoding of the text. Everything needs to be human readable.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
+
+  PDF content is between triple backquotes.
+
+  ```{pdf_content}```
+
+summary: |
+  Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
+  ```{text}```
+
+rag: |
+  Answer the question based on the following context, delimited between triple backquotes.
+  {tenant_context}
+  Use the following {language} in your communication, and cite the sources used.
+  If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
+  Context:
+  ```{context}```
+  Question:
+  {question}
+
+history: |
+  You are a helpful assistant that details a question based on a previous context,
+  in such a way that the question is understandable without the previous context.
+  The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
+  The history is delimited between triple backquotes.
+  You answer by stating the question in {language}.
+  History:
+  ```{history}```
+  Question to be detailed:
+  {question}
+
+encyclopedia: |
+  You have a lot of background knowledge, and as such you are some kind of
+  'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
+  If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
+  Question:
+  {question}
+
+transcript: |
+  You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
+
+  # Best practices and steps are:
+  - Respect wordings and language(s) used in the transcription. Main language is {language}.
+  - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.
+  - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
+  - annotate the text to identify these logical parts using headings in {language}.
+  - improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
+
+  The transcript is between triple backquotes.
+
+  ```{transcript}```
@@ -15,6 +15,9 @@ x-common-variables: &common-variables
   DB_NAME: eveai
   FLASK_ENV: development
   FLASK_DEBUG: 1
+  MINIO_ENDPOINT: minio:9000
+  MINIO_ACCESS_KEY: minioadmin
+  MINIO_SECRET_KEY: minioadmin
 
 services:
   nginx:
@@ -48,12 +51,13 @@ services:
       - ../scripts:/app/scripts
       - ../patched_packages:/app/patched_packages
       - ./logs:/app/logs
-      - ./tenant_files:/app/tenant_files
     depends_on:
       db:
         condition: service_healthy
       redis:
         condition: service_healthy
+      minio:
+        condition: service_healthy
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:5001/health"]
       interval: 10s
@@ -76,12 +80,13 @@ services:
       - ../scripts:/app/scripts
       - ../patched_packages:/app/patched_packages
       - ./logs:/app/logs
-      - ./tenant_files:/app/tenant_files
     depends_on:
       db:
         condition: service_healthy
       redis:
         condition: service_healthy
+      minio:
+        condition: service_healthy
 #    healthcheck:
 #      test: [ "CMD", "curl", "-f", "http://localhost:5001/health" ]
 #      interval: 10s
@@ -174,7 +179,30 @@ services:
       interval: 10s
       timeout: 5s
       retries: 5
-#volumes:
+
+  minio:
+    image: minio/minio
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    expose:
+      - 9000
+    volumes:
+      - ./minio/data:/data
+      - ./minio/config:/root/.minio
+    environment:
+      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
+      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
+    command: server /data --console-address ":9001"
+    healthcheck:
+      test: [ "CMD", "mc", "ready", "local" ]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+      start_period: 30s
+
+volumes:
+  minio_data:
 # db-data:
 # redis-data:
 # tenant-files:
@@ -6,7 +6,8 @@ from flask_security.signals import user_authenticated
 from werkzeug.middleware.proxy_fix import ProxyFix
 import logging.config
 
-from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session
+from common.extensions import (db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session,
+                               minio_client)
 from common.models.user import User, Role, Tenant, TenantDomain
 import common.models.interaction
 from config.logging_config import LOGGING
@@ -102,6 +103,7 @@ def register_extensions(app):
     cors.init_app(app)
     kms_client.init_app(app)
     session.init_app(app)
+    minio_client.init_app(app)
 
 
 # Register Blueprints
@@ -14,9 +14,10 @@ import requests
 from requests.exceptions import SSLError
 from urllib.parse import urlparse
 import io
+from minio.error import S3Error
 
 from common.models.document import Document, DocumentVersion
-from common.extensions import db
+from common.extensions import db, minio_client
 from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
     AddURLsForm
 from common.utils.middleware import mw_before_request
@@ -558,33 +559,34 @@ def upload_file_for_version(doc_vers, file, extension):
     doc_vers.file_name = doc_vers.calc_file_name()
     doc_vers.file_location = doc_vers.calc_file_location()
 
-    upload_path = os.path.join(current_app.config['UPLOAD_FOLDER'], doc_vers.file_location)
-    if not os.path.exists(upload_path):
-        os.makedirs(upload_path, exist_ok=True)
-    if isinstance(file, FileStorage):
-        file.save(os.path.join(upload_path, doc_vers.file_name))
-    elif isinstance(file, io.BytesIO):
-        # It's a BytesIO object, handle accordingly
-        # Example: write content to a file manually
-        with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
-            f.write(file.getvalue())
-    elif isinstance(file, str):
-        # It's a string, handle accordingly
-        with open(os.path.join(upload_path, doc_vers.file_name), 'w') as f:
-            f.write(file)
-    else:
-        raise TypeError('Unsupported file type.')
+    # Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
+    tenant_id = session['tenant']['id']
+    minio_client.create_tenant_bucket(tenant_id)
 
     try:
+        minio_client.upload_document_file(
+            tenant_id,
+            doc_vers.doc_id,
+            doc_vers.language,
+            doc_vers.id,
+            doc_vers.file_name,
+            file
+        )
         db.session.commit()
+        current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
+                                f'document version {doc_vers.id} while uploading file.')
+    except S3Error as e:
+        db.session.rollback()
+        flash('Error saving document to MinIO.', 'error')
+        current_app.logger.error(
+            f'Error saving document to MinIO for tenant {tenant_id}: {e}')
+        raise
     except SQLAlchemyError as e:
         db.session.rollback()
-        flash('Error saving document.', 'error')
+        flash('Error saving document metadata.', 'error')
         current_app.logger.error(
-            f'Error saving document for tenant {session["tenant"]["id"]} while uploading file: {e}')
+            f'Error saving document metadata for tenant {tenant_id}: {e}')
+        raise
-    current_app.logger.info(f'Succesfully saved document for tenant {session['tenant']['id']} for '
-                            f'document version {doc_vers.id} while uploading file.')
 
 
 def fetch_html(url):
@@ -8,7 +8,7 @@ from sqlalchemy.exc import SQLAlchemyError
 import ast
 
 from common.models.user import User, Tenant, Role, TenantDomain
-from common.extensions import db, kms_client, security
+from common.extensions import db, kms_client, security, minio_client
 from common.utils.security_utils import send_confirmation_email, send_reset_email
 from .user_forms import TenantForm, CreateUserForm, EditUserForm, TenantDomainForm
 from common.utils.database import Database
@@ -61,12 +61,13 @@ def tenant():
             #                     rag_tuning=form.rag_tuning.data)
 
             # Handle Embedding Variables
-            new_tenant.html_tags = form.html_tags.data.split(',') if form.html_tags.data else []
-            new_tenant.html_end_tags = form.html_end_tags.data.split(',') if form.html_end_tags.data else []
-            new_tenant.html_included_elements = form.html_included_elements.data.split(
-                ',') if form.html_included_elements.data else []
-            new_tenant.html_excluded_elements = form.html_excluded_elements.data.split(
-                ',') if form.html_excluded_elements.data else []
+            new_tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
+            new_tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
+                if form.html_end_tags.data else []
+            new_tenant.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
+                if form.html_included_elements.data else []
+            new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
+                if form.html_excluded_elements.data else []
 
             current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
                                      f'html_end_tags: {new_tenant.html_end_tags},'
@@ -87,11 +88,17 @@ def tenant():
                 flash(f'Failed to add tenant to database. Error: {str(e)}')
                 return render_template('user/tenant.html', form=form)
 
-            # Create schema for new tenant
             current_app.logger.info(f"Successfully created tenant {new_tenant.id} in Database")
             flash(f"Successfully created tenant {new_tenant.id} in Database")
 
+            # Create schema for new tenant
             current_app.logger.info(f"Creating schema for tenant {new_tenant.id}")
             Database(new_tenant.id).create_tenant_schema()
 
+            # Create MinIO bucket for new tenant
+            current_app.logger.info(f"Creating MinIO bucket for tenant {new_tenant.id}")
+            minio_client.create_tenant_bucket(new_tenant.id)
+
             return redirect(prefixed_url_for('basic_bp.index'))
         else:
             form_validation_failed(request, form)
@@ -81,6 +81,12 @@ def ask_question(tenant_id, question, language, session_id, user_timezone):
         current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
         raise
 
+    if tenant.rag_tuning:
+        current_app.rag_tuning_logger.debug(f'Received question for tenant {tenant_id}:\n{question}. Processing...')
+        current_app.rag_tuning_logger.debug(f'Tenant Information: \n{tenant.to_dict()}')
+        current_app.rag_tuning_logger.debug(f'===================================================================')
+        current_app.rag_tuning_logger.debug(f'===================================================================')
+
     result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
     result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
     result['interaction_id'] = interaction.id
@@ -116,6 +122,9 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
 
     detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
     current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
+    if tenant.rag_tuning:
+        current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
+        current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
     new_interaction.detailed_question = detailed_question
     new_interaction.detailed_question_at = dt.now(tz.utc)
 
@@ -126,6 +135,9 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
     full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
     rag_prompt = ChatPromptTemplate.from_template(full_template)
     setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
+    if tenant.rag_tuning:
+        current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
+        current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
 
     new_interaction_embeddings = []
     if not model_variables['cited_answer_cls']:  # The model doesn't support structured feedback
@@ -151,6 +163,11 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
     current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
     current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
     current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
+    if tenant.rag_tuning:
+        current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
+        current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
+        current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
+        current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
     new_interaction.answer = result['answer']
 
     # Filter out the existing Embedding IDs
@@ -161,7 +178,11 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
         .all()
     )
     existing_embedding_ids = [emb.id for emb in embeddings]
-    urls = [emb.document_version.url for emb in embeddings]
+    urls = list(set(emb.document_version.url for emb in embeddings))
+    if tenant.rag_tuning:
+        current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
+        current_app.rag_tuning_logger.debug(f'{urls}')
+        current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
 
     for emb_id in existing_embedding_ids:
         new_interaction_embedding = InteractionEmbedding(embedding_id=emb_id)
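Note that current_app.rag_tuning_logger, used throughout the logging changes above, is a dedicated logger attached to the Flask app; its setup is not part of this diff. Purely as an assumption about the surrounding code, one plausible wiring looks like this:

    # ASSUMPTION: this setup is NOT shown in the diff; it is one plausible way
    # the app could expose a dedicated logger as current_app.rag_tuning_logger.
    import logging

    def attach_rag_tuning_logger(app, logfile='logs/rag_tuning.log'):
        logger = logging.getLogger('rag_tuning')
        logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(logfile)
        handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
        logger.addHandler(handler)
        app.rag_tuning_logger = logger  # later read as current_app.rag_tuning_logger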
@@ -3,7 +3,7 @@ import logging.config
 from flask import Flask
 
 from common.utils.celery_utils import make_celery, init_celery
-from common.extensions import db
+from common.extensions import db, minio_client
 from config.logging_config import LOGGING
 
 
@@ -33,6 +33,7 @@ def create_app(config_file=None):
 
 def register_extensions(app):
     db.init_app(app)
+    minio_client.init_app(app)
 
 
 app, celery = create_app()
@@ -1,3 +1,4 @@
|
|||||||
|
import io
|
||||||
import os
|
import os
|
||||||
from datetime import datetime as dt, timezone as tz
|
from datetime import datetime as dt, timezone as tz
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -21,7 +22,7 @@ import PyPDF2
|
|||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from common.extensions import db
|
from common.extensions import db, minio_client
|
||||||
from common.models.document import DocumentVersion, Embedding
|
from common.models.document import DocumentVersion, Embedding
|
||||||
from common.models.user import Tenant
|
from common.models.user import Tenant
|
||||||
from common.utils.celery_utils import current_celery
|
from common.utils.celery_utils import current_celery
|
||||||
@@ -32,11 +33,6 @@ from common.utils.os_utils import safe_remove, sync_folder
|
|||||||
|
|
||||||
@current_celery.task(name='create_embeddings', queue='embeddings')
|
@current_celery.task(name='create_embeddings', queue='embeddings')
|
||||||
def create_embeddings(tenant_id, document_version_id):
|
def create_embeddings(tenant_id, document_version_id):
|
||||||
# Setup Remote Debugging only if PYCHARM_DEBUG=True
|
|
||||||
if current_app.config['PYCHARM_DEBUG']:
|
|
||||||
import pydevd_pycharm
|
|
||||||
pydevd_pycharm.settrace('localhost', port=50170, stdoutToServer=True, stderrToServer=True)
|
|
||||||
|
|
||||||
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id}.')
|
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id}.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -50,6 +46,7 @@ def create_embeddings(tenant_id, document_version_id):
|
|||||||
|
|
||||||
# Select variables to work with depending on tenant and model
|
# Select variables to work with depending on tenant and model
|
||||||
model_variables = select_model_variables(tenant)
|
model_variables = select_model_variables(tenant)
|
||||||
|
current_app.logger.debug(f'Model variables: {model_variables}')
|
||||||
|
|
||||||
# Retrieve document version to process
|
# Retrieve document version to process
|
||||||
document_version = DocumentVersion.query.get(document_version_id)
|
document_version = DocumentVersion.query.get(document_version_id)
|
||||||
@@ -107,33 +104,20 @@ def create_embeddings(tenant_id, document_version_id):
|
|||||||
|
|
||||||
|
|
||||||
def process_pdf(tenant, model_variables, document_version):
|
def process_pdf(tenant, model_variables, document_version):
|
||||||
base_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||||
document_version.file_location)
|
document_version.id, document_version.file_name)
|
||||||
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
|
||||||
document_version.file_location,
|
|
||||||
document_version.file_name)
|
|
||||||
if os.path.exists(file_path):
|
|
||||||
pdf_text = ''
|
pdf_text = ''
|
||||||
# Function to extract text from PDF and return as string
|
pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data))
|
||||||
with open(file_path, 'rb') as file:
|
for page in pdf_reader.pages:
|
||||||
reader = PyPDF2.PdfReader(file)
|
|
||||||
for page_num in range(len(reader.pages)):
|
|
||||||
page = reader.pages[page_num]
|
|
||||||
pdf_text += page.extract_text()
|
pdf_text += page.extract_text()
|
||||||
else:
|
|
||||||
current_app.logger.error(f'The physical file for document version {document_version.id} '
|
|
||||||
f'for tenant {tenant.id} '
|
|
||||||
f'at {file_path} does not exist')
|
|
||||||
create_embeddings.update_state(state=states.FAILURE)
|
|
||||||
raise
|
|
||||||
|
|
||||||
markdown = generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_text)
|
markdown = generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_text)
|
||||||
markdown_file_name = f'{document_version.id}.md'
|
markdown_file_name = f'{document_version.id}.md'
|
||||||
output_file = os.path.join(base_path, markdown_file_name)
|
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
||||||
with open(output_file, 'w') as f:
|
markdown_file_name, markdown.encode())
|
||||||
f.write(markdown)
|
|
||||||
|
|
||||||
potential_chunks = create_potential_chunks_for_markdown(base_path, markdown_file_name, tenant)
|
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||||
model_variables['max_chunk_size'])
|
model_variables['max_chunk_size'])
|
||||||
|
|
||||||
@@ -175,43 +159,29 @@ def delete_embeddings_for_document_version(document_version):
|
|||||||
|
|
||||||
|
|
||||||
def process_html(tenant, model_variables, document_version):
|
def process_html(tenant, model_variables, document_version):
|
||||||
|
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||||
|
document_version.id, document_version.file_name)
|
||||||
|
html_content = file_data.decode('utf-8')
|
||||||
|
|
||||||
# The tags to be considered can be dependent on the tenant
|
# The tags to be considered can be dependent on the tenant
|
||||||
html_tags = model_variables['html_tags']
|
html_tags = model_variables['html_tags']
|
||||||
html_end_tags = model_variables['html_end_tags']
|
html_end_tags = model_variables['html_end_tags']
|
||||||
html_included_elements = model_variables['html_included_elements']
|
html_included_elements = model_variables['html_included_elements']
|
||||||
html_excluded_elements = model_variables['html_excluded_elements']
|
html_excluded_elements = model_variables['html_excluded_elements']
|
||||||
|
|
||||||
base_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
|
||||||
document_version.file_location)
|
|
||||||
|
|
||||||
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
|
||||||
document_version.file_location,
|
|
||||||
document_version.file_name)
|
|
||||||
|
|
||||||
if os.path.exists(file_path):
|
|
||||||
with open(file_path, 'rb') as f:
|
|
||||||
html_content = f.read()
|
|
||||||
else:
|
|
||||||
current_app.logger.error(f'The physical file for document version {document_version.id} '
|
|
||||||
f'for tenant {tenant.id} '
|
|
||||||
f'at {file_path} does not exist')
|
|
||||||
create_embeddings.update_state(state=states.FAILURE)
|
|
||||||
raise
|
|
||||||
|
|
||||||
extracted_html, title = parse_html(tenant, html_content, html_tags, included_elements=html_included_elements,
|
extracted_html, title = parse_html(tenant, html_content, html_tags, included_elements=html_included_elements,
|
||||||
excluded_elements=html_excluded_elements)
|
excluded_elements=html_excluded_elements)
|
||||||
|
|
||||||
extracted_file_name = f'{document_version.id}-extracted.html'
|
extracted_file_name = f'{document_version.id}-extracted.html'
|
||||||
output_file = os.path.join(base_path, extracted_file_name)
|
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
||||||
with open(output_file, 'w') as f:
|
extracted_file_name, extracted_html.encode())
|
||||||
f.write(extracted_html)
|
|
||||||
|
|
||||||
markdown = generate_markdown_from_html(tenant, model_variables, document_version, extracted_html)
|
markdown = generate_markdown_from_html(tenant, model_variables, document_version, extracted_html)
|
||||||
markdown_file_name = f'{document_version.id}.md'
|
markdown_file_name = f'{document_version.id}.md'
|
||||||
output_file = os.path.join(base_path, markdown_file_name)
|
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
||||||
with open(output_file, 'w') as f:
|
markdown_file_name, markdown.encode())
|
||||||
f.write(markdown)
|
|
||||||
|
|
||||||
potential_chunks = create_potential_chunks_for_markdown(base_path, markdown_file_name, tenant)
|
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||||
model_variables['max_chunk_size'])
|
model_variables['max_chunk_size'])
|
||||||
|
|
||||||
@@ -222,7 +192,7 @@ def process_html(tenant, model_variables, document_version):
|
|||||||
else:
|
else:
|
||||||
document_version.system_context = (f'Title: {title}\n')
|
document_version.system_context = (f'Title: {title}\n')
|
||||||
|
|
||||||
enriched_chunks = enrich_chunks(tenant, document_version, chunks)
|
enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)
|
||||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -241,16 +211,17 @@ def process_html(tenant, model_variables, document_version):
|
|||||||
f'on document version {document_version.id} :-)')
|
f'on document version {document_version.id} :-)')
|
||||||
|
|
||||||
|
|
||||||
def enrich_chunks(tenant, document_version, chunks):
|
def enrich_chunks(tenant, document_version, title, chunks):
|
||||||
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
||||||
f'on document version {document_version.id}')
|
f'on document version {document_version.id}')
|
||||||
current_app.logger.debug(f'Nr of chunks: {len(chunks)}')
|
current_app.logger.debug(f'Nr of chunks: {len(chunks)}')
|
||||||
chunk_total_context = (f'Filename: {document_version.file_name}\n'
|
chunk_total_context = (f'Filename: {document_version.file_name}\n'
|
||||||
f'User Context:{document_version.user_context}\n'
|
f'User Context:\n{document_version.user_context}\n\n'
|
||||||
f'{document_version.system_context}\n\n')
|
f'{document_version.system_context}\n\n')
|
||||||
enriched_chunks = []
|
enriched_chunks = []
|
||||||
initial_chunk = (f'Filename: {document_version.file_name}\n'
|
initial_chunk = (f'Filename: {document_version.file_name}\n'
|
||||||
f'User Context:\n{document_version.user_context}\n\n'
|
f'User Context:\n{document_version.user_context}\n\n'
|
||||||
|
f'Title: {title}\n'
|
||||||
f'{chunks[0]}')
|
f'{chunks[0]}')
|
||||||
|
|
||||||
enriched_chunks.append(initial_chunk)
|
enriched_chunks.append(initial_chunk)
|
||||||
@@ -311,7 +282,7 @@ def summarize_chunk(tenant, model_variables, document_version, chunk):
|
|||||||
text_to_summarize = doc_creator.create_documents(chunk)
|
text_to_summarize = doc_creator.create_documents(chunk)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
summary = chain.run(text_to_summarize)
|
summary = chain.invoke({"text": text_to_summarize})
|
||||||
current_app.logger.debug(f'Finished summarizing chunk for tenant {tenant.id} '
|
current_app.logger.debug(f'Finished summarizing chunk for tenant {tenant.id} '
|
||||||
f'on document version {document_version.id}.')
|
f'on document version {document_version.id}.')
|
||||||
return summary
|
return summary
|
||||||
@@ -391,23 +362,26 @@ def process_youtube(tenant, model_variables, document_version):
     markdown_file_name = f'{document_version.id}.md'

     # Remove existing files (in case of a re-processing of the file)
-    safe_remove(os.path.join(base_path, download_file_name))
-    safe_remove(os.path.join(base_path, compressed_file_name))
-    safe_remove(os.path.join(base_path, transcription_file_name))
-    safe_remove(os.path.join(base_path, markdown_file_name))
-    sync_folder(base_path)
+    minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
+                                      document_version.id, download_file_name)
+    minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
+                                      document_version.id, compressed_file_name)
+    minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
+                                      document_version.id, transcription_file_name)
+    minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
+                                      document_version.id, markdown_file_name)

-    of, title, description, author = download_youtube(document_version.url, base_path, download_file_name, tenant)
+    of, title, description, author = download_youtube(document_version.url, tenant.id, document_version,
+                                                      download_file_name)
     document_version.system_context = f'Title: {title}\nDescription: {description}\nAuthor: {author}'
-    compress_audio(base_path, download_file_name, compressed_file_name, tenant)
-    transcribe_audio(base_path, compressed_file_name, transcription_file_name,
-                     document_version.language, tenant, model_variables)
-    annotate_transcription(base_path, transcription_file_name, markdown_file_name,
-                           document_version.language, tenant, model_variables)
+    compress_audio(tenant.id, document_version, download_file_name, compressed_file_name)
+    transcribe_audio(tenant.id, document_version, compressed_file_name, transcription_file_name, model_variables)
+    annotate_transcription(tenant, document_version, transcription_file_name, markdown_file_name, model_variables)

-    potential_chunks = create_potential_chunks_for_markdown(base_path, markdown_file_name, tenant)
+    potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
     actual_chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
                                                 model_variables['max_chunk_size'])

     enriched_chunks = enrich_chunks(tenant, document_version, actual_chunks)
     embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)

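Note: the MinioClient helper that the new calls rely on is added elsewhere in this commit and is not part of this excerpt. As a rough orientation only, here is a minimal sketch of the interface these call sites assume; the endpoint, credentials, bucket name, and object-key layout are illustrative assumptions, not the committed implementation:

    # Illustrative sketch only: endpoint, credentials, bucket, and key layout
    # are assumptions, not the implementation shipped in this commit.
    import io

    from minio import Minio


    class MinioClient:
        def __init__(self, endpoint='minio:9000', access_key='minioadmin',
                     secret_key='minioadmin', bucket='tenant-files'):
            self.client = Minio(endpoint, access_key=access_key,
                                secret_key=secret_key, secure=False)
            self.bucket = bucket

        def _object_name(self, tenant_id, doc_id, language, version_id, file_name):
            # Assumed key layout mirroring the old per-tenant folder structure
            return f'{tenant_id}/{doc_id}/{language}/{version_id}/{file_name}'

        def upload_document_file(self, tenant_id, doc_id, language, version_id,
                                 file_name, data):
            name = self._object_name(tenant_id, doc_id, language, version_id, file_name)
            self.client.put_object(self.bucket, name, io.BytesIO(data), length=len(data))

        def download_document_file(self, tenant_id, doc_id, language, version_id,
                                   file_name):
            name = self._object_name(tenant_id, doc_id, language, version_id, file_name)
            response = self.client.get_object(self.bucket, name)
            try:
                return response.read()
            finally:
                response.close()
                response.release_conn()

        def delete_document_file(self, tenant_id, doc_id, language, version_id, file_name):
            name = self._object_name(tenant_id, doc_id, language, version_id, file_name)
            self.client.remove_object(self.bucket, name)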
@@ -427,27 +401,41 @@ def process_youtube(tenant, model_variables, document_version):
                             f'on Youtube document version {document_version.id} :-)')


-def download_youtube(url, file_location, file_name, tenant):
+def download_youtube(url, tenant_id, document_version, file_name):
     try:
-        current_app.logger.info(f'Downloading YouTube video: {url} on location {file_location} for tenant: {tenant.id}')
+        current_app.logger.info(f'Downloading YouTube video: {url} for tenant: {tenant_id}')
         yt = YouTube(url)
         stream = yt.streams.get_audio_only()
-        output_file = stream.download(output_path=file_location, filename=file_name)
-        current_app.logger.info(f'Downloaded YouTube video: {url} on location {file_location} for tenant: {tenant.id}')
-        return output_file, yt.title, yt.description, yt.author
+
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            stream.download(output_path=temp_file.name)
+            with open(temp_file.name, 'rb') as f:
+                file_data = f.read()
+
+        minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id,
+                                          file_name, file_data)
+
+        current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}')
+        return file_name, yt.title, yt.description, yt.author
     except Exception as e:
-        current_app.logger.error(f'Error downloading YouTube video: {url} on location {file_location} for '
-                                 f'tenant: {tenant.id} with error: {e}')
+        current_app.logger.error(f'Error downloading YouTube video: {url} for tenant: {tenant_id} with error: {e}')
         raise


-def compress_audio(file_location, input_file, output_file, tenant):
+def compress_audio(tenant_id, document_version, input_file, output_file):
     try:
-        current_app.logger.info(f'Compressing audio on {file_location} for tenant: {tenant.id}')
+        current_app.logger.info(f'Compressing audio for tenant: {tenant_id}')

-        # Run the compression script
+        input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
+                                                         document_version.id, input_file)
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
+            temp_input.write(input_data)
+            temp_input.flush()
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_output:
             result = subprocess.run(
-                ['scripts/compress.sh', '-d', file_location, '-i', input_file, '-o', output_file],
+                ['ffmpeg', '-i', temp_input.name, '-b:a', '64k', '-f', 'mp3', temp_output.name],
                 capture_output=True,
                 text=True
             )
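A note on the temp-file pattern above: NamedTemporaryFile(delete=False) keeps the file on disk after the with block exits, so ffmpeg can read and write it by path, which makes cleanup the caller's responsibility. A small self-contained illustration; the file contents and the cat command are placeholders, not code from this commit:

    import os
    import subprocess
    import tempfile

    # delete=False keeps the file after the `with` block exits, so an external
    # process can use it by path; it must then be removed explicitly.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as tmp:
        tmp.write(b'hello world')
        path = tmp.name

    try:
        # Stand-in for the ffmpeg invocation; any command that reads `path` works.
        subprocess.run(['cat', path], check=True)
    finally:
        os.unlink(path)  # without this, each run leaves a file in the temp dir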
@@ -455,55 +443,30 @@ def compress_audio(file_location, input_file, output_file, tenant):
             if result.returncode != 0:
                 raise Exception(f"Compression failed: {result.stderr}")

-        output_file_path = os.path.join(file_location, output_file)
-
-        # Additional check for file stability
-        previous_size = -1
-        stable_count = 0
-        max_attempts = 12  # 1 minute total wait time
-
-        for _ in range(max_attempts):
-            if os.path.exists(output_file_path):
-                current_size = os.path.getsize(output_file_path)
-                if current_size == previous_size:
-                    stable_count += 1
-                    if stable_count >= 3:  # File size hasn't changed for 3 checks
-                        break
-                else:
-                    stable_count = 0
-                previous_size = current_size
-            gevent.sleep(5)
-
-        if stable_count < 3:
-            raise Exception("File size did not stabilize within the expected time")
-
-        current_app.logger.info(f'Compressed audio for {file_location} for tenant: {tenant.id}')
-        return output_file_path
+            with open(temp_output.name, 'rb') as f:
+                compressed_data = f.read()
+
+        minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id,
+                                          output_file, compressed_data)
+
+        current_app.logger.info(f'Compressed audio for tenant: {tenant_id}')
     except Exception as e:
-        current_app.logger.error(f'Error compressing audio on {file_location} for tenant: {tenant.id} with error: {e}')
+        current_app.logger.error(f'Error compressing audio for tenant: {tenant_id} with error: {e}')
         raise


-def transcribe_audio(file_location, input_file, output_file, language, tenant, model_variables):
+def transcribe_audio(tenant_id, document_version, input_file, output_file, model_variables):
     try:
-        current_app.logger.info(f'Transcribing audio on {file_location} for tenant: {tenant.id}')
+        current_app.logger.info(f'Transcribing audio for tenant: {tenant_id}')
         client = model_variables['transcription_client']
         model = model_variables['transcription_model']
-        input_file_path = os.path.join(file_location, input_file)
-        output_file_path = os.path.join(file_location, output_file)

-        # Wait for the input file to exist
-        count = 0
-        while not os.path.exists(input_file_path) and count < 10:
-            gevent.sleep(1)
-            current_app.logger.debug(f'Waiting for {input_file_path} to exist... Count: {count}')
-            count += 1
-
-        if not os.path.exists(input_file_path):
-            raise FileNotFoundError(f"Input file {input_file_path} not found after waiting.")
-
-        # Load the audio file
-        audio = AudioSegment.from_file(input_file_path)
+        # Download the audio file from MinIO
+        audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
+                                                         document_version.id, input_file)
+
+        # Load the audio data into pydub
+        audio = AudioSegment.from_mp3(io.BytesIO(audio_data))

         # Define segment length (e.g., 10 minutes)
         segment_length = 10 * 60 * 1000  # 10 minutes in milliseconds
@@ -512,14 +475,16 @@ def transcribe_audio(file_location, input_file, output_file, language, tenant, model_variables):

         # Split audio into segments and transcribe each
         for i, chunk in enumerate(audio[::segment_length]):
-            current_app.logger.debug(f'Transcribing chunk {i} of {len(audio) // segment_length} ')
+            current_app.logger.debug(f'Transcribing chunk {i + 1} of {len(audio) // segment_length + 1}')

             with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
                 chunk.export(temp_audio.name, format="mp3")

                 with open(temp_audio.name, 'rb') as audio_segment:
                     transcription = client.audio.transcriptions.create(
                         file=audio_segment,
                         model=model,
-                        language=language,
+                        language=document_version.language,
                         response_format='verbose_json',
                     )
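For readers unfamiliar with the audio[::segment_length] idiom: pydub's AudioSegment supports slice-with-step indexing in milliseconds, yielding consecutive fixed-length chunks, with the final chunk possibly shorter. A standalone sketch; 'example.mp3' is a placeholder input, not a file from this commit:

    from pydub import AudioSegment  # requires ffmpeg/libav on the PATH

    audio = AudioSegment.from_mp3('example.mp3')  # placeholder file
    segment_length = 10 * 60 * 1000  # 10 minutes in milliseconds

    # Slicing with a step yields consecutive segment_length-sized chunks;
    # hence the `i + 1` / `// segment_length + 1` in the progress log above.
    for i, chunk in enumerate(audio[::segment_length]):
        print(f'chunk {i + 1} of {len(audio) // segment_length + 1}: {len(chunk) / 1000:.1f}s')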
@@ -530,20 +495,25 @@ def transcribe_audio(file_location, input_file, output_file, language, tenant, model_variables):
         # Combine all transcriptions
         full_transcription = " ".join(transcriptions)

-        # Write the full transcription to the output file
-        with open(output_file_path, 'w') as f:
-            f.write(full_transcription)
+        # Upload the full transcription to MinIO
+        minio_client.upload_document_file(
+            tenant_id,
+            document_version.doc_id,
+            document_version.language,
+            document_version.id,
+            output_file,
+            full_transcription.encode('utf-8')
+        )

-        current_app.logger.info(f'Transcribed audio for {file_location} for tenant: {tenant.id}')
+        current_app.logger.info(f'Transcribed audio for tenant: {tenant_id}')
     except Exception as e:
-        current_app.logger.error(f'Error transcribing audio for {file_location} for tenant: {tenant.id}, '
-                                 f'with error: {e}')
+        current_app.logger.error(f'Error transcribing audio for tenant: {tenant_id}, with error: {e}')
         raise


-def annotate_transcription(file_location, input_file, output_file, language, tenant, model_variables):
+def annotate_transcription(tenant, document_version, input_file, output_file, model_variables):
     try:
-        current_app.logger.debug(f'Annotating transcription on {file_location} for tenant {tenant.id}')
+        current_app.logger.debug(f'Annotating transcription for tenant {tenant.id}')

         char_splitter = CharacterTextSplitter(separator='.',
                                               chunk_size=model_variables['annotation_chunk_length'],
@@ -552,18 +522,21 @@ def annotate_transcription(file_location, input_file, output_file, language, tenant, model_variables):
         headers_to_split_on = [
             ("#", "Header 1"),
             ("##", "Header 2"),
-            # ("###", "Header 3"),
         ]
         markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)

         llm = model_variables['llm']
         template = model_variables['transcript_template']
-        language_template = create_language_template(template, language)
+        language_template = create_language_template(template, document_version.language)
         transcript_prompt = ChatPromptTemplate.from_template(language_template)
         setup = RunnablePassthrough()
         output_parser = StrOutputParser()
-        with open(os.path.join(file_location, input_file), 'r') as f:
-            transcript = f.read()
+
+        # Download the transcription file from MinIO
+        transcript_data = minio_client.download_document_file(tenant.id, document_version.doc_id,
+                                                              document_version.language, document_version.id,
+                                                              input_file)
+        transcript = transcript_data.decode('utf-8')

         chain = setup | transcript_prompt | llm | output_parser

@@ -598,38 +571,53 @@ def annotate_transcription(file_location, input_file, output_file, language, tenant, model_variables):
                 markdown_chunks.pop()
             all_markdown_chunks += markdown_chunks


        all_markdown_chunks += [last_markdown_chunk]

        annotated_transcript = '\n'.join(all_markdown_chunks)

-        with open(os.path.join(file_location, output_file), 'w') as f:
-            f.write(annotated_transcript)
+        # Upload the annotated transcript to MinIO
+        minio_client.upload_document_file(
+            tenant.id,
+            document_version.doc_id,
+            document_version.language,
+            document_version.id,
+            output_file,
+            annotated_transcript.encode('utf-8')
+        )

-        current_app.logger.info(f'Annotated transcription for {file_location} for tenant {tenant.id}')
+        current_app.logger.info(f'Annotated transcription for tenant {tenant.id}')
     except Exception as e:
-        current_app.logger.error(f'Error annotating transcription for {file_location} for tenant {tenant.id}, '
-                                 f'with error: {e}')
+        current_app.logger.error(f'Error annotating transcription for tenant {tenant.id}, with error: {e}')
         raise


-def create_potential_chunks_for_markdown(base_path, input_file, tenant):
-    current_app.logger.info(f'Creating potential chunks for {base_path} for tenant {tenant.id}')
-    markdown = ''
-    with open(os.path.join(base_path, input_file), 'r') as f:
-        markdown = f.read()
+def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
+    try:
+        current_app.logger.info(f'Creating potential chunks for tenant {tenant_id}')
+
+        # Download the markdown file from MinIO
+        markdown_data = minio_client.download_document_file(tenant_id,
+                                                            document_version.doc_id,
+                                                            document_version.language,
+                                                            document_version.id,
+                                                            input_file
+                                                            )
+        markdown = markdown_data.decode('utf-8')

         headers_to_split_on = [
             ("#", "Header 1"),
             ("##", "Header 2"),
-            # ("###", "Header 3"),
         ]

         markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
         md_header_splits = markdown_splitter.split_text(markdown)
         potential_chunks = [doc.page_content for doc in md_header_splits]

+        current_app.logger.debug(f'Created {len(potential_chunks)} potential chunks for tenant {tenant_id}')
         return potential_chunks
+    except Exception as e:
+        current_app.logger.error(f'Error creating potential chunks for tenant {tenant_id}, with error: {e}')
+        raise


 def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars):
nginx/public/chat_eveai_mini.html (Normal file, 28 lines)
@@ -0,0 +1,28 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Chat Client EveAI Mini</title>
+    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
+    <script src="https://cdn.socket.io/4.0.1/socket.io.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+    <script src="/static/js/eveai-sdk.js" defer></script>
+    <script src="/static/js/eveai-chat-widget.js" defer></script>
+    <link rel="stylesheet" href="/static/css/eveai-chat-style.css">
+</head>
+<body>
+<div id="chat-container"></div>
+<script>
+    document.addEventListener('DOMContentLoaded', function() {
+        const eveAI = new EveAI(
+            '6',
+            'EveAI-CHAT-3622-2083-4559-6024-8786',
+            'http://macstudio.ask-eve-ai-local.com',
+            'en'
+        );
+        eveAI.initializeChat('chat-container');
+    });
+</script>
+</body>
+</html>
nginx/public/chat_flow.html (Normal file, 28 lines)
@@ -0,0 +1,28 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Chat Client AE</title>
+    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
+    <script src="https://cdn.socket.io/4.0.1/socket.io.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+    <script src="/static/js/eveai-sdk.js" defer></script>
+    <script src="/static/js/eveai-chat-widget.js" defer></script>
+    <link rel="stylesheet" href="/static/css/eveai-chat-style.css">
+</head>
+<body>
+<div id="chat-container"></div>
+<script>
+    document.addEventListener('DOMContentLoaded', function() {
+        const eveAI = new EveAI(
+            '2',
+            'EveAI-CHAT-8716-3188-4285-8044-9932',
+            'http://macstudio.ask-eve-ai-local.com',
+            'en'
+        );
+        eveAI.initializeChat('chat-container');
+    });
+</script>
+</body>
+</html>
@@ -76,3 +76,7 @@ groq~=0.9.0
 pydub~=0.25.1
 argparse~=1.4.0
 portkey_ai~=1.7.0
+
+minio~=7.2.7
+Werkzeug~=3.0.3
+itsdangerous~=2.2.0
@@ -7,7 +7,7 @@ export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # In
 # Set flask environment variables
 #export FLASK_ENV=development # Use 'production' as appropriate
 #export FLASK_DEBUG=1 # Use 0 for production
-print "Starting EveAI Chat"
+echo "Starting EveAI Chat"

 # Start Flask app
 gunicorn -w 4 -k geventwebsocket.gunicorn.workers.GeventWebSocketWorker -b 0.0.0.0:5002 scripts.run_eveai_chat:app