- Improvements to enable deployment in the cloud, mainly changing file access to Minio

- Improvements on RAG logging, and some debugging in that area
2024-08-01 17:35:54 +02:00
parent 88ca04136d
commit 64cf8df3a9
19 changed files with 617 additions and 206 deletions
--- a/config/config.py
+++ b/config/config.py
@@ -61,7 +61,7 @@ class Config(object):

    # supported LLMs
    SUPPORTED_EMBEDDINGS = ['openai.text-embedding-3-small', 'openai.text-embedding-3-large', 'mistral.mistral-embed']
-    SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet']
+    SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet', 'openai.gpt-4o-mini']

    ANTHROPIC_LLM_VERSIONS = {'claude-3-5-sonnet': 'claude-3-5-sonnet-20240620', }

@@ -71,6 +71,7 @@ class Config(object):
    # Annotation text chunk length
    ANNOTATION_TEXT_CHUNK_LENGTH = {
        'openai.gpt-4o': 10000,
+        'openai.gpt-4o-mini': 10000,
        'anthropic.claude-3-5-sonnet': 8000
    }

@@ -184,12 +185,95 @@ class DevConfig(Config):
    # PATH settings
    ffmpeg_path = '/usr/bin/ffmpeg'

+    # MINIO
+    MINIO_ENDPOINT = 'minio:9000'
+    MINIO_ACCESS_KEY = 'minioadmin'
+    MINIO_SECRET_KEY = 'minioadmin'
+

 class ProdConfig(Config):
+    """Settings for the production (cloud) deployment.
+
+    Secrets (passwords, API keys, signing keys) are read from the process
+    environment and have no in-source default, so nothing sensitive lives in
+    version control. Non-secret connection details (hosts, ports, user names)
+    keep their literal values or environment-overridable defaults.
+
+    NOTE(review): the credentials that used to be hard-coded in this class
+    were committed to the repository and should be treated as compromised
+    and rotated.
+    """
    DEVELOPMENT = False
    DEBUG = False
-    # SQLALCHEMY_DATABASE_URI = environ.get('SQLALCHEMY_DATABASE_URI') or \
-    #                           'sqlite:///' + os.path.join(basedir, 'db.sqlite')
+    # DEVELOPMENT / DEBUG are deliberately not re-assigned to True here: a
+    # second assignment in the class body would silently override the values
+    # above and enable the debugger in production.
+    FLASK_DEBUG = False
+    PYCHARM_DEBUG = False
+    DB_HOST = environ.get('DB_HOST', 'bswnz4.stackhero-network.com')
+    DB_USER = environ.get('DB_USER', 'luke_skywalker')
+    # No secret default: the password must be provided by the environment.
+    # Falls back to an empty string so the URI below stays well-formed.
+    DB_PASS = environ.get('DB_PASS', '')
+    DB_NAME = environ.get('DB_NAME', 'eveai')
+    DB_PORT = environ.get('DB_PORT', '5945')
+
+    SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
+    SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
+    EXPLAIN_TEMPLATE_LOADING = False
+
+    # Define the nginx prefix used for the specific apps
+    EVEAI_APP_LOCATION_PREFIX = '/admin'
+    EVEAI_CHAT_LOCATION_PREFIX = '/chat'
+
+    # flask-mailman settings
+    MAIL_USERNAME = 'eveai_super@flow-it.net'
+    MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
+
+    # file upload settings
+    UPLOAD_FOLDER = '/app/tenant_files'
+
+    REDIS_USER = 'admin'
+    # Secret: supplied by the environment; empty string keeps the URI parseable.
+    REDIS_PASS = environ.get('REDIS_PASS', '')
+    REDIS_URL = '8bciqc.stackhero-network.com'
+    REDIS_PORT = '9961'
+    REDIS_BASE_URI = f'redis://{REDIS_USER}:{REDIS_PASS}@{REDIS_URL}:{REDIS_PORT}'
+
+    # Celery settings
+    # eveai_app Redis Settings (Redis database 0)
+    CELERY_BROKER_URL = f'{REDIS_BASE_URI}/0'
+    CELERY_RESULT_BACKEND = f'{REDIS_BASE_URI}/0'
+    # eveai_chat Redis Settings (Redis database 3)
+    CELERY_BROKER_URL_CHAT = f'{REDIS_BASE_URI}/3'
+    CELERY_RESULT_BACKEND_CHAT = f'{REDIS_BASE_URI}/3'
+
+    # Session settings (Redis database 2)
+    SESSION_REDIS = redis.from_url(f'{REDIS_BASE_URI}/2')
+
+    # OpenAI API Keys
+    OPENAI_API_KEY = environ.get('OPENAI_API_KEY')
+
+    # Groq API Keys
+    GROQ_API_KEY = environ.get('GROQ_API_KEY')
+
+    # Anthropic API Keys
+    ANTHROPIC_API_KEY = environ.get('ANTHROPIC_API_KEY')
+
+    # Portkey API Keys
+    PORTKEY_API_KEY = environ.get('PORTKEY_API_KEY')
+
+    # Unstructured settings
+    UNSTRUCTURED_API_KEY = environ.get('UNSTRUCTURED_API_KEY')
+    UNSTRUCTURED_BASE_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io'
+    UNSTRUCTURED_FULL_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io/general/v0/general'
+
+    # SocketIO settings (message queue on Redis database 1)
+    SOCKETIO_MESSAGE_QUEUE = f'{REDIS_BASE_URI}/1'
+    # NOTE(review): a wildcard origin accepts connections from any site;
+    # confirm this is intended for the production chat widget.
+    SOCKETIO_CORS_ALLOWED_ORIGINS = '*'
+    SOCKETIO_LOGGER = True
+    SOCKETIO_ENGINEIO_LOGGER = True
+    # NOTE(review): Flask-SocketIO documents ping_timeout / ping_interval in
+    # seconds; these values look like milliseconds - confirm how they are consumed.
+    SOCKETIO_PING_TIMEOUT = 20000
+    SOCKETIO_PING_INTERVAL = 25000
+    SOCKETIO_MAX_IDLE_TIME = timedelta(minutes=60)  # Changing this value ==> change maxConnectionDuration value in
+    # eveai-chat-widget.js
+
+    # Google Cloud settings
+    GC_PROJECT_NAME = 'eveai-420711'
+    GC_LOCATION = 'europe-west1'
+    GC_KEY_RING = 'eveai-chat'
+    GC_CRYPTO_KEY = 'envelope-encryption-key'
+
+    # JWT settings
+    JWT_SECRET_KEY = environ.get('JWT_SECRET_KEY')
+
+    # PATH settings
+    ffmpeg_path = '/usr/bin/ffmpeg'


 config = {
--- a/config/prompts/openai/gpt-4o-mini.yaml
+++ b/config/prompts/openai/gpt-4o-mini.yaml
@@ -0,0 +1,79 @@
+html_parse: |
+  You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+  
+  # Best practices are:
+  - Respect wordings and language(s) used in the HTML.
+  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+  - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.  
+  - Be careful of encoding of the text. Everything needs to be human readable.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
+
+  HTML is between triple backquotes.
+
+  ```{html}```  
+
+pdf_parse: |
+  You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+  # Best practices are:
+  - Respect wordings and language(s) used in the PDF.
+  - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+  - When headings are numbered, show the numbering and define the header level. 
+  - A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
+  - Paragraphs are to be stripped of newlines so they become easily readable.
+  - Be careful of encoding of the text. Everything needs to be human readable.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
+
+  PDF content is between triple backquotes.
+
+  ```{pdf_content}```
+
+summary: |
+  Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
+  ```{text}```
+
+rag: |
+  Answer the question based on the following context, delimited between triple backquotes. 
+  {tenant_context}
+  Communicate in {language}, and cite the sources used.
+  If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
+  Context:
+  ```{context}```
+  Question:
+  {question}
+
+history: |
+  You are a helpful assistant that details a question based on a previous context,
+  in such a way that the question is understandable without the previous context. 
+  The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
+  The history is delimited between triple backquotes.
+  You answer by stating the question in {language}.
+  History:
+  ```{history}```
+  Question to be detailed:
+  {question}
+
+encyclopedia: |
+  You have a lot of background knowledge, and as such you are some kind of 
+  'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question. 
+  If not, say you do not have sufficient information to answer the question. Use {language} in your communication.
+  Question:
+  {question}
+
+transcript: |
+  You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
+
+  # Best practices and steps are:
+  - Respect wordings and language(s) used in the transcription. Main language is {language}.
+  - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.    
+  - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
+  - Annotate the text to identify these logical parts using headings in {language}.
+  - Correct errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
+
+  Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
+
+  The transcript is between triple backquotes.
+
+  ```{transcript}```