diff --git a/.env b/.env
new file mode 100644
index 0000000..5db9aef
--- /dev/null
+++ b/.env
@@ -0,0 +1,4 @@
+DB_HOST=localhost
+DB_USER=luke
+DB_PASS=Skywalker!
+DB_NAME=eveai
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c51fd9e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+docker/db/postgresql/
+docker/db/redis/
+docker/logs/
+docker/tenant_files/
+/docker/minio/
+/docker/pulling
+/docker/creating
+/docker/[internal]
+/docker/=
+/docker/stackhero/
+.DS_Store
+**/.DS_Store
diff --git a/.idea/eveAI.iml b/.idea/eveAI.iml
index bb6e42e..5d83c45 100644
--- a/.idea/eveAI.iml
+++ b/.idea/eveAI.iml
@@ -8,7 +8,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 8b28aad..e8182a0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml
new file mode 100644
index 0000000..6df4889
--- /dev/null
+++ b/.idea/sqldialects.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..af25bdb
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+eveai_dev
diff --git a/__pycache__/app.cpython-311.pyc b/__pycache__/app.cpython-311.pyc
deleted file mode 100644
index 568e3b1..0000000
Binary files a/__pycache__/app.cpython-311.pyc and /dev/null differ
diff --git a/__pycache__/app.cpython-312.pyc b/__pycache__/app.cpython-312.pyc
deleted file mode 100644
index 49f3444..0000000
Binary files a/__pycache__/app.cpython-312.pyc and /dev/null differ
diff --git a/__pycache__/config.cpython-312.pyc b/__pycache__/config.cpython-312.pyc
deleted file mode 100644
index 58be9bf..0000000
Binary files a/__pycache__/config.cpython-312.pyc and /dev/null differ
diff --git a/common/.DS_Store b/common/.DS_Store
new file mode 100644
index 0000000..59df3c2
Binary files /dev/null and b/common/.DS_Store differ
diff --git a/common/__pycache__/__init__.cpython-312.pyc b/common/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..0852227
Binary files /dev/null and b/common/__pycache__/__init__.cpython-312.pyc differ
diff --git a/common/__pycache__/extensions.cpython-312.pyc b/common/__pycache__/extensions.cpython-312.pyc
new file mode 100644
index 0000000..29e10e5
Binary files /dev/null and b/common/__pycache__/extensions.cpython-312.pyc differ
diff --git a/common/extensions.py b/common/extensions.py
index a676f55..f36134b 100644
--- a/common/extensions.py
+++ b/common/extensions.py
@@ -10,7 +10,9 @@ from flask_jwt_extended import JWTManager
from flask_session import Session
from flask_wtf import CSRFProtect
-from .utils.key_encryption import JosKMSClient
+# from .utils.key_encryption import JosKMSClient
+from .utils.simple_encryption import SimpleEncryption
+from .utils.minio_utils import MinioClient
# Create extensions
db = SQLAlchemy()
@@ -25,4 +27,7 @@ socketio = SocketIO()
jwt = JWTManager()
session = Session()
-kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
+# kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
+
+simple_encryption = SimpleEncryption()
+minio_client = MinioClient()
diff --git a/common/langchain/EveAIRetriever.py b/common/langchain/EveAIRetriever.py
index 258da0e..5496398 100644
--- a/common/langchain/EveAIRetriever.py
+++ b/common/langchain/EveAIRetriever.py
@@ -1,5 +1,5 @@
from langchain_core.retrievers import BaseRetriever
-from sqlalchemy import func, and_, or_
+from sqlalchemy import func, and_, or_, desc
from sqlalchemy.exc import SQLAlchemyError
from pydantic import BaseModel, Field
from typing import Any, Dict
@@ -20,12 +20,56 @@ class EveAIRetriever(BaseRetriever):
self.tenant_info = tenant_info
def _get_relevant_documents(self, query: str):
+
+
+
current_app.logger.debug(f'Retrieving relevant documents for query: {query}')
query_embedding = self._get_query_embedding(query)
db_class = self.model_variables['embedding_db_model']
similarity_threshold = self.model_variables['similarity_threshold']
k = self.model_variables['k']
+ if self.tenant_info['rag_tuning']:
+ try:
+ current_date = get_date_in_timezone(self.tenant_info['timezone'])
+ current_app.rag_tuning_logger.debug(f'Current date: {current_date}\n')
+
+ # Debug query to show similarity for all valid documents (without chunk text)
+ debug_query = (
+ db.session.query(
+ Document.id.label('document_id'),
+ DocumentVersion.id.label('version_id'),
+ db_class.id.label('embedding_id'),
+ (1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity')
+ )
+ .join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
+ .join(Document, DocumentVersion.doc_id == Document.id)
+ .filter(
+ or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
+ or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date)
+ )
+ .order_by(desc('similarity'))
+ )
+
+ debug_results = debug_query.all()
+
+ current_app.logger.debug("Debug: Similarity for all valid documents:")
+ for row in debug_results:
+ current_app.rag_tuning_logger.debug(f"Doc ID: {row.document_id}, "
+ f"Version ID: {row.version_id}, "
+ f"Embedding ID: {row.embedding_id}, "
+ f"Similarity: {row.similarity}")
+ current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
+ except SQLAlchemyError as e:
+ current_app.logger.error(f'Error generating overview: {e}')
+ db.session.rollback()
+
+ if self.tenant_info['rag_tuning']:
+ current_app.rag_tuning_logger.debug(f'Parameters for Retrieval of documents: \n')
+ current_app.rag_tuning_logger.debug(f'Similarity Threshold: {similarity_threshold}\n')
+ current_app.rag_tuning_logger.debug(f'K: {k}\n')
+ current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
+
try:
current_date = get_date_in_timezone(self.tenant_info['timezone'])
# Subquery to find the latest version of each document
@@ -40,24 +84,31 @@ class EveAIRetriever(BaseRetriever):
# Main query to filter embeddings
query_obj = (
db.session.query(db_class,
- db_class.embedding.cosine_distance(query_embedding).label('distance'))
+ (1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity'))
.join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
.join(Document, DocumentVersion.doc_id == Document.id)
.join(subquery, DocumentVersion.id == subquery.c.latest_version_id)
.filter(
- or_(Document.valid_from.is_(None), Document.valid_from <= current_date),
- or_(Document.valid_to.is_(None), Document.valid_to >= current_date),
- db_class.embedding.cosine_distance(query_embedding) < similarity_threshold
+ or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
+ or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
+ (1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold
)
- .order_by('distance')
+ .order_by(desc('similarity'))
.limit(k)
)
+ if self.tenant_info['rag_tuning']:
+ current_app.rag_tuning_logger.debug(f'Query executed for Retrieval of documents: \n')
+ current_app.rag_tuning_logger.debug(f'{query_obj.statement}\n')
+ current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
+
res = query_obj.all()
if self.tenant_info['rag_tuning']:
- current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents')
- current_app.rag_tuning_logger.debug(f'---------------------------------------')
+ current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents \n')
+ current_app.rag_tuning_logger.debug(f'Data retrieved: \n')
+ current_app.rag_tuning_logger.debug(f'{res}\n')
+ current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
result = []
for doc in res:
diff --git a/common/langchain/__pycache__/EveAIHistoryRetriever.cpython-312.pyc b/common/langchain/__pycache__/EveAIHistoryRetriever.cpython-312.pyc
new file mode 100644
index 0000000..6eb8d74
Binary files /dev/null and b/common/langchain/__pycache__/EveAIHistoryRetriever.cpython-312.pyc differ
diff --git a/common/langchain/__pycache__/EveAIRetriever.cpython-312.pyc b/common/langchain/__pycache__/EveAIRetriever.cpython-312.pyc
new file mode 100644
index 0000000..974cf86
Binary files /dev/null and b/common/langchain/__pycache__/EveAIRetriever.cpython-312.pyc differ
diff --git a/common/models/.DS_Store b/common/models/.DS_Store
new file mode 100644
index 0000000..f3a10b2
Binary files /dev/null and b/common/models/.DS_Store differ
diff --git a/common/models/__pycache__/__init__.cpython-312.pyc b/common/models/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..11892cf
Binary files /dev/null and b/common/models/__pycache__/__init__.cpython-312.pyc differ
diff --git a/common/models/__pycache__/document.cpython-312.pyc b/common/models/__pycache__/document.cpython-312.pyc
new file mode 100644
index 0000000..471fdd2
Binary files /dev/null and b/common/models/__pycache__/document.cpython-312.pyc differ
diff --git a/common/models/__pycache__/interaction.cpython-312.pyc b/common/models/__pycache__/interaction.cpython-312.pyc
new file mode 100644
index 0000000..70d8fa6
Binary files /dev/null and b/common/models/__pycache__/interaction.cpython-312.pyc differ
diff --git a/common/models/__pycache__/user.cpython-312.pyc b/common/models/__pycache__/user.cpython-312.pyc
new file mode 100644
index 0000000..0428f31
Binary files /dev/null and b/common/models/__pycache__/user.cpython-312.pyc differ
diff --git a/common/models/document.py b/common/models/document.py
index 4b06a49..aeed9a3 100644
--- a/common/models/document.py
+++ b/common/models/document.py
@@ -97,3 +97,16 @@ class EmbeddingSmallOpenAI(Embedding):
__mapper_args__ = {
'polymorphic_identity': 'embedding_small_openai',
}
+
+
+class EmbeddingLargeOpenAI(Embedding):
+ __tablename__ = 'embedding_large_openai'
+ id = db.Column(db.Integer, db.ForeignKey('embeddings.id'), primary_key=True)
+
+ # 3072 is the OpenAI Large Embedding dimension.
+ # If another embedding model is chosen, this dimension may need to be changed.
+ embedding = db.Column(Vector(3072), nullable=False)
+
+ __mapper_args__ = {
+ 'polymorphic_identity': 'embedding_large_openai',
+ }
diff --git a/common/models/user.py b/common/models/user.py
index e1caa2e..69e17f2 100644
--- a/common/models/user.py
+++ b/common/models/user.py
@@ -35,6 +35,9 @@ class Tenant(db.Model):
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
+ min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
+ max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
+
# Embedding search variables
es_k = db.Column(db.Integer, nullable=True, default=5)
@@ -77,6 +80,8 @@ class Tenant(db.Model):
'html_end_tags': self.html_end_tags,
'html_included_elements': self.html_included_elements,
'html_excluded_elements': self.html_excluded_elements,
+ 'min_chunk_size': self.min_chunk_size,
+ 'max_chunk_size': self.max_chunk_size,
'es_k': self.es_k,
'es_similarity_threshold': self.es_similarity_threshold,
'chat_RAG_temperature': self.chat_RAG_temperature,
diff --git a/eveai_app/static/scss/.DS_Store b/common/utils/.DS_Store
similarity index 84%
rename from eveai_app/static/scss/.DS_Store
rename to common/utils/.DS_Store
index 51fd37e..86dbb5e 100644
Binary files a/eveai_app/static/scss/.DS_Store and b/common/utils/.DS_Store differ
diff --git a/common/utils/__pycache__/__init__.cpython-312.pyc b/common/utils/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..db92efb
Binary files /dev/null and b/common/utils/__pycache__/__init__.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/celery_utils.cpython-312.pyc b/common/utils/__pycache__/celery_utils.cpython-312.pyc
new file mode 100644
index 0000000..0ec5442
Binary files /dev/null and b/common/utils/__pycache__/celery_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/cors_utils.cpython-312.pyc b/common/utils/__pycache__/cors_utils.cpython-312.pyc
new file mode 100644
index 0000000..ef730b6
Binary files /dev/null and b/common/utils/__pycache__/cors_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/database.cpython-312.pyc b/common/utils/__pycache__/database.cpython-312.pyc
new file mode 100644
index 0000000..93870c4
Binary files /dev/null and b/common/utils/__pycache__/database.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/datetime_utils.cpython-312.pyc b/common/utils/__pycache__/datetime_utils.cpython-312.pyc
new file mode 100644
index 0000000..d792341
Binary files /dev/null and b/common/utils/__pycache__/datetime_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/debug_utils.cpython-312.pyc b/common/utils/__pycache__/debug_utils.cpython-312.pyc
new file mode 100644
index 0000000..85c5fe7
Binary files /dev/null and b/common/utils/__pycache__/debug_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/key_encryption.cpython-312.pyc b/common/utils/__pycache__/key_encryption.cpython-312.pyc
new file mode 100644
index 0000000..a81d7e7
Binary files /dev/null and b/common/utils/__pycache__/key_encryption.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/middleware.cpython-312.pyc b/common/utils/__pycache__/middleware.cpython-312.pyc
new file mode 100644
index 0000000..815e2c1
Binary files /dev/null and b/common/utils/__pycache__/middleware.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/model_utils.cpython-312.pyc b/common/utils/__pycache__/model_utils.cpython-312.pyc
new file mode 100644
index 0000000..15d6528
Binary files /dev/null and b/common/utils/__pycache__/model_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/nginx_utils.cpython-312.pyc b/common/utils/__pycache__/nginx_utils.cpython-312.pyc
new file mode 100644
index 0000000..b5d239f
Binary files /dev/null and b/common/utils/__pycache__/nginx_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/security.cpython-312.pyc b/common/utils/__pycache__/security.cpython-312.pyc
new file mode 100644
index 0000000..a95c177
Binary files /dev/null and b/common/utils/__pycache__/security.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/security_utils.cpython-312.pyc b/common/utils/__pycache__/security_utils.cpython-312.pyc
new file mode 100644
index 0000000..842a2e9
Binary files /dev/null and b/common/utils/__pycache__/security_utils.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/template_filters.cpython-312.pyc b/common/utils/__pycache__/template_filters.cpython-312.pyc
new file mode 100644
index 0000000..6aeaba8
Binary files /dev/null and b/common/utils/__pycache__/template_filters.cpython-312.pyc differ
diff --git a/common/utils/__pycache__/view_assistants.cpython-312.pyc b/common/utils/__pycache__/view_assistants.cpython-312.pyc
new file mode 100644
index 0000000..6b1ae74
Binary files /dev/null and b/common/utils/__pycache__/view_assistants.cpython-312.pyc differ
diff --git a/common/utils/key_encryption.py b/common/utils/key_encryption.py
deleted file mode 100644
index 5ee1bec..0000000
--- a/common/utils/key_encryption.py
+++ /dev/null
@@ -1,110 +0,0 @@
-from google.cloud import kms_v1
-from base64 import b64encode, b64decode
-from Crypto.Cipher import AES
-from Crypto.Random import get_random_bytes
-import random
-import time
-from flask import Flask
-import os
-import ast
-
-
-def generate_api_key(prefix="EveAI-Chat"):
- parts = [str(random.randint(1000, 9999)) for _ in range(5)]
- return f"{prefix}-{'-'.join(parts)}"
-
-
-class JosKMSClient(kms_v1.KeyManagementServiceClient):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.key_name = None
- self.crypto_key = None
- self.key_ring = None
- self.location = None
- self.project_id = None
-
- def init_app(self, app: Flask):
- self.project_id = app.config.get('GC_PROJECT_NAME')
- self.location = app.config.get('GC_LOCATION')
- self.key_ring = app.config.get('GC_KEY_RING')
- self.crypto_key = app.config.get('GC_CRYPTO_KEY')
- self.key_name = self.crypto_key_path(self.project_id, self.location, self.key_ring, self.crypto_key)
- app.logger.info(f'Project ID: {self.project_id}')
- app.logger.info(f'Location: {self.location}')
- app.logger.info(f'Key Ring: {self.key_ring}')
- app.logger.info(f'Crypto Key: {self.crypto_key}')
- app.logger.info(f'Key Name: {self.key_name}')
-
- app.logger.info(f'Service Account Key Path: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}')
-
- os.environ["GOOGLE_CLOUD_PROJECT"] = self.project_id
-
- def encrypt_api_key(self, api_key):
- """Encrypts the API key using the latest version of the KEK."""
- dek = get_random_bytes(32) # AES 256-bit key
- cipher = AES.new(dek, AES.MODE_GCM)
- ciphertext, tag = cipher.encrypt_and_digest(api_key.encode())
- # print(f'Dek: {dek}')
-
- # Encrypt the DEK using the latest version of the Google Cloud KMS key
- encrypt_response = self.encrypt(
- request={'name': self.key_name, 'plaintext': dek}
- )
- encrypted_dek = encrypt_response.ciphertext
- # print(f"Encrypted DEK: {encrypted_dek}")
- #
- # # Check
- # decrypt_response = self.decrypt(
- # request={'name': self.key_name, 'ciphertext': encrypted_dek}
- # )
- # decrypted_dek = decrypt_response.plaintext
- # print(f"Decrypted DEK: {decrypted_dek}")
-
- # Store the version of the key used
- key_version = encrypt_response.name
-
- return {
- 'key_version': key_version,
- 'encrypted_dek': b64encode(encrypted_dek).decode('utf-8'),
- 'nonce': b64encode(cipher.nonce).decode('utf-8'),
- 'tag': b64encode(tag).decode('utf-8'),
- 'ciphertext': b64encode(ciphertext).decode('utf-8')
- }
-
- def decrypt_api_key(self, encrypted_data):
- """Decrypts the API key using the specified key version."""
- if isinstance(encrypted_data, str):
- encrypted_data = ast.literal_eval(encrypted_data)
- key_version = encrypted_data['key_version']
- key_name = self.key_name
- encrypted_dek = b64decode(encrypted_data['encrypted_dek'].encode('utf-8'))
- nonce = b64decode(encrypted_data['nonce'].encode('utf-8'))
- tag = b64decode(encrypted_data['tag'].encode('utf-8'))
- ciphertext = b64decode(encrypted_data['ciphertext'].encode('utf-8'))
-
- # Decrypt the DEK using the specified version of the Google Cloud KMS key
- try:
- decrypt_response = self.decrypt(
- request={'name': key_name, 'ciphertext': encrypted_dek}
- )
- dek = decrypt_response.plaintext
- except Exception as e:
- print(f"Failed to decrypt DEK: {e}")
- return None
-
- cipher = AES.new(dek, AES.MODE_GCM, nonce=nonce)
- api_key = cipher.decrypt_and_verify(ciphertext, tag)
- return api_key.decode()
-
- def check_kms_access_and_latency(self):
- # key_name = self.crypto_key_path(self.project_id, self.location, self.key_ring, self.crypto_key)
- #
- # start_time = time.time()
- # try:
- # response = self.get_crypto_key(name=key_name)
- # end_time = time.time()
- # print(f"Response Time: {end_time - start_time} seconds")
- # print("Access to KMS is successful.")
- # except Exception as e:
- # print(f"Failed to access KMS: {e}")
- pass
diff --git a/common/utils/middleware.py b/common/utils/middleware.py
index d1c3503..e14a497 100644
--- a/common/utils/middleware.py
+++ b/common/utils/middleware.py
@@ -4,7 +4,8 @@ for handling tenant requests
"""
from flask_security import current_user
-from flask import session, current_app
+from flask import session, current_app, redirect
+from common.utils.nginx_utils import prefixed_url_for
from .database import Database
@@ -15,6 +16,10 @@ def mw_before_request():
switch tenant schema
"""
+ if 'tenant' not in session:
+ current_app.logger.warning('No tenant defined in session')
+ return redirect(prefixed_url_for('security_bp.login'))
+
tenant_id = session['tenant']['id']
if not tenant_id:
raise Exception('Cannot switch schema for tenant: no tenant defined in session')
diff --git a/common/utils/minio_utils.py b/common/utils/minio_utils.py
new file mode 100644
index 0000000..bd4e2bb
--- /dev/null
+++ b/common/utils/minio_utils.py
@@ -0,0 +1,86 @@
+from minio import Minio
+from minio.error import S3Error
+from flask import Flask
+import io
+from werkzeug.datastructures import FileStorage
+
+class MinioClient:
+ def __init__(self):
+ self.client = None
+
+ def init_app(self, app: Flask):
+ self.client = Minio(
+ app.config['MINIO_ENDPOINT'],
+ access_key=app.config['MINIO_ACCESS_KEY'],
+ secret_key=app.config['MINIO_SECRET_KEY'],
+ secure=app.config.get('MINIO_USE_HTTPS', False)
+ )
+ app.logger.info(f"MinIO client initialized with endpoint: {app.config['MINIO_ENDPOINT']}")
+
+ def generate_bucket_name(self, tenant_id):
+ return f"tenant-{tenant_id}-bucket"
+
+ def create_tenant_bucket(self, tenant_id):
+ bucket_name = self.generate_bucket_name(tenant_id)
+ try:
+ if not self.client.bucket_exists(bucket_name):
+ self.client.make_bucket(bucket_name)
+ return bucket_name
+ return bucket_name
+ except S3Error as err:
+ raise Exception(f"Error occurred while creating bucket: {err}")
+
+ def generate_object_name(self, document_id, language, version_id, filename):
+        return f"{document_id}/{language}/{version_id}/{filename}"
+
+ def upload_document_file(self, tenant_id, document_id, language, version_id, filename, file_data):
+ bucket_name = self.generate_bucket_name(tenant_id)
+ object_name = self.generate_object_name(document_id, language, version_id, filename)
+
+ try:
+ if isinstance(file_data, FileStorage):
+ file_data = file_data.read()
+ elif isinstance(file_data, io.BytesIO):
+ file_data = file_data.getvalue()
+ elif isinstance(file_data, str):
+ file_data = file_data.encode('utf-8')
+ elif not isinstance(file_data, bytes):
+ raise TypeError('Unsupported file type. Expected FileStorage, BytesIO, str, or bytes.')
+
+ self.client.put_object(
+ bucket_name, object_name, io.BytesIO(file_data), len(file_data)
+ )
+ return True
+ except S3Error as err:
+ raise Exception(f"Error occurred while uploading file: {err}")
+
+ def download_document_file(self, tenant_id, document_id, language, version_id, filename):
+ bucket_name = self.generate_bucket_name(tenant_id)
+ object_name = self.generate_object_name(document_id, language, version_id, filename)
+ try:
+ response = self.client.get_object(bucket_name, object_name)
+ return response.read()
+ except S3Error as err:
+ raise Exception(f"Error occurred while downloading file: {err}")
+
+ def list_document_files(self, tenant_id, document_id, language=None, version_id=None):
+ bucket_name = self.generate_bucket_name(tenant_id)
+ prefix = f"{document_id}/"
+ if language:
+ prefix += f"{language}/"
+ if version_id:
+ prefix += f"{version_id}/"
+ try:
+ objects = self.client.list_objects(bucket_name, prefix=prefix, recursive=True)
+ return [obj.object_name for obj in objects]
+ except S3Error as err:
+ raise Exception(f"Error occurred while listing files: {err}")
+
+ def delete_document_file(self, tenant_id, document_id, language, version_id, filename):
+ bucket_name = self.generate_bucket_name(tenant_id)
+ object_name = self.generate_object_name(document_id, language, version_id, filename)
+ try:
+ self.client.remove_object(bucket_name, object_name)
+ return True
+ except S3Error as err:
+ raise Exception(f"Error occurred while deleting file: {err}")
diff --git a/common/utils/model_utils.py b/common/utils/model_utils.py
index cdcb459..9470685 100644
--- a/common/utils/model_utils.py
+++ b/common/utils/model_utils.py
@@ -1,13 +1,16 @@
import langcodes
from flask import current_app
-from langchain_community.embeddings import OpenAIEmbeddings
-from langchain_openai import ChatOpenAI
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
+from langchain_anthropic import ChatAnthropic
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate
import ast
from typing import List
+from openai import OpenAI
+# from groq import Groq
+from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
-from common.models.document import EmbeddingSmallOpenAI
+from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
class CitedAnswer(BaseModel):
@@ -82,17 +85,34 @@ def select_model_variables(tenant):
model_variables['html_included_elements'] = tenant.html_included_elements
model_variables['html_excluded_elements'] = tenant.html_excluded_elements
+ # Set Chunk Size variables
+ model_variables['min_chunk_size'] = tenant.min_chunk_size
+ model_variables['max_chunk_size'] = tenant.max_chunk_size
+
# Set Embedding variables
match embedding_provider:
case 'openai':
+ portkey_metadata = {'tenant_id': str(tenant.id)}
+ portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
+ provider='openai',
+ metadata=portkey_metadata)
match embedding_model:
case 'text-embedding-3-small':
api_key = current_app.config.get('OPENAI_API_KEY')
model_variables['embedding_model'] = OpenAIEmbeddings(api_key=api_key,
- model='text-embedding-3-small')
+ model='text-embedding-3-small',
+ base_url=PORTKEY_GATEWAY_URL,
+ default_headers=portkey_headers
+ )
model_variables['embedding_db_model'] = EmbeddingSmallOpenAI
- model_variables['min_chunk_size'] = current_app.config.get('OAI_TE3S_MIN_CHUNK_SIZE')
- model_variables['max_chunk_size'] = current_app.config.get('OAI_TE3S_MAX_CHUNK_SIZE')
+ case 'text-embedding-3-large':
+ api_key = current_app.config.get('OPENAI_API_KEY')
+ model_variables['embedding_model'] = OpenAIEmbeddings(api_key=api_key,
+ model='text-embedding-3-large',
+ base_url=PORTKEY_GATEWAY_URL,
+ default_headers=portkey_headers
+ )
+ model_variables['embedding_db_model'] = EmbeddingLargeOpenAI
case _:
raise Exception(f'Error setting model variables for tenant {tenant.id} '
f'error: Invalid embedding model')
@@ -103,39 +123,76 @@ def select_model_variables(tenant):
# Set Chat model variables
match llm_provider:
case 'openai':
+ portkey_metadata = {'tenant_id': str(tenant.id)}
+ portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
+ metadata=portkey_metadata,
+ provider='openai')
+ tool_calling_supported = False
api_key = current_app.config.get('OPENAI_API_KEY')
model_variables['llm'] = ChatOpenAI(api_key=api_key,
model=llm_model,
- temperature=model_variables['RAG_temperature'])
+ temperature=model_variables['RAG_temperature'],
+ base_url=PORTKEY_GATEWAY_URL,
+ default_headers=portkey_headers)
model_variables['llm_no_rag'] = ChatOpenAI(api_key=api_key,
model=llm_model,
- temperature=model_variables['no_RAG_temperature'])
+ temperature=model_variables['no_RAG_temperature'],
+ base_url=PORTKEY_GATEWAY_URL,
+ default_headers=portkey_headers)
tool_calling_supported = False
match llm_model:
- case 'gpt-4-turbo' | 'gpt-4o':
- summary_template = current_app.config.get('GPT4_SUMMARY_TEMPLATE')
- rag_template = current_app.config.get('GPT4_RAG_TEMPLATE')
- history_template = current_app.config.get('GPT4_HISTORY_TEMPLATE')
- encyclopedia_template = current_app.config.get('GPT4_ENCYCLOPEDIA_TEMPLATE')
+ case 'gpt-4-turbo' | 'gpt-4o' | 'gpt-4o-mini':
tool_calling_supported = True
- case 'gpt-3-5-turbo':
- summary_template = current_app.config.get('GPT3_5_SUMMARY_TEMPLATE')
- rag_template = current_app.config.get('GPT3_5_RAG_TEMPLATE')
- history_template = current_app.config.get('GPT3_5_HISTORY_TEMPLATE')
- encyclopedia_template = current_app.config.get('GPT3_5_ENCYCLOPEDIA_TEMPLATE')
case _:
raise Exception(f'Error setting model variables for tenant {tenant.id} '
f'error: Invalid chat model')
- model_variables['summary_template'] = summary_template
- model_variables['rag_template'] = rag_template
- model_variables['history_template'] = history_template
- model_variables['encyclopedia_template'] = encyclopedia_template
- if tool_calling_supported:
- model_variables['cited_answer_cls'] = CitedAnswer
+ case 'anthropic':
+ api_key = current_app.config.get('ANTHROPIC_API_KEY')
+ # Anthropic does not have the same 'generic' model names as OpenAI
+ llm_model_ext = current_app.config.get('ANTHROPIC_LLM_VERSIONS').get(llm_model)
+ model_variables['llm'] = ChatAnthropic(api_key=api_key,
+ model=llm_model_ext,
+ temperature=model_variables['RAG_temperature'])
+ model_variables['llm_no_rag'] = ChatAnthropic(api_key=api_key,
+ model=llm_model_ext,
+                                                          temperature=model_variables['no_RAG_temperature'])
+ tool_calling_supported = True
case _:
raise Exception(f'Error setting model variables for tenant {tenant.id} '
f'error: Invalid chat provider')
+ if tool_calling_supported:
+ model_variables['cited_answer_cls'] = CitedAnswer
+
+ templates = current_app.config['PROMPT_TEMPLATES'][f'{llm_provider}.{llm_model}']
+ model_variables['summary_template'] = templates['summary']
+ model_variables['rag_template'] = templates['rag']
+ model_variables['history_template'] = templates['history']
+ model_variables['encyclopedia_template'] = templates['encyclopedia']
+ model_variables['transcript_template'] = templates['transcript']
+ model_variables['html_parse_template'] = templates['html_parse']
+ model_variables['pdf_parse_template'] = templates['pdf_parse']
+
+ model_variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
+
+ # Transcription Client Variables.
+ # Using Groq
+ # api_key = current_app.config.get('GROQ_API_KEY')
+ # model_variables['transcription_client'] = Groq(api_key=api_key)
+ # model_variables['transcription_model'] = 'whisper-large-v3'
+
+ # Using OpenAI for transcriptions
+ portkey_metadata = {'tenant_id': str(tenant.id)}
+ portkey_headers = createHeaders(api_key=current_app.config.get('PORTKEY_API_KEY'),
+ metadata=portkey_metadata,
+ provider='openai'
+ )
+ api_key = current_app.config.get('OPENAI_API_KEY')
+ model_variables['transcription_client'] = OpenAI(api_key=api_key,
+ base_url=PORTKEY_GATEWAY_URL,
+ default_headers=portkey_headers)
+ model_variables['transcription_model'] = 'whisper-1'
+
return model_variables
diff --git a/common/utils/os_utils.py b/common/utils/os_utils.py
new file mode 100644
index 0000000..9f1354f
--- /dev/null
+++ b/common/utils/os_utils.py
@@ -0,0 +1,30 @@
+import os
+import gevent
+import time
+from flask import current_app
+
+
+def safe_remove(file_path):
+ try:
+ if os.path.exists(file_path):
+ os.remove(file_path)
+
+ # Wait for the file to be deleted
+ start_time = time.time()
+ while os.path.exists(file_path):
+ gevent.sleep(1)
+ if time.time() - start_time > 5: # 5 second timeout
+ raise TimeoutError(f"Failed to delete {file_path} after 5 seconds")
+
+ current_app.logger.info(f"Successfully deleted {file_path}")
+ else:
+ current_app.logger.info(f"{file_path} does not exist, skipping deletion")
+ except Exception as e:
+ current_app.logger.error(f"Error deleting {file_path}: {str(e)}")
+ raise
+
+
+def sync_folder(file_path):
+ dir_fd = os.open(file_path, os.O_RDONLY)
+ os.fsync(dir_fd)
+ os.close(dir_fd)
diff --git a/common/utils/prompt_loader.py b/common/utils/prompt_loader.py
new file mode 100644
index 0000000..8ecfa60
--- /dev/null
+++ b/common/utils/prompt_loader.py
@@ -0,0 +1,15 @@
+import os
+import yaml
+
+
+def load_prompt_templates(model_name):
+ provider, model = model_name.split('.')
+ file_path = os.path.join('config', 'prompts', provider, f'{model}.yaml')
+
+ if not os.path.exists(file_path):
+ raise FileNotFoundError(f"No prompt template file found for {model_name}")
+
+ with open(file_path, 'r') as file:
+ templates = yaml.safe_load(file)
+
+ return templates
diff --git a/common/utils/simple_encryption.py b/common/utils/simple_encryption.py
new file mode 100644
index 0000000..5cfc51b
--- /dev/null
+++ b/common/utils/simple_encryption.py
@@ -0,0 +1,45 @@
+import secrets  # CSPRNG: the 'random' module is predictable and unsuitable for API keys
+from cryptography.fernet import Fernet
+from flask import Flask
+
+
+def generate_api_key(prefix="EveAI-Chat"):
+    parts = [str(secrets.randbelow(9000) + 1000) for _ in range(5)]
+    return f"{prefix}-{'-'.join(parts)}"
+
+
+class SimpleEncryption:
+ def __init__(self, app: Flask = None):
+ self.app = app
+ self.fernet = None
+ if app is not None:
+ self.init_app(app)
+
+ def init_app(self, app: Flask):
+ self.app = app
+ encryption_key = app.config.get('API_ENCRYPTION_KEY')
+ if not encryption_key:
+            raise ValueError("API_ENCRYPTION_KEY is not set in the app configuration")
+ self.fernet = Fernet(encryption_key.encode())
+
+ # Optionally log the initialization (similar to your current setup)
+ app.logger.info('SimpleEncryption initialized')
+
+ def encrypt_api_key(self, api_key: str) -> str:
+ if not self.fernet:
+ raise RuntimeError("SimpleEncryption is not initialized. Call init_app first.")
+ return self.fernet.encrypt(api_key.encode()).decode()
+
+ def decrypt_api_key(self, encrypted_api_key: str) -> str:
+ if not self.fernet:
+ raise RuntimeError("SimpleEncryption is not initialized. Call init_app first.")
+ return self.fernet.decrypt(encrypted_api_key.encode()).decode()
+
+ @staticmethod
+ def generate_key() -> str:
+ """Generate a new Fernet key."""
+ return Fernet.generate_key().decode()
+
+# Usage:
+# from common.utils.simple_encryption import simple_encryption
+# simple_encryption.init_app(app)
diff --git a/common/utils/view_assistants.py b/common/utils/view_assistants.py
index 6780fa2..3d94a8f 100644
--- a/common/utils/view_assistants.py
+++ b/common/utils/view_assistants.py
@@ -43,4 +43,8 @@ def form_validation_failed(request, form):
if request.method == 'POST':
for fieldName, errorMessages in form.errors.items():
for err in errorMessages:
- flash(f"Error in {fieldName}: {err}", 'danger')
\ No newline at end of file
+ flash(f"Error in {fieldName}: {err}", 'danger')
+
+
+def form_to_dict(form):
+ return {field.name: field.data for field in form if field.name != 'csrf_token' and hasattr(field, 'data')}
\ No newline at end of file
diff --git a/config/.DS_Store b/config/.DS_Store
new file mode 100644
index 0000000..19a305d
Binary files /dev/null and b/config/.DS_Store differ
diff --git a/config/__pycache__/__init__.cpython-312.pyc b/config/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..381a0b7
Binary files /dev/null and b/config/__pycache__/__init__.cpython-312.pyc differ
diff --git a/config/__pycache__/config.cpython-312.pyc b/config/__pycache__/config.cpython-312.pyc
new file mode 100644
index 0000000..44122e4
Binary files /dev/null and b/config/__pycache__/config.cpython-312.pyc differ
diff --git a/config/__pycache__/logging_config.cpython-312.pyc b/config/__pycache__/logging_config.cpython-312.pyc
new file mode 100644
index 0000000..8e971ca
Binary files /dev/null and b/config/__pycache__/logging_config.cpython-312.pyc differ
diff --git a/config/config.py b/config/config.py
index a3f80d5..d37db88 100644
--- a/config/config.py
+++ b/config/config.py
@@ -2,17 +2,22 @@ from os import environ, path
from datetime import timedelta
import redis
+from common.utils.prompt_loader import load_prompt_templates
+
basedir = path.abspath(path.dirname(__file__))
class Config(object):
DEBUG = False
DEVELOPMENT = False
- SECRET_KEY = '97867c1491bea5ee6a8e8436eb11bf2ba6a69ff53ab1b17ecba450d0f2e572e1'
+ SECRET_KEY = environ.get('SECRET_KEY')
SESSION_COOKIE_SECURE = False
SESSION_COOKIE_HTTPONLY = True
+    SESSION_KEY_PREFIX = f'{environ.get("COMPONENT_NAME")}_'  # inner quotes must differ from the f-string's own (SyntaxError before Python 3.12)
WTF_CSRF_ENABLED = True
+ WTF_CSRF_TIME_LIMIT = None
+ WTF_CSRF_SSL_STRICT = False # Set to True if using HTTPS
# flask-security-too settings
# SECURITY_URL_PREFIX = '/admin'
@@ -27,9 +32,9 @@ class Config(object):
# SECURITY_POST_RESET_VIEW = '/admin/login'
# SECURITY_POST_CHANGE_VIEW = '/admin/login'
# SECURITY_BLUEPRINT_NAME = 'security_bp'
- SECURITY_PASSWORD_SALT = '228614859439123264035565568761433607235'
+ SECURITY_PASSWORD_SALT = environ.get('SECURITY_PASSWORD_SALT')
REMEMBER_COOKIE_SAMESITE = 'strict'
- SESSION_COOKIE_SAMESITE = 'strict'
+ SESSION_COOKIE_SAMESITE = 'Lax'
SECURITY_CONFIRMABLE = True
SECURITY_TRACKABLE = True
SECURITY_PASSWORD_COMPLEXITY_CHECKER = 'zxcvbn'
@@ -43,13 +48,6 @@ class Config(object):
SECURITY_CSRF_HEADER = 'X-XSRF-TOKEN'
WTF_CSRF_CHECK_DEFAULT = False
- # flask-mailman settings
- MAIL_SERVER = 'mail.flow-it.net'
- MAIL_PORT = 587
- MAIL_USE_TLS = True
- MAIL_USE_SSL = False
- MAIL_DEFAULT_SENDER = ('eveAI Admin', 'eveai_admin@flow-it.net')
-
# file upload settings
MAX_CONTENT_LENGTH = 16 * 1024 * 1024
UPLOAD_EXTENSIONS = ['.txt', '.pdf', '.png', '.jpg', '.jpeg', '.gif']
@@ -58,8 +56,32 @@ class Config(object):
SUPPORTED_LANGUAGES = ['en', 'fr', 'nl', 'de', 'es']
# supported LLMs
- SUPPORTED_EMBEDDINGS = ['openai.text-embedding-3-small', 'mistral.mistral-embed']
- SUPPORTED_LLMS = ['openai.gpt-4o', 'openai.gpt-4-turbo', 'openai.gpt-3.5-turbo', 'mistral.mistral-large-2402']
+ SUPPORTED_EMBEDDINGS = ['openai.text-embedding-3-small', 'openai.text-embedding-3-large', 'mistral.mistral-embed']
+ SUPPORTED_LLMS = ['openai.gpt-4o', 'anthropic.claude-3-5-sonnet', 'openai.gpt-4o-mini']
+
+ ANTHROPIC_LLM_VERSIONS = {'claude-3-5-sonnet': 'claude-3-5-sonnet-20240620', }
+
+ # Load prompt templates dynamically
+ PROMPT_TEMPLATES = {model: load_prompt_templates(model) for model in SUPPORTED_LLMS}
+
+ # Annotation text chunk length
+ ANNOTATION_TEXT_CHUNK_LENGTH = {
+ 'openai.gpt-4o': 10000,
+ 'openai.gpt-4o-mini': 10000,
+ 'anthropic.claude-3-5-sonnet': 8000
+ }
+
+ # OpenAI API Keys
+ OPENAI_API_KEY = environ.get('OPENAI_API_KEY')
+
+ # Groq API Keys
+ GROQ_API_KEY = environ.get('GROQ_API_KEY')
+
+ # Anthropic API Keys
+ ANTHROPIC_API_KEY = environ.get('ANTHROPIC_API_KEY')
+
+ # Portkey API Keys
+ PORTKEY_API_KEY = environ.get('PORTKEY_API_KEY')
# Celery settings
CELERY_TASK_SERIALIZER = 'json'
@@ -68,72 +90,23 @@ class Config(object):
CELERY_TIMEZONE = 'UTC'
CELERY_ENABLE_UTC = True
- # Chunk Definition, Embedding dependent
- OAI_TE3S_MIN_CHUNK_SIZE = 2000
- OAI_TE3S_MAX_CHUNK_SIZE = 3000
-
- # LLM TEMPLATES
- GPT4_SUMMARY_TEMPLATE = """Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
- ```{text}```"""
- GPT3_5_SUMMARY_TEMPLATE = """Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
- ```{text}```"""
-
- GPT4_RAG_TEMPLATE = """Answer the question based on the following context, delimited between triple backquotes.
- {tenant_context}
- Use the following {language} in your communication, and cite the sources used.
- If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
- Context:
- ```{context}```
- Question:
- {question}"""
- GPT3_5_RAG_TEMPLATE = """Answer the question based on the following context, delimited between triple backquotes.
- {tenant_context}
- Use the following {language} in your communication.
- If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
- Context:
- ```{context}```
- Question:
- {question}"""
-
- GPT4_HISTORY_TEMPLATE = """You are a helpful assistant that details a question based on a previous context,
- in such a way that the question is understandable without the previous context.
- {tenant_context}
- The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
- The history is delimited between triple backquotes.
- You answer by stating the question in {language}.
- History:
- ```{history}```
- Question to be detailed:
- {question}"""
-
- GPT3_5_HISTORY_TEMPLATE = """You are a helpful assistant that details a question based on a previous context,
- in such a way that the question is understandable without the previous context.
- {tenant_context}
- The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
- The history is delimited between triple backquotes.
- You answer by stating the question in {language}.
- History:
- ```{history}```
- Question to be detailed:
- {question}"""
-
- GPT4_ENCYCLOPEDIA_TEMPLATE = """You have a lot of background knowledge, and as such you are some kind of
- 'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
- If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
- Question:
- {question}"""
-
# SocketIO settings
# SOCKETIO_ASYNC_MODE = 'threading'
SOCKETIO_ASYNC_MODE = 'gevent'
# Session Settings
SESSION_TYPE = 'redis'
- SESSION_PERMANENT = False
+ SESSION_PERMANENT = True
SESSION_USE_SIGNER = True
PERMANENT_SESSION_LIFETIME = timedelta(minutes=60)
SESSION_REFRESH_EACH_REQUEST = True
+ # JWT settings
+ JWT_SECRET_KEY = environ.get('JWT_SECRET_KEY')
+
+ # API Encryption
+ API_ENCRYPTION_KEY = environ.get('API_ENCRYPTION_KEY')
+
# Fallback Algorithms
FALLBACK_ALGORITHMS = [
"RAG_TENANT",
@@ -155,11 +128,104 @@ class DevConfig(Config):
DEVELOPMENT = True
DEBUG = True
FLASK_DEBUG = True
- PYCHARM_DEBUG = False
- SQLALCHEMY_DATABASE_URI = 'postgresql+pg8000://josako@localhost:5432/eveAI'
- SQLALCHEMY_BINDS = {'public': 'postgresql+pg8000://josako@localhost:5432/eveAI'}
EXPLAIN_TEMPLATE_LOADING = False
+ # Database Settings
+ DB_HOST = environ.get('DB_HOST', 'localhost')
+ DB_USER = environ.get('DB_USER', 'luke')
+    DB_PASS = environ.get('DB_PASS', 'Skywalker!')  # FIXME: hard-coded fallback credential — dev only, must never be reused in prod
+ DB_NAME = environ.get('DB_NAME', 'eveai')
+ SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:5432/{DB_NAME}'
+ SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
+
+ # flask-mailman settings
+ MAIL_SERVER = 'mail.flow-it.net'
+ MAIL_PORT = 587
+ MAIL_USE_TLS = True
+ MAIL_USE_SSL = False
+ MAIL_DEFAULT_SENDER = ('eveAI Admin', 'eveai_admin@flow-it.net')
+ MAIL_USERNAME = environ.get('MAIL_USERNAME')
+ MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
+
+ # Define the nginx prefix used for the specific apps
+ EVEAI_APP_LOCATION_PREFIX = '/admin'
+ EVEAI_CHAT_LOCATION_PREFIX = '/chat'
+
+ # file upload settings
+ # UPLOAD_FOLDER = '/app/tenant_files'
+
+ # Celery settings
+ # eveai_app Redis Settings
+ CELERY_BROKER_URL = 'redis://redis:6379/0'
+ CELERY_RESULT_BACKEND = 'redis://redis:6379/0'
+ # eveai_chat Redis Settings
+ CELERY_BROKER_URL_CHAT = 'redis://redis:6379/3'
+ CELERY_RESULT_BACKEND_CHAT = 'redis://redis:6379/3'
+
+ # Unstructured settings
+ # UNSTRUCTURED_API_KEY = 'pDgCrXumYhM3CNvjvwV8msMldXC3uw'
+ # UNSTRUCTURED_BASE_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io'
+ # UNSTRUCTURED_FULL_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io/general/v0/general'
+
+ # SocketIO settings
+ SOCKETIO_MESSAGE_QUEUE = 'redis://redis:6379/1'
+ SOCKETIO_CORS_ALLOWED_ORIGINS = '*'
+ SOCKETIO_LOGGER = True
+ SOCKETIO_ENGINEIO_LOGGER = True
+ SOCKETIO_PING_TIMEOUT = 20000
+ SOCKETIO_PING_INTERVAL = 25000
+ SOCKETIO_MAX_IDLE_TIME = timedelta(minutes=60) # Changing this value ==> change maxConnectionDuration value in
+ # eveai-chat-widget.js
+
+ # Google Cloud settings
+ GC_PROJECT_NAME = 'eveai-420711'
+ GC_LOCATION = 'europe-west1'
+ GC_KEY_RING = 'eveai-chat'
+ GC_CRYPTO_KEY = 'envelope-encryption-key'
+
+ # Session settings
+ SESSION_REDIS = redis.from_url('redis://redis:6379/2')
+
+ # PATH settings
+ ffmpeg_path = '/usr/bin/ffmpeg'
+
+ # MINIO
+ MINIO_ENDPOINT = 'minio:9000'
+ MINIO_ACCESS_KEY = 'minioadmin'
+ MINIO_SECRET_KEY = 'minioadmin'
+ MINIO_USE_HTTPS = False
+
+
+class ProdConfig(Config):
+ DEVELOPMENT = False
+    DEBUG = False
+    # NOTE: duplicate 'DEBUG = False' assignment removed (was set twice)
+ FLASK_DEBUG = False
+ EXPLAIN_TEMPLATE_LOADING = False
+
+ # SESSION SETTINGS
+ SESSION_COOKIE_SECURE = True
+
+ WTF_CSRF_SSL_STRICT = True # Set to True if using HTTPS
+
+ # Database Settings
+ DB_HOST = environ.get('DB_HOST')
+ DB_USER = environ.get('DB_USER')
+ DB_PASS = environ.get('DB_PASS')
+ DB_NAME = environ.get('DB_NAME')
+    DB_PORT = environ.get('DB_PORT', '5432')  # default prevents literal 'None' in the URI when unset
+ SQLALCHEMY_DATABASE_URI = f'postgresql+pg8000://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
+ SQLALCHEMY_BINDS = {'public': SQLALCHEMY_DATABASE_URI}
+
+ # flask-mailman settings
+ MAIL_SERVER = 'mail.askeveai.com'
+ MAIL_PORT = 587
+ MAIL_USE_TLS = True
+ MAIL_USE_SSL = False
+ MAIL_DEFAULT_SENDER = ('Evie Admin', 'evie_admin@askeveai.com')
+ MAIL_USERNAME = environ.get('MAIL_USERNAME')
+ MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
+
# Define the nginx prefix used for the specific apps
EVEAI_APP_LOCATION_PREFIX = '/admin'
EVEAI_CHAT_LOCATION_PREFIX = '/chat'
@@ -169,29 +235,35 @@ class DevConfig(Config):
MAIL_PASSWORD = '$6xsWGbNtx$CFMQZqc*'
# file upload settings
- UPLOAD_FOLDER = '/Volumes/OWC4M2_1/Development/eveAI/file_store'
+ # UPLOAD_FOLDER = '/app/tenant_files'
+
+ # Redis Settings
+ REDIS_USER = environ.get('REDIS_USER')
+ REDIS_PASS = environ.get('REDIS_PASS')
+ REDIS_URL = environ.get('REDIS_URL')
+ REDIS_PORT = environ.get('REDIS_PORT', '6379')
+ REDIS_BASE_URI = f'redis://{REDIS_USER}:{REDIS_PASS}@{REDIS_URL}:{REDIS_PORT}'
# Celery settings
# eveai_app Redis Settings
- CELERY_BROKER_URL = 'redis://localhost:6379/0'
- CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'
+ CELERY_BROKER_URL = f'{REDIS_BASE_URI}/0'
+ CELERY_RESULT_BACKEND = f'{REDIS_BASE_URI}/0'
# eveai_chat Redis Settings
- CELERY_BROKER_URL_CHAT = 'redis://localhost:6379/3'
- CELERY_RESULT_BACKEND_CHAT = 'redis://localhost:6379/3'
+ CELERY_BROKER_URL_CHAT = f'{REDIS_BASE_URI}/3'
+ CELERY_RESULT_BACKEND_CHAT = f'{REDIS_BASE_URI}/3'
- # OpenAI API Keys
- OPENAI_API_KEY = 'sk-proj-8R0jWzwjL7PeoPyMhJTZT3BlbkFJLb6HfRB2Hr9cEVFWEhU7'
-
- # Unstructured settings
- UNSTRUCTURED_API_KEY = 'pDgCrXumYhM3CNvjvwV8msMldXC3uw'
- UNSTRUCTURED_BASE_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io'
- UNSTRUCTURED_FULL_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io/general/v0/general'
+ # Session settings
+ SESSION_REDIS = redis.from_url(f'{REDIS_BASE_URI}/2')
# SocketIO settings
- SOCKETIO_MESSAGE_QUEUE = 'redis://localhost:6379/1'
+ SOCKETIO_MESSAGE_QUEUE = f'{REDIS_BASE_URI}/1'
SOCKETIO_CORS_ALLOWED_ORIGINS = '*'
SOCKETIO_LOGGER = True
SOCKETIO_ENGINEIO_LOGGER = True
+ SOCKETIO_PING_TIMEOUT = 20000
+ SOCKETIO_PING_INTERVAL = 25000
+ SOCKETIO_MAX_IDLE_TIME = timedelta(minutes=60) # Changing this value ==> change maxConnectionDuration value in
+ # eveai-chat-widget.js
# Google Cloud settings
GC_PROJECT_NAME = 'eveai-420711'
@@ -199,22 +271,20 @@ class DevConfig(Config):
GC_KEY_RING = 'eveai-chat'
GC_CRYPTO_KEY = 'envelope-encryption-key'
- # JWT settings
- JWT_SECRET_KEY = 'bsdMkmQ8ObfMD52yAFg4trrvjgjMhuIqg2fjDpD/JqvgY0ccCcmlsEnVFmR79WPiLKEA3i8a5zmejwLZKl4v9Q=='
+ # PATH settings
+ ffmpeg_path = '/usr/bin/ffmpeg'
- # Session settings
- SESSION_REDIS = redis.from_url('redis://localhost:6379/2')
+ # MINIO
+ MINIO_ENDPOINT = environ.get('MINIO_ENDPOINT')
+ MINIO_ACCESS_KEY = environ.get('MINIO_ACCESS_KEY')
+ MINIO_SECRET_KEY = environ.get('MINIO_SECRET_KEY')
+ MINIO_USE_HTTPS = True
-class ProdConfig(Config):
- DEVELOPMENT = False
- DEBUG = False
- # SQLALCHEMY_DATABASE_URI = environ.get('SQLALCHEMY_DATABASE_URI') or \
- # 'sqlite:///' + os.path.join(basedir, 'db.sqlite')
-
-
-config = {
- 'dev': DevConfig(),
- 'prod': ProdConfig(),
- 'default': DevConfig(),
-}
+def get_config(config_name='dev'):
+ configs = {
+ 'dev': DevConfig,
+ 'prod': ProdConfig,
+ 'default': DevConfig,
+ }
+ return configs.get(config_name)
diff --git a/config/logging_config.py b/config/logging_config.py
index 60fb4c2..790351e 100644
--- a/config/logging_config.py
+++ b/config/logging_config.py
@@ -1,3 +1,29 @@
+import os
+from graypy import GELFUDPHandler
+import logging
+import logging.config
+
+# Graylog configuration
+GRAYLOG_HOST = os.environ.get('GRAYLOG_HOST', 'localhost')
+GRAYLOG_PORT = int(os.environ.get('GRAYLOG_PORT', 12201))
+env = os.environ.get('FLASK_ENV', 'development')
+
+
+class CustomLogRecord(logging.LogRecord):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.component = os.environ.get('COMPONENT_NAME', 'eveai_app') # Set default component value here
+
+
+def custom_log_record_factory(*args, **kwargs):
+ record = CustomLogRecord(*args, **kwargs)
+ return record
+
+
+# Set the custom log record factory
+logging.setLogRecordFactory(custom_log_record_factory)
+
+
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
@@ -6,7 +32,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_app.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -14,7 +40,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_workers.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -22,7 +48,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_chat.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -30,7 +56,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_chat_workers.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -38,7 +64,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/sqlalchemy.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -46,7 +72,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/mailman.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -54,7 +80,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/security.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -62,7 +88,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/rag_tuning.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -70,7 +96,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/embed_tuning.log',
- 'maxBytes': 1024*1024*5, # 5MB
+ 'maxBytes': 1024 * 1024 * 5, # 5MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -79,55 +105,69 @@ LOGGING = {
'level': 'DEBUG',
'formatter': 'standard',
},
+ 'graylog': {
+ 'level': 'DEBUG',
+ 'class': 'graypy.GELFUDPHandler',
+ 'host': GRAYLOG_HOST,
+ 'port': GRAYLOG_PORT,
+ 'debugging_fields': True, # Set to True if you want to include debugging fields
+ 'extra_fields': True, # Set to True if you want to include extra fields
+ },
},
'formatters': {
'standard': {
- 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
+ 'format': '%(asctime)s [%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
+ '[Thread: %(threadName)s]: %(message)s'
+ },
+ 'graylog': {
+ 'format': '[%(levelname)s] %(name)s (%(component)s) [%(module)s:%(lineno)d in %(funcName)s] '
+ '[Thread: %(threadName)s]: %(message)s',
+ 'datefmt': '%Y-%m-%d %H:%M:%S',
},
},
'loggers': {
'eveai_app': { # logger for the eveai_app
- 'handlers': ['file_app',],
+ 'handlers': ['file_app', 'graylog', ] if env == 'production' else ['file_app', ],
'level': 'DEBUG',
'propagate': False
},
'eveai_workers': { # logger for the eveai_workers
- 'handlers': ['file_workers',],
+ 'handlers': ['file_workers', 'graylog', ] if env == 'production' else ['file_workers', ],
'level': 'DEBUG',
'propagate': False
},
'eveai_chat': { # logger for the eveai_chat
- 'handlers': ['file_chat',],
+ 'handlers': ['file_chat', 'graylog', ] if env == 'production' else ['file_chat', ],
'level': 'DEBUG',
'propagate': False
},
'eveai_chat_workers': { # logger for the eveai_chat_workers
- 'handlers': ['file_chat_workers',],
+ 'handlers': ['file_chat_workers', 'graylog', ] if env == 'production' else ['file_chat_workers', ],
'level': 'DEBUG',
'propagate': False
},
'sqlalchemy.engine': { # logger for the sqlalchemy
- 'handlers': ['file_sqlalchemy',],
+ 'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ],
'level': 'DEBUG',
'propagate': False
},
'mailman': { # logger for the mailman
- 'handlers': ['file_mailman', 'console'],
+ 'handlers': ['file_mailman', 'graylog', ] if env == 'production' else ['file_mailman', ],
'level': 'DEBUG',
'propagate': False
},
'security': { # logger for the security
- 'handlers': ['file_security', 'console'],
+ 'handlers': ['file_security', 'graylog', ] if env == 'production' else ['file_security', ],
'level': 'DEBUG',
'propagate': False
},
'rag_tuning': { # logger for the rag_tuning
- 'handlers': ['file_rag_tuning', 'console'],
+ 'handlers': ['file_rag_tuning', 'graylog', ] if env == 'production' else ['file_rag_tuning', ],
'level': 'DEBUG',
'propagate': False
},
'embed_tuning': { # logger for the embed_tuning
- 'handlers': ['file_embed_tuning', 'console'],
+ 'handlers': ['file_embed_tuning', 'graylog', ] if env == 'production' else ['file_embed_tuning', ],
'level': 'DEBUG',
'propagate': False
},
@@ -137,4 +177,4 @@ LOGGING = {
'propagate': False
},
}
-}
\ No newline at end of file
+}
diff --git a/config/prompts/anthropic/claude-3-5-sonnet.yaml b/config/prompts/anthropic/claude-3-5-sonnet.yaml
new file mode 100644
index 0000000..c2f2106
--- /dev/null
+++ b/config/prompts/anthropic/claude-3-5-sonnet.yaml
@@ -0,0 +1,88 @@
+html_parse: |
+ You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+ # Best practices are:
+ - Respect wordings and language(s) used in the HTML.
+ - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+ - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.
+ - Be careful of encoding of the text. Everything needs to be human readable.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
+
+ HTML is between triple backticks.
+
+ ```{html}```
+
+pdf_parse: |
+ You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+ # Best practices are:
+ - Respect wordings and language(s) used in the PDF.
+ - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+ - When headings are numbered, show the numbering and define the header level.
+ - A new item is started when a is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
+ - Paragraphs are to be stripped of newlines so they become easily readable.
+ - Be careful of encoding of the text. Everything needs to be human readable.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
+
+ PDF content is between triple backticks.
+
+ ```{pdf_content}```
+
+summary: |
+ Write a concise summary of the text in {language}. The text is delimited between triple backticks.
+ ```{text}```
+
+rag: |
+ Answer the question based on the following context, delimited between triple backticks.
+ {tenant_context}
+ Use the following {language} in your communication, and cite the sources used.
+ If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
+ Context:
+ ```{context}```
+ Question:
+ {question}
+
+history: |
+ You are a helpful assistant that details a question based on a previous context,
+ in such a way that the question is understandable without the previous context.
+ The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
+ The history is delimited between triple backticks.
+ You answer by stating the question in {language}.
+ History:
+ ```{history}```
+ Question to be detailed:
+ {question}
+
+encyclopedia: |
+ You have a lot of background knowledge, and as such you are some kind of
+ 'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
+ If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
+ Question:
+ {question}
+
+transcript: |
+  You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. Your task is to process and improve the given transcript, not to summarize it.
+
+ IMPORTANT INSTRUCTIONS:
+ 1. DO NOT summarize the transcript and don't make your own interpretations. Return the FULL, COMPLETE transcript with improvements.
+ 2. Improve any errors in the transcript based on context.
+ 3. Respect the original wording and language(s) used in the transcription. Main Language used is {language}.
+ 4. Divide the transcript into paragraphs for better readability. Each paragraph ONLY contains ORIGINAL TEXT.
+ 5. Group related paragraphs into logical sections.
+ 6. Add appropriate headers (using markdown syntax) to each section in {language}.
+ 7. We do not need an overall title. Just add logical headers
+ 8. Ensure that the entire transcript is included in your response, from start to finish.
+
+ REMEMBER:
+ - Your output should be the complete transcript in markdown format, NOT A SUMMARY OR ANALYSIS.
+ - Include EVERYTHING from the original transcript, just organized and formatted better.
+ - Just return the markdown version of the transcript, without any other text such as an introduction or a summary.
+
+ Here is the transcript to process (between triple backticks):
+
+ ```{transcript}```
+
+ Process this transcript according to the instructions above and return the full, formatted markdown version.
+
\ No newline at end of file
diff --git a/config/prompts/openai/gpt-4o-mini.yaml b/config/prompts/openai/gpt-4o-mini.yaml
new file mode 100644
index 0000000..2413299
--- /dev/null
+++ b/config/prompts/openai/gpt-4o-mini.yaml
@@ -0,0 +1,79 @@
+html_parse: |
+ You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+ # Best practices are:
+ - Respect wordings and language(s) used in the HTML.
+ - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+ - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.
+ - Be careful of encoding of the text. Everything needs to be human readable.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
+
+ HTML is between triple backquotes.
+
+ ```{html}```
+
+pdf_parse: |
+ You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+ # Best practices are:
+ - Respect wordings and language(s) used in the PDF.
+ - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+ - When headings are numbered, show the numbering and define the header level.
+ - A new item is started when a is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
+ - Paragraphs are to be stripped of newlines so they become easily readable.
+ - Be careful of encoding of the text. Everything needs to be human readable.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
+
+ PDF content is between triple backquotes.
+
+ ```{pdf_content}```
+
+summary: |
+ Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
+ ```{text}```
+
+rag: |
+ Answer the question based on the following context, delimited between triple backquotes.
+ {tenant_context}
+ Use the following {language} in your communication, and cite the sources used.
+ If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
+ Context:
+ ```{context}```
+ Question:
+ {question}
+
+history: |
+ You are a helpful assistant that details a question based on a previous context,
+ in such a way that the question is understandable without the previous context.
+ The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
+ The history is delimited between triple backquotes.
+ You answer by stating the question in {language}.
+ History:
+ ```{history}```
+ Question to be detailed:
+ {question}
+
+encyclopedia: |
+ You have a lot of background knowledge, and as such you are some kind of
+ 'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
+ If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
+ Question:
+ {question}
+
+transcript: |
+ You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
+
+ # Best practices and steps are:
+ - Respect wordings and language(s) used in the transcription. Main language is {language}.
+ - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.
+ - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
+ - annotate the text to identify these logical parts using headings in {language}.
+ - improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
+
+ The transcript is between triple backquotes.
+
+ ```{transcript}```
\ No newline at end of file
diff --git a/config/prompts/openai/gpt-4o.yaml b/config/prompts/openai/gpt-4o.yaml
new file mode 100644
index 0000000..2413299
--- /dev/null
+++ b/config/prompts/openai/gpt-4o.yaml
@@ -0,0 +1,79 @@
+html_parse: |
+ You are a top administrative assistant specialized in transforming given HTML into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+ # Best practices are:
+ - Respect wordings and language(s) used in the HTML.
+ - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+ - Sub-headers can be used as lists. This is true when a header is followed by a series of sub-headers without content (paragraphs or listed items). Present those sub-headers as a list.
+ - Be careful of encoding of the text. Everything needs to be human readable.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input html file. Answer with the pure markdown, without any other text.
+
+ HTML is between triple backquotes.
+
+ ```{html}```
+
+pdf_parse: |
+ You are a top administrative assistant specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
+
+ # Best practices are:
+ - Respect wordings and language(s) used in the PDF.
+ - The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
+ - When headings are numbered, show the numbering and define the header level.
+ - A new item is started when a newline is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
+ - Paragraphs are to be stripped of newlines so they become easily readable.
+ - Be careful of encoding of the text. Everything needs to be human readable.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of the processing of the complete input pdf content. Answer with the pure markdown, without any other text.
+
+ PDF content is between triple backquotes.
+
+ ```{pdf_content}```
+
+summary: |
+ Write a concise summary of the text in {language}. The text is delimited between triple backquotes.
+ ```{text}```
+
+rag: |
+ Answer the question based on the following context, delimited between triple backquotes.
+ {tenant_context}
+ Use the following {language} in your communication, and cite the sources used.
+ If the question cannot be answered using the given context, say "I have insufficient information to answer this question."
+ Context:
+ ```{context}```
+ Question:
+ {question}
+
+history: |
+ You are a helpful assistant that details a question based on a previous context,
+ in such a way that the question is understandable without the previous context.
+ The context is a conversation history, with the HUMAN asking questions, the AI answering questions.
+ The history is delimited between triple backquotes.
+ You answer by stating the question in {language}.
+ History:
+ ```{history}```
+ Question to be detailed:
+ {question}
+
+encyclopedia: |
+ You have a lot of background knowledge, and as such you are some kind of
+ 'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
+ If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
+ Question:
+ {question}
+
+transcript: |
+ You are a top administrative assistant specialized in transforming given transcriptions into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system. The transcriptions originate from podcast, videos and similar material.
+
+ # Best practices and steps are:
+ - Respect wordings and language(s) used in the transcription. Main language is {language}.
+ - Sometimes, the transcript contains speech of several people participating in a conversation. Although these are not obvious from reading the file, try to detect when other people are speaking.
+ - Divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
+ - annotate the text to identify these logical parts using headings in {language}.
+ - improve errors in the transcript given the context, but do not change the meaning and intentions of the transcription.
+
+ Process the file carefully, and take a stepped approach. The resulting markdown should be the result of processing the complete input transcription. Answer with the pure markdown, without any other text.
+
+ The transcript is between triple backquotes.
+
+ ```{transcript}```
\ No newline at end of file
diff --git a/docker/.env b/docker/.env
new file mode 100644
index 0000000..0353d22
--- /dev/null
+++ b/docker/.env
@@ -0,0 +1 @@
+COMPOSE_PROJECT_NAME=eveai_development
\ No newline at end of file
diff --git a/docker/build_and_push_eveai.sh b/docker/build_and_push_eveai.sh
new file mode 100755
index 0000000..fd49adc
--- /dev/null
+++ b/docker/build_and_push_eveai.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+# Exit on any error
+set -e
+
+. ./docker_env_switch.sh dev
+
+# Load environment variables
+source .env
+
+# Docker registry
+REGISTRY="josakola"
+
+# Tag (you might want to use a version or git commit hash)
+TAG="latest"
+
+# Platforms to build for
+PLATFORMS="linux/amd64,linux/arm64"
+
+# Default action
+ACTION="both"
+
+# Default build options
+NO_CACHE=""
+PROGRESS=""
+DEBUG=""
+
+# Function to display usage information
+usage() {
+ echo "Usage: $0 [-b|-p] [--no-cache] [--progress=plain] [--debug] [service1 service2 ...]"
+ echo " -b: Build only (for current platform)"
+ echo " -p: Push only (multi-platform)"
+ echo " --no-cache: Perform a clean build without using cache"
+ echo " --progress=plain: Show detailed progress of the build"
+ echo " --debug: Enable debug mode for the build"
+ echo " If no option is provided, both build and push will be performed."
+ echo " If no services are specified, all eveai_ services and nginx will be processed."
+}
+
+# Parse command-line options
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ -b)
+ ACTION="build"
+ shift
+ ;;
+ -p)
+ ACTION="push"
+ shift
+ ;;
+ --no-cache)
+ NO_CACHE="--no-cache"
+ shift
+ ;;
+ --progress=plain)
+ PROGRESS="--progress=plain"
+ shift
+ ;;
+ --debug)
+ DEBUG="--debug"
+ shift
+ ;;
+ -*)
+ echo "Unknown option: $1"
+ usage
+ exit 1
+ ;;
+ *)
+ break
+ ;;
+ esac
+done
+
+# Function to build and/or push a service
+process_service() {
+ local SERVICE="$1"
+ echo "Processing $SERVICE..."
+
+ # Extract the build context and dockerfile from the compose file
+ CONTEXT=$(yq e ".services.$SERVICE.build.context" compose_dev.yaml)
+ DOCKERFILE=$(yq e ".services.$SERVICE.build.dockerfile" compose_dev.yaml)
+
+ # Check if context directory exists
+ if [ ! -d "$CONTEXT" ]; then
+ echo "Error: Build context directory '$CONTEXT' for service '$SERVICE' does not exist."
+ return 1
+ fi
+
+ # Check if Dockerfile exists
+ if [ ! -f "$CONTEXT/$DOCKERFILE" ]; then
+ echo "Error: Dockerfile '$DOCKERFILE' for service '$SERVICE' does not exist in context '$CONTEXT'."
+ return 1
+ fi
+
+ # Build and/or push based on ACTION
+ if [ "$ACTION" = "build" ]; then
+ echo "Building $SERVICE for current platform..."
+ docker build \
+ $NO_CACHE \
+ $PROGRESS \
+ $DEBUG \
+ -t "$REGISTRY/$SERVICE:$TAG" \
+ -f "$CONTEXT/$DOCKERFILE" \
+ "$CONTEXT"
+ elif [ "$ACTION" = "push" ]; then
+ echo "Building and pushing $SERVICE for multiple platforms..."
+ docker buildx build \
+ $NO_CACHE \
+ $PROGRESS \
+ $DEBUG \
+ --platform "$PLATFORMS" \
+ -t "$REGISTRY/$SERVICE:$TAG" \
+ -f "$CONTEXT/$DOCKERFILE" \
+ "$CONTEXT" \
+ --push
+ else
+ echo "Building $SERVICE for current platform..."
+ docker build \
+ $NO_CACHE \
+ $PROGRESS \
+ $DEBUG \
+ -t "$REGISTRY/$SERVICE:$TAG" \
+ -f "$CONTEXT/$DOCKERFILE" \
+ "$CONTEXT"
+
+ echo "Building and pushing $SERVICE for multiple platforms..."
+ docker buildx build \
+ $NO_CACHE \
+ $PROGRESS \
+ $DEBUG \
+ --platform "$PLATFORMS" \
+ -t "$REGISTRY/$SERVICE:$TAG" \
+ -f "$CONTEXT/$DOCKERFILE" \
+ "$CONTEXT" \
+ --push
+ fi
+}
+
+# If no arguments are provided, process all services
+if [ $# -eq 0 ]; then
+ SERVICES=()
+ while IFS= read -r line; do
+ SERVICES+=("$line")
+ done < <(yq e '.services | keys | .[]' compose_dev.yaml | grep -E '^(nginx|eveai_)')
+else
+ SERVICES=("$@")
+fi
+
+# Check if eveai_builder exists, if not create it
+if ! docker buildx inspect eveai_builder > /dev/null 2>&1; then
+ echo "Creating eveai_builder..."
+ docker buildx create --name eveai_builder
+fi
+
+# Use eveai_builder
+echo "Using eveai_builder..."
+docker buildx use eveai_builder
+
+# Loop through services
+for SERVICE in "${SERVICES[@]}"; do
+ if [[ "$SERVICE" == "nginx" || "$SERVICE" == eveai_* ]]; then
+ if process_service "$SERVICE"; then
+ echo "Successfully processed $SERVICE"
+ else
+ echo "Failed to process $SERVICE"
+ fi
+ else
+ echo "Skipping $SERVICE as it's not nginx or doesn't start with eveai_"
+ fi
+done
+
+echo "All specified services processed."
\ No newline at end of file
diff --git a/docker/compose_dev.yaml b/docker/compose_dev.yaml
new file mode 100644
index 0000000..3e2b28f
--- /dev/null
+++ b/docker/compose_dev.yaml
@@ -0,0 +1,278 @@
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Docker Compose reference guide at
+# https://docs.docker.com/go/compose-spec-reference/
+
+# Here the instructions define your application as a service called "server".
+# This service is built from the Dockerfile in the current directory.
+# You can add other services your application may depend on here, such as a
+# database or a cache. For examples, see the Awesome Compose repository:
+# https://github.com/docker/awesome-compose
+
+x-common-variables: &common-variables
+ DB_HOST: db
+ DB_USER: luke
+ DB_PASS: 'Skywalker!'
+ DB_NAME: eveai
+ DB_PORT: '5432'
+ FLASK_ENV: development
+ FLASK_DEBUG: true
+ SECRET_KEY: '97867c1491bea5ee6a8e8436eb11bf2ba6a69ff53ab1b17ecba450d0f2e572e1'
+ SECURITY_PASSWORD_SALT: '228614859439123264035565568761433607235'
+ MAIL_USERNAME: eveai_super@flow-it.net
+ MAIL_PASSWORD: '$6xsWGbNtx$CFMQZqc*'
+ OPENAI_API_KEY: 'sk-proj-8R0jWzwjL7PeoPyMhJTZT3BlbkFJLb6HfRB2Hr9cEVFWEhU7'
+ GROQ_API_KEY: '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'
+ ANTHROPIC_API_KEY: 'sk-ant-api03-c2TmkzbReeGhXBO5JxNH6BJNylRDonc9GmZd0eRbrvyekec2'
+ PORTKEY_API_KEY: 'T2Dt4QTpgCvWxa1OftYCJtj7NcDZ'
+ JWT_SECRET_KEY: 'bsdMkmQ8ObfMD52yAFg4trrvjgjMhuIqg2fjDpD/JqvgY0ccCcmlsEnVFmR79WPiLKEA3i8a5zmejwLZKl4v9Q=='
+ API_ENCRYPTION_KEY: 'xfF5369IsredSrlrYZqkM9ZNrfUASYYS6TCcAR9UKj4='
+ MINIO_ENDPOINT: minio:9000
+ MINIO_ACCESS_KEY: minioadmin
+ MINIO_SECRET_KEY: minioadmin
+ NGINX_SERVER_NAME: 'localhost http://macstudio.ask-eve-ai-local.com/'
+
+
+networks:
+ eveai-network:
+ driver: bridge
+
+services:
+ nginx:
+ image: josakola/nginx:latest
+ build:
+ context: ..
+ dockerfile: ./docker/nginx/Dockerfile
+ platforms:
+ - linux/amd64
+ - linux/arm64
+ ports:
+ - 80:80
+ - 8080:8080
+ environment:
+ <<: *common-variables
+ volumes:
+ - ../nginx:/etc/nginx
+ - ../nginx/sites-enabled:/etc/nginx/sites-enabled
+ - ../nginx/static:/etc/nginx/static
+ - ../nginx/public:/etc/nginx/public
+ - ./logs/nginx:/var/log/nginx
+ depends_on:
+ - eveai_app
+ - eveai_chat
+ networks:
+ - eveai-network
+
+ eveai_app:
+ image: josakola/eveai_app:latest
+ build:
+ context: ..
+ dockerfile: ./docker/eveai_app/Dockerfile
+ platforms:
+ - linux/amd64
+ - linux/arm64
+ ports:
+ - 5001:5001
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_app
+ volumes:
+ - ../eveai_app:/app/eveai_app
+ - ../common:/app/common
+ - ../config:/app/config
+ - ../migrations:/app/migrations
+ - ../scripts:/app/scripts
+ - ../patched_packages:/app/patched_packages
+ - eveai_logs:/app/logs
+ depends_on:
+ db:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
+ minio:
+ condition: service_healthy
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:5001/health"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+# entrypoint: ["scripts/entrypoint.sh"]
+# command: ["scripts/start_eveai_app.sh"]
+ networks:
+ - eveai-network
+
+ eveai_workers:
+ image: josakola/eveai_workers:latest
+ build:
+ context: ..
+ dockerfile: ./docker/eveai_workers/Dockerfile
+ platforms:
+ - linux/amd64
+ - linux/arm64
+# ports:
+# - 5001:5001
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_workers
+ volumes:
+ - ../eveai_workers:/app/eveai_workers
+ - ../common:/app/common
+ - ../config:/app/config
+ - ../scripts:/app/scripts
+ - ../patched_packages:/app/patched_packages
+ - eveai_logs:/app/logs
+ depends_on:
+ db:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
+ minio:
+ condition: service_healthy
+# healthcheck:
+# test: [ "CMD", "curl", "-f", "http://localhost:5001/health" ]
+# interval: 10s
+# timeout: 5s
+# retries: 5
+# entrypoint: [ "sh", "-c", "scripts/entrypoint.sh" ]
+# command: [ "sh", "-c", "scripts/start_eveai_workers.sh" ]
+ networks:
+ - eveai-network
+
+ eveai_chat:
+ image: josakola/eveai_chat:latest
+ build:
+ context: ..
+ dockerfile: ./docker/eveai_chat/Dockerfile
+ platforms:
+ - linux/amd64
+ - linux/arm64
+ ports:
+ - 5002:5002
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_chat
+ volumes:
+ - ../eveai_chat:/app/eveai_chat
+ - ../common:/app/common
+ - ../config:/app/config
+ - ../scripts:/app/scripts
+ - ../patched_packages:/app/patched_packages
+ - eveai_logs:/app/logs
+ depends_on:
+ db:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
+ healthcheck:
+ test: [ "CMD", "curl", "-f", "http://localhost:5002/health" ] # Adjust based on your health endpoint
+ interval: 10s
+ timeout: 5s
+ retries: 5
+# entrypoint: [ "sh", "-c", "scripts/entrypoint.sh" ]
+# command: ["sh", "-c", "scripts/start_eveai_chat.sh"]
+ networks:
+ - eveai-network
+
+ eveai_chat_workers:
+ image: josakola/eveai_chat_workers:latest
+ build:
+ context: ..
+ dockerfile: ./docker/eveai_chat_workers/Dockerfile
+ platforms:
+ - linux/amd64
+ - linux/arm64
+# ports:
+# - 5001:5001
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_chat_workers
+ volumes:
+ - ../eveai_chat_workers:/app/eveai_chat_workers
+ - ../common:/app/common
+ - ../config:/app/config
+ - ../scripts:/app/scripts
+ - ../patched_packages:/app/patched_packages
+ - eveai_logs:/app/logs
+ depends_on:
+ db:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
+# healthcheck:
+# test: [ "CMD", "curl", "-f", "http://localhost:5001/health" ]
+# interval: 10s
+# timeout: 5s
+# retries: 5
+# entrypoint: [ "sh", "-c", "scripts/entrypoint.sh" ]
+# command: [ "sh", "-c", "scripts/start_eveai_chat_workers.sh" ]
+ networks:
+ - eveai-network
+
+ db:
+ hostname: db
+ image: ankane/pgvector
+ ports:
+ - 5432:5432
+ restart: always
+ environment:
+ - POSTGRES_DB=eveai
+ - POSTGRES_USER=luke
+ - POSTGRES_PASSWORD=Skywalker!
+ volumes:
+ - ./db/postgresql:/var/lib/postgresql/data
+ - ./db/init.sql:/docker-entrypoint-initdb.d/init.sql
+ healthcheck:
+ test: [ "CMD-SHELL", "pg_isready -d eveai -U luke" ]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - eveai-network
+
+ redis:
+ image: redis:7.2.5
+ restart: always
+ expose:
+ - 6379
+ volumes:
+ - ./db/redis:/data
+ healthcheck:
+ test: [ "CMD", "redis-cli", "ping" ]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - eveai-network
+
+ minio:
+ image: minio/minio
+ ports:
+ - "9000:9000"
+ - "9001:9001"
+ expose:
+ - 9000
+ volumes:
+ - ./minio/data:/data
+ - ./minio/config:/root/.minio
+ environment:
+ MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
+ MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
+ command: server /data --console-address ":9001"
+ healthcheck:
+ test: [ "CMD", "mc", "ready", "local" ]
+ interval: 30s
+ timeout: 20s
+ retries: 3
+ start_period: 30s
+ networks:
+ - eveai-network
+
+volumes:
+ minio_data:
+ eveai_logs:
+# db-data:
+# redis-data:
+# tenant-files:
+#secrets:
+# db-password:
+# file: ./db/password.txt
+
diff --git a/docker/compose_stackhero.yaml b/docker/compose_stackhero.yaml
new file mode 100644
index 0000000..0f4ac01
--- /dev/null
+++ b/docker/compose_stackhero.yaml
@@ -0,0 +1,152 @@
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Docker Compose reference guide at
+# https://docs.docker.com/go/compose-spec-reference/
+
+# Here the instructions define your application as a service called "server".
+# This service is built from the Dockerfile in the current directory.
+# You can add other services your application may depend on here, such as a
+# database or a cache. For examples, see the Awesome Compose repository:
+# https://github.com/docker/awesome-compose
+
+x-common-variables: &common-variables
+ DB_HOST: bswnz4.stackhero-network.com
+ DB_USER: luke_skywalker
+ DB_PASS: 2MK&1rHmWEydE2rFuJLq*ls%tdkPAk2
+ DB_NAME: eveai
+ DB_PORT: '5945'
+ FLASK_ENV: production
+ FLASK_DEBUG: false
+ SECRET_KEY: '38wg8e1lvhlvcu0apr95n8o07axf244lzaa7b7djh7itrf8jnyyh1lkuco529w'
+ SECURITY_PASSWORD_SALT: '166448071751628781809462050022558634074'
+ MAIL_USERNAME: 'evie_admin@askeveai.com'
+ MAIL_PASSWORD: 's5D%R#y^v!s&6Z^i0k&'
+ REDIS_USER: eveai
+ REDIS_PASS: 'jHliZwGD36sONgbm0fc6SOpzLbknqq4RNF8K'
+ REDIS_URL: 8bciqc.stackhero-network.com
+ REDIS_PORT: '9961'
+ OPENAI_API_KEY: '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'
+ GROQ_API_KEY: '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'
+ ANTHROPIC_API_KEY: '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'
+ PORTKEY_API_KEY: 'XvmvBFIVbm76opUxA7MNP14QmdQj'
+ JWT_SECRET_KEY: '0d99e810e686ea567ef305d8e9b06195c4db482952e19276590a726cde60a408'
+ API_ENCRYPTION_KEY: 'Ly5XYWwEKiasfAwEqdEMdwR-k0vhrq6QPYd4whEROB0='
+ GRAYLOG_HOST: de4zvu.stackhero-network.com
+ GRAYLOG_PORT: '12201'
+ MINIO_ENDPOINT: 'fxwnyl.stackhero-network.com:443'
+ MINIO_ACCESS_KEY: 04JKmQln8PQpyTmMiCPc
+ MINIO_SECRET_KEY: 2PEZAD1nlpAmOyDV0TUTuJTQw1qVuYLF3A7GMs0D
+ NGINX_SERVER_NAME: 'evie.askeveai.com mxz536.stackhero-network.com'
+
+networks:
+ eveai-network:
+ driver: bridge
+
+services:
+ nginx:
+ platform: linux/amd64
+ image: josakola/nginx:latest
+ ports:
+ - 80:80
+ - 8080:8080
+ environment:
+ <<: *common-variables
+ volumes:
+# - ../nginx:/etc/nginx
+# - ../nginx/sites-enabled:/etc/nginx/sites-enabled
+# - ../nginx/static:/etc/nginx/static
+# - ../nginx/public:/etc/nginx/public
+ - eveai_logs:/var/log/nginx
+ labels:
+ - "traefik.enable=true"
+ - "traefik.http.routers.nginx.rule=Host(`evie.askeveai.com`) || Host(`mxz536.stackhero-network.com`)"
+ - "traefik.http.routers.nginx.entrypoints=web,websecure"
+ - "traefik.http.routers.nginx.tls.certresolver=letsencrypt"
+ - "traefik.http.services.nginx.loadbalancer.server.port=80"
+ depends_on:
+ - eveai_app
+ - eveai_chat
+ networks:
+ - eveai-network
+
+ eveai_app:
+ platform: linux/amd64
+ image: josakola/eveai_app:latest
+ ports:
+ - 5001:5001
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_app
+ volumes:
+ - eveai_logs:/app/logs
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:5001/health"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - eveai-network
+
+ eveai_workers:
+ platform: linux/amd64
+ image: josakola/eveai_workers:latest
+# ports:
+# - 5001:5001
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_workers
+ volumes:
+ - eveai_logs:/app/logs
+# healthcheck:
+# test: [ "CMD", "curl", "-f", "http://localhost:5001/health" ]
+# interval: 10s
+# timeout: 5s
+# retries: 5
+ networks:
+ - eveai-network
+
+ eveai_chat:
+ platform: linux/amd64
+ image: josakola/eveai_chat:latest
+ ports:
+ - 5002:5002
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_chat
+ volumes:
+ - eveai_logs:/app/logs
+ healthcheck:
+ test: [ "CMD", "curl", "-f", "http://localhost:5002/health" ] # Adjust based on your health endpoint
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - eveai-network
+
+ eveai_chat_workers:
+ platform: linux/amd64
+ image: josakola/eveai_chat_workers:latest
+# ports:
+# - 5001:5001
+ environment:
+ <<: *common-variables
+ COMPONENT_NAME: eveai_chat_workers
+ volumes:
+ - eveai_logs:/app/logs
+# healthcheck:
+# test: [ "CMD", "curl", "-f", "http://localhost:5001/health" ]
+# interval: 10s
+# timeout: 5s
+# retries: 5
+ networks:
+ - eveai-network
+
+volumes:
+ eveai_logs:
+# minio_data:
+# db-data:
+# redis-data:
+# tenant-files:
+#secrets:
+# db-password:
+# file: ./db/password.txt
+
diff --git a/docker/copy_docker_logs.sh b/docker/copy_docker_logs.sh
new file mode 100755
index 0000000..a97481d
--- /dev/null
+++ b/docker/copy_docker_logs.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Check if a volume name was provided
+if [ $# -eq 0 ]; then
+ echo "Error: No volume name provided."
+ echo "Usage: $0 <volume-name>"
+ exit 1
+fi
+
+# Get the volume name from the first argument
+VOLUME_NAME=$1
+
+# Define the local directory to copy logs to
+LOCAL_DIR="./stackhero/logs"
+
+# Create the local directory if it doesn't exist
+mkdir -p "$LOCAL_DIR"
+
+echo "Copying logs from Docker volume '$VOLUME_NAME' to $LOCAL_DIR"
+
+# Create a temporary container with the volume mounted
+CONTAINER_ID=$(docker run -d -v $VOLUME_NAME:/logs --name temp_log_container alpine sh -c 'tail -f /dev/null')
+
+# Check if the container was created successfully
+if [ $? -ne 0 ]; then
+ echo "Error: Failed to create temporary container."
+ exit 1
+fi
+
+# Copy files from the container to the local machine
+docker cp $CONTAINER_ID:/logs/. "$LOCAL_DIR"
+
+# Check if the copy was successful
+if [ $? -eq 0 ]; then
+ echo "Logs successfully copied to $LOCAL_DIR"
+else
+ echo "Error: Failed to copy logs from the container."
+fi
+
+# Remove the temporary container
+docker rm -f $CONTAINER_ID
+
+echo "Temporary container removed."
\ No newline at end of file
diff --git a/docker/db/init.sql b/docker/db/init.sql
new file mode 100644
index 0000000..c0b38ce
--- /dev/null
+++ b/docker/db/init.sql
@@ -0,0 +1,4 @@
+-- Initialize the eveai database and enable the pgvector extension
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- You can add other initialization SQL commands here if needed
\ No newline at end of file
diff --git a/docker/db/password.txt b/docker/db/password.txt
new file mode 100644
index 0000000..3d64987
--- /dev/null
+++ b/docker/db/password.txt
@@ -0,0 +1 @@
+Skywalker!
diff --git a/docker/docker_env_switch.sh b/docker/docker_env_switch.sh
new file mode 100755
index 0000000..8dc8517
--- /dev/null
+++ b/docker/docker_env_switch.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Function to display usage information
+usage() {
+ echo "Usage: source $0 [dev|prod]"
+ echo " dev : Switch to development environment"
+ echo " prod : Switch to production environment"
+}
+
+# Check if the script is sourced
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+ echo "Error: This script must be sourced, not executed directly."
+ echo "Please run: source $0 [dev|prod]"
+ exit 1
+fi
+
+# Check if an argument is provided
+if [ $# -eq 0 ]; then
+ usage
+ return 1
+fi
+
+# Set variables based on the environment
+case $1 in
+ dev)
+ DOCKER_CONTEXT="default"
+ COMPOSE_FILE="compose_dev.yaml"
+ ;;
+ prod)
+ DOCKER_CONTEXT="mxz536.stackhero-network.com"
+ COMPOSE_FILE="compose_stackhero.yaml"
+ ;;
+ *)
+ echo "Invalid environment. Use 'dev' or 'prod'."
+ usage
+ return 1
+ ;;
+esac
+
+# Switch Docker context
+echo "Switching to Docker context: $DOCKER_CONTEXT"
+docker context use $DOCKER_CONTEXT
+
+# Set the COMPOSE_FILE environment variable
+export COMPOSE_FILE=$COMPOSE_FILE
+echo "Set COMPOSE_FILE to $COMPOSE_FILE"
+
+# Define aliases for common Docker commands
+alias docker-compose="docker compose -f $COMPOSE_FILE"
+alias dc="docker compose -f $COMPOSE_FILE"
+alias dcup="docker compose -f $COMPOSE_FILE up -d --no-build --remove-orphans"
+alias dcdown="docker compose -f $COMPOSE_FILE down"
+alias dcps="docker compose -f $COMPOSE_FILE ps"
+alias dclogs="docker compose -f $COMPOSE_FILE logs"
+alias dcpull="docker compose -f $COMPOSE_FILE pull"
+alias dcrefresh="docker compose -f $COMPOSE_FILE pull"
+
+echo "Docker environment switched to $1"
+echo "You can now use 'docker-compose', 'dc', 'dcup', 'dcdown', 'dcps', 'dclogs', 'dcpull' or 'dcrefresh' commands"
\ No newline at end of file
diff --git a/docker/eveai_app/Dockerfile b/docker/eveai_app/Dockerfile
new file mode 100644
index 0000000..c7fc9dd
--- /dev/null
+++ b/docker/eveai_app/Dockerfile
@@ -0,0 +1,70 @@
+ARG PYTHON_VERSION=3.12.3
+FROM python:${PYTHON_VERSION}-slim as base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+# Create directory for patched packages and set permissions
+RUN mkdir -p /app/patched_packages && \
+ chmod 777 /app/patched_packages
+
+# Ensure patches are applied to the application.
+ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH
+
+WORKDIR /app
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/go/dockerfile-user-best-practices/
+ARG UID=10001
+RUN adduser \
+ --disabled-password \
+ --gecos "" \
+ --home "/nonexistent" \
+ --shell "/bin/bash" \
+ --no-create-home \
+ --uid "${UID}" \
+ appuser
+
+# Install necessary packages and build tools
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ gcc \
+ postgresql-client \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create logs directory and set permissions
+RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs
+
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy them into
+# into this layer.
+
+COPY requirements.txt /app/
+RUN python -m pip install -r /app/requirements.txt
+
+# Copy the source code into the container.
+COPY eveai_app /app/eveai_app
+COPY common /app/common
+COPY config /app/config
+COPY migrations /app/migrations
+COPY scripts /app/scripts
+COPY patched_packages /app/patched_packages
+
+# Set permissions for entrypoint script
+RUN chmod 777 /app/scripts/entrypoint.sh
+
+# Set ownership of the application directory to the non-privileged user
+RUN chown -R appuser:appuser /app
+
+# Expose the port that the application listens on.
+EXPOSE 5001
+
+# Set entrypoint and command
+ENTRYPOINT ["/app/scripts/entrypoint.sh"]
+CMD ["/app/scripts/start_eveai_app.sh"]
diff --git a/docker/eveai_chat/Dockerfile b/docker/eveai_chat/Dockerfile
new file mode 100644
index 0000000..6b694b7
--- /dev/null
+++ b/docker/eveai_chat/Dockerfile
@@ -0,0 +1,69 @@
+ARG PYTHON_VERSION=3.12.3
+FROM python:${PYTHON_VERSION}-slim as base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+# Create directory for patched packages and set permissions
+RUN mkdir -p /app/patched_packages && \
+ chmod 777 /app/patched_packages
+
+# Ensure patches are applied to the application.
+ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH
+
+WORKDIR /app
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/go/dockerfile-user-best-practices/
+ARG UID=10001
+RUN adduser \
+ --disabled-password \
+ --gecos "" \
+ --home "/nonexistent" \
+ --shell "/bin/bash" \
+ --no-create-home \
+ --uid "${UID}" \
+ appuser
+
+# Install necessary packages and build tools
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ gcc \
+ postgresql-client \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create logs directory and set permissions
+RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs
+
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy them into
+# into this layer.
+
+COPY ../../requirements.txt /app/
+RUN python -m pip install -r requirements.txt
+
+# Copy the source code into the container.
+COPY eveai_chat /app/eveai_chat
+COPY common /app/common
+COPY config /app/config
+COPY scripts /app/scripts
+COPY patched_packages /app/patched_packages
+
+# Set permissions for entrypoint script
+RUN chmod 777 /app/scripts/entrypoint.sh
+
+# Set ownership of the application directory to the non-privileged user
+RUN chown -R appuser:appuser /app
+
+# Expose the port that the application listens on.
+EXPOSE 5002
+
+# Set entrypoint and command
+ENTRYPOINT ["/app/scripts/entrypoint.sh"]
+CMD ["/app/scripts/start_eveai_chat.sh"]
diff --git a/docker/eveai_chat_workers/Dockerfile b/docker/eveai_chat_workers/Dockerfile
new file mode 100644
index 0000000..b485969
--- /dev/null
+++ b/docker/eveai_chat_workers/Dockerfile
@@ -0,0 +1,67 @@
+ARG PYTHON_VERSION=3.12.3
+FROM python:${PYTHON_VERSION}-slim as base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+# Create directory for patched packages and set permissions
+RUN mkdir -p /app/patched_packages && \
+ chmod 777 /app/patched_packages
+
+# Ensure patches are applied to the application.
+ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH
+
+WORKDIR /app
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/go/dockerfile-user-best-practices/
+ARG UID=10001
+RUN adduser \
+ --disabled-password \
+ --gecos "" \
+ --home "/nonexistent" \
+ --shell "/bin/bash" \
+ --no-create-home \
+ --uid "${UID}" \
+ appuser
+
+# Install necessary packages and build tools
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ gcc \
+ postgresql-client \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create logs directory and set permissions
+RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs
+
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy them into
+# into this layer.
+
+COPY requirements.txt /app/
+RUN python -m pip install -r /app/requirements.txt
+
+# Copy the source code into the container.
+COPY eveai_chat_workers /app/eveai_chat_workers
+COPY common /app/common
+COPY config /app/config
+COPY scripts /app/scripts
+COPY patched_packages /app/patched_packages
+COPY --chown=root:root scripts/entrypoint.sh /app/scripts/
+
+# Set permissions for entrypoint script
+RUN chmod 777 /app/scripts/entrypoint.sh
+
+# Set ownership of the application directory to the non-privileged user
+RUN chown -R appuser:appuser /app
+
+# Set entrypoint and command
+ENTRYPOINT ["/app/scripts/entrypoint.sh"]
+CMD ["/app/scripts/start_eveai_chat_workers.sh"]
diff --git a/docker/eveai_workers/Dockerfile b/docker/eveai_workers/Dockerfile
new file mode 100644
index 0000000..8f32738
--- /dev/null
+++ b/docker/eveai_workers/Dockerfile
@@ -0,0 +1,70 @@
+ARG PYTHON_VERSION=3.12.3
+FROM python:${PYTHON_VERSION}-slim as base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+# Create directory for patched packages and set permissions
+RUN mkdir -p /app/patched_packages && \
+ chmod 777 /app/patched_packages
+
+# Ensure patches are applied to the application.
+ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH
+
+WORKDIR /app
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/go/dockerfile-user-best-practices/
+ARG UID=10001
+RUN adduser \
+ --disabled-password \
+ --gecos "" \
+ --home "/nonexistent" \
+ --shell "/bin/bash" \
+ --no-create-home \
+ --uid "${UID}" \
+ appuser
+
+# Install necessary packages and build tools
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ gcc \
+ postgresql-client \
+ ffmpeg \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create logs directory and set permissions
+RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs
+
+# Install Python dependencies.
+
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy it
+# into this layer.
+
+COPY requirements.txt /app/
+RUN python -m pip install -r /app/requirements.txt
+
+# Copy the source code into the container.
+COPY eveai_workers /app/eveai_workers
+COPY common /app/common
+COPY config /app/config
+COPY scripts /app/scripts
+COPY patched_packages /app/patched_packages
+COPY --chown=root:root scripts/entrypoint.sh /app/scripts/
+
+# Set permissions for entrypoint script
+RUN chmod 777 /app/scripts/entrypoint.sh
+
+# Set ownership of the application directory to the non-privileged user
+RUN chown -R appuser:appuser /app
+
+# Set entrypoint and command
+ENTRYPOINT ["/app/scripts/entrypoint.sh"]
+CMD ["/app/scripts/start_eveai_workers.sh"]
diff --git a/docker/nginx/Dockerfile b/docker/nginx/Dockerfile
new file mode 100644
index 0000000..7328492
--- /dev/null
+++ b/docker/nginx/Dockerfile
@@ -0,0 +1,27 @@
+# Use the official Nginx image as the base image
+FROM nginx:latest
+
+# Copy the custom Nginx configuration file into the container
+COPY ../../nginx/nginx.conf /etc/nginx/nginx.conf
+
+# Copy the mime.types file into the container
+COPY ../../nginx/mime.types /etc/nginx/mime.types
+
+# Copy static & public files
+RUN mkdir -p /etc/nginx/static /etc/nginx/public
+COPY ../../nginx/static /etc/nginx/static
+COPY ../../nginx/public /etc/nginx/public
+
+# Copy site-specific configurations
+RUN mkdir -p /etc/nginx/sites-enabled
+COPY ../../nginx/sites-enabled/ /etc/nginx/sites-enabled/
+
+# Ensure log directory exists
+RUN mkdir -p /var/log/nginx
+
+# Expose the necessary port
+EXPOSE 80
+EXPOSE 8080
+
+# Start Nginx when the container starts
+# CMD ["nginx", "-g", "daemon off;"]
\ No newline at end of file
diff --git a/docker/reload-nginx.sh b/docker/reload-nginx.sh
new file mode 100755
index 0000000..6d922eb
--- /dev/null
+++ b/docker/reload-nginx.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Check if the service name is provided
+if [ -z "$1" ]; then
+ echo "Usage: $0 "
+ exit 1
+fi
+
+SERVICE_NAME=$1
+
+# Get the container ID of the service
+CONTAINER_ID=$(docker-compose ps -q $SERVICE_NAME)
+
+# Check if the container ID is found
+if [ -z "$CONTAINER_ID" ]; then
+ echo "Service $SERVICE_NAME not found or not running."
+ exit 1
+fi
+
+# Reload Nginx inside the container
+docker exec $CONTAINER_ID nginx -s reload
+
+# Output the result
+if [ $? -eq 0 ]; then
+ echo "Nginx reloaded successfully in $SERVICE_NAME."
+else
+ echo "Failed to reload Nginx in $SERVICE_NAME."
+ exit 1
+fi
\ No newline at end of file
diff --git a/docker/stackhero_link.sh b/docker/stackhero_link.sh
new file mode 100755
index 0000000..05a5bb2
--- /dev/null
+++ b/docker/stackhero_link.sh
@@ -0,0 +1,14 @@
+# HOST is your Stackhero for Docker instance domain name (mxz536.stackhero-network.com).
+# SERVICE_ID is your Stackhero service ID.
+# CERTIFICATES_PASSWORD is the password defined in your Stackhero for Docker configuration.
+(export HOST="mxz536.stackhero-network.com"
+export SERVICE_ID="svc-rlaeva"
+export CERTIFICATES_PASSWORD="OonfQaQerGLLsWPKmnudyghFilIcPJRW"
+
+cd /tmp/ \
+ && curl -o certificates.tar https://docker:$CERTIFICATES_PASSWORD@$HOST/stackhero/docker/certificates.tar \
+ && tar -xf certificates.tar \
+ && (docker context rm -f $HOST 2> /dev/null || true) \
+ && docker context create $HOST \
+ --description "$SERVICE_ID ($HOST)" \
+ --docker "host=tcp://$HOST:2376,ca=ca.pem,cert=cert.pem,key=key.pem")
\ No newline at end of file
diff --git a/eveai_app/.DS_Store b/eveai_app/.DS_Store
index 7a2e97e..a049e9c 100644
Binary files a/eveai_app/.DS_Store and b/eveai_app/.DS_Store differ
diff --git a/eveai_app/__init__.py b/eveai_app/__init__.py
index e82ea24..630f8d9 100644
--- a/eveai_app/__init__.py
+++ b/eveai_app/__init__.py
@@ -6,7 +6,8 @@ from flask_security.signals import user_authenticated
from werkzeug.middleware.proxy_fix import ProxyFix
import logging.config
-from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session
+from common.extensions import (db, migrate, bootstrap, security, mail, login_manager, cors, csrf, session,
+ minio_client, simple_encryption)
from common.models.user import User, Role, Tenant, TenantDomain
import common.models.interaction
from config.logging_config import LOGGING
@@ -14,6 +15,7 @@ from common.utils.security import set_tenant_session_data
from .errors import register_error_handlers
from common.utils.celery_utils import make_celery, init_celery
from common.utils.template_filters import register_filters
+from config.config import get_config
def create_app(config_file=None):
@@ -22,10 +24,16 @@ def create_app(config_file=None):
# Ensure all necessary headers are handled
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
- if config_file is None:
- app.config.from_object('config.config.DevConfig')
- else:
- app.config.from_object(config_file)
+ environment = os.getenv('FLASK_ENV', 'development')
+ print(environment)
+
+ match environment:
+ case 'development':
+ app.config.from_object(get_config('dev'))
+ case 'production':
+ app.config.from_object(get_config('prod'))
+ case _:
+ app.config.from_object(get_config('dev'))
app.config['SESSION_KEY_PREFIX'] = 'eveai_app_'
@@ -38,13 +46,16 @@ def create_app(config_file=None):
logger = logging.getLogger(__name__)
logger.info("eveai_app starting up")
+ logger.debug("start config")
+ logger.debug(app.config)
# Register extensions
register_extensions(app)
- # Check GCloud availability
- kms_client.check_kms_access_and_latency()
+ # Configure CSRF protection
+ app.config['WTF_CSRF_CHECK_DEFAULT'] = False # Disable global CSRF protection
+ app.config['WTF_CSRF_TIME_LIMIT'] = None # Remove time limit for CSRF tokens
app.celery = make_celery(app.name, app.config)
init_celery(app.celery, app)
@@ -81,6 +92,12 @@ def create_app(config_file=None):
}
return jsonify(response), 500
+ @app.before_request
+ def before_request():
+ # app.logger.debug(f"Before request - Session ID: {session.sid}")
+ app.logger.debug(f"Before request - Session data: {session}")
+ app.logger.debug(f"Before request - Request headers: {request.headers}")
+
# Register API
register_api(app)
@@ -100,8 +117,10 @@ def register_extensions(app):
csrf.init_app(app)
login_manager.init_app(app)
cors.init_app(app)
- kms_client.init_app(app)
+ # kms_client.init_app(app)
+ simple_encryption.init_app(app)
session.init_app(app)
+ minio_client.init_app(app)
# Register Blueprints
diff --git a/eveai_app/__pycache__/__init__.cpython-311.pyc b/eveai_app/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index 9a28af5..0000000
Binary files a/eveai_app/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/eveai_app/__pycache__/__init__.cpython-312.pyc b/eveai_app/__pycache__/__init__.cpython-312.pyc
index fe6dd4b..9e1c516 100644
Binary files a/eveai_app/__pycache__/__init__.cpython-312.pyc and b/eveai_app/__pycache__/__init__.cpython-312.pyc differ
diff --git a/eveai_app/__pycache__/errors.cpython-312.pyc b/eveai_app/__pycache__/errors.cpython-312.pyc
new file mode 100644
index 0000000..ea652c7
Binary files /dev/null and b/eveai_app/__pycache__/errors.cpython-312.pyc differ
diff --git a/eveai_app/__pycache__/extensions.cpython-311.pyc b/eveai_app/__pycache__/extensions.cpython-311.pyc
deleted file mode 100644
index b127e46..0000000
Binary files a/eveai_app/__pycache__/extensions.cpython-311.pyc and /dev/null differ
diff --git a/eveai_app/__pycache__/extensions.cpython-312.pyc b/eveai_app/__pycache__/extensions.cpython-312.pyc
deleted file mode 100644
index 6f9cfe7..0000000
Binary files a/eveai_app/__pycache__/extensions.cpython-312.pyc and /dev/null differ
diff --git a/eveai_app/errors.py b/eveai_app/errors.py
index 2d9279e..4c6f0ec 100644
--- a/eveai_app/errors.py
+++ b/eveai_app/errors.py
@@ -27,9 +27,18 @@ def access_forbidden(error):
return render_template('error/403.html')
+def key_error_handler(error):
+ # Check if the KeyError is specifically for 'tenant'
+ if str(error) == "'tenant'":
+ return redirect(prefixed_url_for('security.login'))
+ # For other KeyErrors, you might want to log the error and return a generic error page
+ return render_template('error/generic.html', error_message="An unexpected error occurred"), 500
+
+
def register_error_handlers(app):
app.register_error_handler(404, not_found_error)
app.register_error_handler(500, internal_server_error)
app.register_error_handler(401, not_authorised_error)
app.register_error_handler(403, not_authorised_error)
+ app.register_error_handler(KeyError, key_error_handler)
diff --git a/eveai_app/models/__pycache__/__init__.cpython-312.pyc b/eveai_app/models/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index 579dcfe..0000000
Binary files a/eveai_app/models/__pycache__/__init__.cpython-312.pyc and /dev/null differ
diff --git a/eveai_app/models/__pycache__/user.cpython-312.pyc b/eveai_app/models/__pycache__/user.cpython-312.pyc
deleted file mode 100644
index bf6b596..0000000
Binary files a/eveai_app/models/__pycache__/user.cpython-312.pyc and /dev/null differ
diff --git a/eveai_app/static/assets/img/.DS_Store b/eveai_app/static/assets/img/.DS_Store
deleted file mode 100644
index 08883f5..0000000
Binary files a/eveai_app/static/assets/img/.DS_Store and /dev/null differ
diff --git a/eveai_app/static/node_modules/.DS_Store b/eveai_app/static/node_modules/.DS_Store
deleted file mode 100644
index 86e7fa3..0000000
Binary files a/eveai_app/static/node_modules/.DS_Store and /dev/null differ
diff --git a/eveai_app/static/scss/material-kit-pro/.DS_Store b/eveai_app/static/scss/material-kit-pro/.DS_Store
deleted file mode 100644
index 87b0cb0..0000000
Binary files a/eveai_app/static/scss/material-kit-pro/.DS_Store and /dev/null differ
diff --git a/eveai_app/temp b/eveai_app/temp
new file mode 100644
index 0000000..3d3f011
--- /dev/null
+++ b/eveai_app/temp
@@ -0,0 +1 @@
+Settings(Settings({'deprecated_settings': set()}, {}, {'accept_content': ('json',), 'result_accept_content': None, 'enable_utc': True, 'imports': (), 'include': (), 'timezone': None, 'beat_max_loop_interval': 0, 'beat_schedule': {}, 'beat_scheduler': 'celery.beat:PersistentScheduler', 'beat_schedule_filename': 'celerybeat-schedule', 'beat_sync_every': 0, 'beat_cron_starting_deadline': None, 'broker_url': None, 'broker_read_url': None, 'broker_write_url': None, 'broker_transport': None, 'broker_transport_options': {}, 'broker_connection_timeout': 4, 'broker_connection_retry': True, 'broker_connection_retry_on_startup': None, 'broker_connection_max_retries': 100, 'broker_channel_error_retry': False, 'broker_failover_strategy': None, 'broker_heartbeat': 120, 'broker_heartbeat_checkrate': 3.0, 'broker_login_method': None, 'broker_pool_limit': 10, 'broker_use_ssl': False, 'broker_host': None, 'broker_port': None, 'broker_user': None, 'broker_password': None, 'broker_vhost': None, 'cache_backend': None, 'cache_backend_options': {}, 'cassandra_entry_ttl': None, 'cassandra_keyspace': None, 'cassandra_port': None, 'cassandra_read_consistency': None, 'cassandra_servers': None, 'cassandra_bundle_path': None, 'cassandra_table': None, 'cassandra_write_consistency': None, 'cassandra_auth_provider': None, 'cassandra_auth_kwargs': None, 'cassandra_options': {}, 's3_access_key_id': None, 's3_secret_access_key': None, 's3_bucket': None, 's3_base_path': None, 's3_endpoint_url': None, 's3_region': None, 'azureblockblob_container_name': 'celery', 'azureblockblob_retry_initial_backoff_sec': 2, 'azureblockblob_retry_increment_base': 2, 'azureblockblob_retry_max_attempts': 3, 'azureblockblob_base_path': '', 'azureblockblob_connection_timeout': 20, 'azureblockblob_read_timeout': 120, 'gcs_bucket': None, 'gcs_project': None, 'gcs_base_path': '', 'gcs_ttl': 0, 'control_queue_ttl': 300.0, 'control_queue_expires': 10.0, 'control_exchange': 'celery', 'couchbase_backend_settings': None, 
'arangodb_backend_settings': None, 'mongodb_backend_settings': None, 'cosmosdbsql_database_name': 'celerydb', 'cosmosdbsql_collection_name': 'celerycol', 'cosmosdbsql_consistency_level': 'Session', 'cosmosdbsql_max_retry_attempts': 9, 'cosmosdbsql_max_retry_wait_time': 30, 'event_queue_expires': 60.0, 'event_queue_ttl': 5.0, 'event_queue_prefix': 'celeryev', 'event_serializer': 'json', 'event_exchange': 'celeryev', 'redis_backend_use_ssl': None, 'redis_db': None, 'redis_host': None, 'redis_max_connections': None, 'redis_username': None, 'redis_password': None, 'redis_port': None, 'redis_socket_timeout': 120.0, 'redis_socket_connect_timeout': None, 'redis_retry_on_timeout': False, 'redis_socket_keepalive': False, 'result_backend': None, 'result_cache_max': -1, 'result_compression': None, 'result_exchange': 'celeryresults', 'result_exchange_type': 'direct', 'result_expires': datetime.timedelta(days=1), 'result_persistent': None, 'result_extended': False, 'result_serializer': 'json', 'result_backend_transport_options': {}, 'result_chord_retry_interval': 1.0, 'result_chord_join_timeout': 3.0, 'result_backend_max_sleep_between_retries_ms': 10000, 'result_backend_max_retries': inf, 'result_backend_base_sleep_between_retries_ms': 10, 'result_backend_always_retry': False, 'elasticsearch_retry_on_timeout': None, 'elasticsearch_max_retries': None, 'elasticsearch_timeout': None, 'elasticsearch_save_meta_as_text': True, 'security_certificate': None, 'security_cert_store': None, 'security_key': None, 'security_key_password': None, 'security_digest': 'sha256', 'database_url': None, 'database_engine_options': None, 'database_short_lived_sessions': False, 'database_table_schemas': None, 'database_table_names': None, 'task_acks_late': False, 'task_acks_on_failure_or_timeout': True, 'task_always_eager': False, 'task_annotations': None, 'task_compression': None, 'task_create_missing_queues': True, 'task_inherit_parent_priority': False, 'task_default_delivery_mode': 2, 
'task_default_queue': 'celery', 'task_default_exchange': None, 'task_default_exchange_type': 'direct', 'task_default_routing_key': None, 'task_default_rate_limit': None, 'task_default_priority': None, 'task_eager_propagates': False, 'task_ignore_result': False, 'task_store_eager_result': False, 'task_protocol': 2, 'task_publish_retry': True, 'task_publish_retry_policy': {'max_retries': 3, 'interval_start': 0, 'interval_max': 1, 'interval_step': 0.2}, 'task_queues': None, 'task_queue_max_priority': None, 'task_reject_on_worker_lost': None, 'task_remote_tracebacks': False, 'task_routes': None, 'task_send_sent_event': False, 'task_serializer': 'json', 'task_soft_time_limit': None, 'task_time_limit': None, 'task_store_errors_even_if_ignored': False, 'task_track_started': False, 'task_allow_error_cb_on_chord_header': False, 'worker_agent': None, 'worker_autoscaler': 'celery.worker.autoscale:Autoscaler', 'worker_cancel_long_running_tasks_on_connection_loss': False, 'worker_concurrency': None, 'worker_consumer': 'celery.worker.consumer:Consumer', 'worker_direct': False, 'worker_disable_rate_limits': False, 'worker_deduplicate_successful_tasks': False, 'worker_enable_remote_control': True, 'worker_hijack_root_logger': True, 'worker_log_color': None, 'worker_log_format': '[%(asctime)s: %(levelname)s/%(processName)s] %(message)s', 'worker_lost_wait': 10.0, 'worker_max_memory_per_child': None, 'worker_max_tasks_per_child': None, 'worker_pool': 'prefork', 'worker_pool_putlocks': True, 'worker_pool_restarts': False, 'worker_proc_alive_timeout': 4.0, 'worker_prefetch_multiplier': 4, 'worker_enable_prefetch_count_reduction': True, 'worker_redirect_stdouts': True, 'worker_redirect_stdouts_level': 'WARNING', 'worker_send_task_events': False, 'worker_state_db': None, 'worker_task_log_format': '[%(asctime)s: %(levelname)s/%(processName)s] %(task_name)s[%(task_id)s]: %(message)s', 'worker_timer': None, 'worker_timer_precision': 1.0, 'deprecated_settings': None}))
diff --git a/eveai_app/static/scss/material-kit-pro/bootstrap/.DS_Store b/eveai_app/templates/.DS_Store
similarity index 76%
rename from eveai_app/static/scss/material-kit-pro/bootstrap/.DS_Store
rename to eveai_app/templates/.DS_Store
index a223f55..bd2e771 100644
Binary files a/eveai_app/static/scss/material-kit-pro/bootstrap/.DS_Store and b/eveai_app/templates/.DS_Store differ
diff --git a/eveai_app/templates/document/add_url.html b/eveai_app/templates/document/add_url.html
index b90cca3..492fa92 100644
--- a/eveai_app/templates/document/add_url.html
+++ b/eveai_app/templates/document/add_url.html
@@ -7,7 +7,7 @@
{% block content_description %}Add a URL and the corresponding document to EveAI. In some cases, URLs cannot be loaded directly. Download the HTML and add it as a document in that case.{% endblock %}
{% block content %}
-
+{% endblock %}
+
+
+{% block content_footer %}
+
+{% endblock %}
\ No newline at end of file
diff --git a/eveai_app/templates/document/add_youtube.html b/eveai_app/templates/document/add_youtube.html
new file mode 100644
index 0000000..94d174e
--- /dev/null
+++ b/eveai_app/templates/document/add_youtube.html
@@ -0,0 +1,24 @@
+{% extends 'base.html' %}
+{% from "macros.html" import render_field %}
+
+{% block title %}Add Youtube Document{% endblock %}
+
+{% block content_title %}Add Youtube Document{% endblock %}
+{% block content_description %}Add a YouTube URL and the corresponding document to EveAI. In some cases, URLs cannot be loaded directly. Download the HTML and add it as a document in that case.{% endblock %}
+
+{% block content %}
+
+{% endblock %}
+
+
+{% block content_footer %}
+
+{% endblock %}
\ No newline at end of file
diff --git a/eveai_app/templates/document/edit_document_language.html b/eveai_app/templates/document/edit_document_version.html
similarity index 66%
rename from eveai_app/templates/document/edit_document_language.html
rename to eveai_app/templates/document/edit_document_version.html
index 73d68e6..cdc3888 100644
--- a/eveai_app/templates/document/edit_document_language.html
+++ b/eveai_app/templates/document/edit_document_version.html
@@ -1,9 +1,9 @@
{% extends "base.html" %}
{% from "macros.html" import render_field %}
-{% block title %}Update Document Language{% endblock %}
+{% block title %}Update Document Version{% endblock %}
-{% block content_title %}Update Document Language{% endblock %}
-{% block content_description %}Update document language for {{ doc_details }}.{% endblock %}
+{% block content_title %}Update Document Version{% endblock %}
+{% block content_description %}Update document version for {{ doc_details }}.{% endblock %}
{% block content %}
{% endblock %}
diff --git a/eveai_app/templates/error/generic.html b/eveai_app/templates/error/generic.html
new file mode 100644
index 0000000..2c7bb71
--- /dev/null
+++ b/eveai_app/templates/error/generic.html
@@ -0,0 +1,9 @@
+{% extends "base.html" %}
+
+{% block title %}Unexpected Error{% endblock %}
+
+{% block content_title %}Internal Server error{% endblock %}
+{% block content_description %}Something unexpected happened! The administrator has been notified.{% endblock %}
+{% block content %}
+ Return home
+{% endblock %}
\ No newline at end of file
diff --git a/eveai_app/templates/head.html b/eveai_app/templates/head.html
index cf5a32e..a1a34e5 100644
--- a/eveai_app/templates/head.html
+++ b/eveai_app/templates/head.html
@@ -15,6 +15,7 @@
+
diff --git a/eveai_app/templates/header.html b/eveai_app/templates/header.html
index e1e41de..868b1be 100644
--- a/eveai_app/templates/header.html
+++ b/eveai_app/templates/header.html
@@ -1,5 +1,5 @@