diff --git a/eveai_app/models/interaction.py b/common/__init__.py similarity index 100% rename from eveai_app/models/interaction.py rename to common/__init__.py diff --git a/eveai_app/extensions.py b/common/extensions.py similarity index 100% rename from eveai_app/extensions.py rename to common/extensions.py diff --git a/eveai_app/models/__init__.py b/common/models/__init__.py similarity index 100% rename from eveai_app/models/__init__.py rename to common/models/__init__.py diff --git a/eveai_app/models/document.py b/common/models/document.py similarity index 99% rename from eveai_app/models/document.py rename to common/models/document.py index 4aeed19..1b3dfdc 100644 --- a/eveai_app/models/document.py +++ b/common/models/document.py @@ -1,4 +1,4 @@ -from ..extensions import db +from common.extensions import db from .user import User, Tenant from pgvector.sqlalchemy import Vector diff --git a/common/models/interaction.py b/common/models/interaction.py new file mode 100644 index 0000000..e69de29 diff --git a/eveai_app/models/user.py b/common/models/user.py similarity index 99% rename from eveai_app/models/user.py rename to common/models/user.py index 62e57af..86513ab 100644 --- a/eveai_app/models/user.py +++ b/common/models/user.py @@ -1,4 +1,4 @@ -from ..extensions import db +from common.extensions import db from flask_security import UserMixin, RoleMixin from sqlalchemy.dialects.postgresql import ARRAY import sqlalchemy as sa diff --git a/common/utils/__init__.py b/common/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/eveai_app/utils/app_hooks.py b/common/utils/app_hooks.py similarity index 100% rename from eveai_app/utils/app_hooks.py rename to common/utils/app_hooks.py diff --git a/eveai_app/utils/database.py b/common/utils/database.py similarity index 98% rename from eveai_app/utils/database.py rename to common/utils/database.py index eb38ce8..1cf95bc 100644 --- a/eveai_app/utils/database.py +++ b/common/utils/database.py @@ -6,7 +6,7 @@ from sqlalchemy.exc import InternalError from sqlalchemy.orm import sessionmaker, scoped_session from flask import current_app -from ..extensions import db, migrate +from common.extensions import db, migrate class Database: diff --git a/eveai_app/utils/middleware.py b/common/utils/middleware.py similarity index 94% rename from eveai_app/utils/middleware.py rename to common/utils/middleware.py index 0ba87bb..8e1d16a 100644 --- a/eveai_app/utils/middleware.py +++ b/common/utils/middleware.py @@ -6,7 +6,6 @@ for handling tenant requests from flask_security import current_user from flask import session -from ..models.user import User, Tenant from .database import Database diff --git a/eveai_app/utils/security.py b/common/utils/security.py similarity index 61% rename from eveai_app/utils/security.py rename to common/utils/security.py index 0c6b56d..823cd56 100644 --- a/eveai_app/utils/security.py +++ b/common/utils/security.py @@ -1,5 +1,5 @@ from flask import session -from ..models.user import User, Tenant +from common.models import User, Tenant # Definition of Trigger Handlers @@ -10,3 +10,9 @@ def set_tenant_session_data(sender, user, **kwargs): session['default_embedding_model'] = tenant.default_embedding_model session['default_llm_model'] = tenant.default_llm_model + +def clear_tenant_session_data(sender, user, **kwargs): + session.pop('tenant', None) + session.pop('default_language', None) + session.pop('default_embedding_model', None) + session.pop('default_llm_model', None) \ No newline at end of file diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config.py b/config/config.py similarity index 94% rename from config.py rename to config/config.py index 93c452f..c5613a1 100644 --- a/config.py +++ b/config/config.py @@ -8,13 +8,15 @@ class Config(object): DEBUG = False DEVELOPMENT = False SECRET_KEY = '97867c1491bea5ee6a8e8436eb11bf2ba6a69ff53ab1b17ecba450d0f2e572e1' + SESSION_COOKIE_SECURE = True + SESSION_COOKIE_HTTPONLY = True # WTF_CSRF_ENABLED = True # flask-security-too settings SECURITY_PASSWORD_SALT = '228614859439123264035565568761433607235' - # REMEMBER_COOKIE_SAMESITE = 'strict' - # SESSION_COOKIE_SAMESITE = 'strict' + REMEMBER_COOKIE_SAMESITE = 'strict' + SESSION_COOKIE_SAMESITE = 'strict' SECURITY_CONFIRMABLE = True SECURITY_TRACKABLE = True SECURITY_PASSWORD_COMPLEXITY_CHECKER = 'zxcvbn' @@ -22,6 +24,7 @@ class Config(object): SECURITY_RECOVERABLE = True SECURITY_EMAIL_SENDER = "eveai_super@flow-it.net" PERMANENT_SESSION_LIFETIME = timedelta(minutes=60) + SESSION_REFRESH_EACH_REQUEST = True # flask-mailman settings MAIL_SERVER = 'mail.flow-it.net' diff --git a/eveai_app/logging_config.py b/config/logging_config.py similarity index 100% rename from eveai_app/logging_config.py rename to config/logging_config.py diff --git a/eveai_app/api/__init__.py b/eveai_api/__init__.py similarity index 100% rename from eveai_app/api/__init__.py rename to eveai_api/__init__.py diff --git a/eveai_app/api/auth.py b/eveai_api/auth.py similarity index 100% rename from eveai_app/api/auth.py rename to eveai_api/auth.py diff --git a/eveai_app/__init__.py b/eveai_app/__init__.py index 867b0c1..72f19a6 100644 --- a/eveai_app/__init__.py +++ b/eveai_app/__init__.py @@ -5,14 +5,13 @@ from flask_security import SQLAlchemyUserDatastore from flask_security.signals import user_authenticated from werkzeug.middleware.proxy_fix import ProxyFix import logging.config -from celery import Celery -from .extensions import db, migrate, bootstrap, security, mail, login_manager, cors -from .models.user import User, Tenant, Role -from .models.document import Document, DocumentLanguage, DocumentVersion -from .logging_config import LOGGING -from .utils.security import set_tenant_session_data -from .worker.celery_utils import init_celery +from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors +from common.models.user import User, Role +from config.logging_config import LOGGING +from common.utils.security import set_tenant_session_data +from .errors import register_error_handlers +from eveai_workers.celery_utils import init_celery def create_app(config_file=None): @@ -20,7 +19,7 @@ def create_app(config_file=None): app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1) if config_file is None: - app.config.from_object('config.DevConfig') + app.config.from_object('config.config.DevConfig') else: app.config.from_object(config_file) @@ -31,6 +30,10 @@ def create_app(config_file=None): logging.config.dictConfig(LOGGING) register_extensions(app) + + # Initialize celery + init_celery(app) + # Setup Flask-Security-Too user_datastore = SQLAlchemyUserDatastore(db, User, Role) security.init_app(app, user_datastore) @@ -39,6 +42,10 @@ def create_app(config_file=None): # Register Blueprints register_blueprints(app) + # Register Error Handlers + register_error_handlers(app) + + # Debugging settings if app.config['DEBUG'] is True: app.logger.setLevel(logging.DEBUG) mail_logger = logging.getLogger('flask_mailman') @@ -79,17 +86,3 @@ def register_api(app): # from . import api # app.register_blueprint(api.bp, url_prefix='/api') - -def create_celery_app(config_file=None): - app = Flask(__name__) - if config_file is None: - app.config.from_object('config.DevConfig') - else: - app.config.from_object(config_file) - - celery = Celery(app.import_name) - init_celery(celery, app) - return celery - - -celery = create_celery_app() diff --git a/eveai_app/errors.py b/eveai_app/errors.py new file mode 100644 index 0000000..6a1325c --- /dev/null +++ b/eveai_app/errors.py @@ -0,0 +1,22 @@ +from flask import render_template, request, jsonify + + +def not_found_error(error): + if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: + response = jsonify({'error': 'Not found'}) + response.status_code = 404 + return response + return render_template('error/404.html'), 404 + + +def internal_server_error(error): + if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: + response = jsonify({'error': 'Internal server error'}) + response.status_code = 500 + return response + return render_template('error/500.html'), 500 + + +def register_error_handlers(app): + app.register_error_handler(404, not_found_error) + app.register_error_handler(500, internal_server_error) diff --git a/eveai_app/templates/base.html b/eveai_app/templates/base.html index e721e16..da02acc 100644 --- a/eveai_app/templates/base.html +++ b/eveai_app/templates/base.html @@ -20,7 +20,7 @@
-
+ {% block content_class %}
{% endblock %}
diff --git a/eveai_app/templates/document/documents.html b/eveai_app/templates/document/documents.html index 94a239b..007fbd0 100644 --- a/eveai_app/templates/document/documents.html +++ b/eveai_app/templates/document/documents.html @@ -5,6 +5,7 @@ {% block content_title %}Documents{% endblock %} {% block content_description %}View Documents for Tenant{% endblock %} +{% block content_class %}
{% endblock %} {% block content %}
diff --git a/eveai_app/templates/error/404.html b/eveai_app/templates/error/404.html new file mode 100644 index 0000000..a91e122 --- /dev/null +++ b/eveai_app/templates/error/404.html @@ -0,0 +1,9 @@ +{% extends "base.html" %} + +{% block title %}Error 404{% endblock %} + +{% block content_title %}File not Found{% endblock %} +{% block content_description %}Something unexpected happened!{% endblock %} +{% block content %} +

Return home

+{% endblock %} \ No newline at end of file diff --git a/eveai_app/templates/error/500.html b/eveai_app/templates/error/500.html new file mode 100644 index 0000000..1598261 --- /dev/null +++ b/eveai_app/templates/error/500.html @@ -0,0 +1,9 @@ +{% extends "base.html" %} + +{% block title %}Error 500{% endblock %} + +{% block content_title %}Internal Server error{% endblock %} +{% block content_description %}Something unexpected happened! The administrator has been notified.{% endblock %} +{% block content %} +

Return home

+{% endblock %} \ No newline at end of file diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py index b2cce10..adccb5d 100644 --- a/eveai_app/views/document_views.py +++ b/eveai_app/views/document_views.py @@ -1,15 +1,17 @@ import os from datetime import datetime as dt, timezone as tz from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app -from flask_security import hash_password, roles_required, roles_accepted, current_user +from flask_security import roles_accepted, current_user from sqlalchemy import desc from sqlalchemy.orm import joinedload from werkzeug.utils import secure_filename -from ..models.document import Document, DocumentLanguage, DocumentVersion -from ..extensions import db +from common.models import Document, DocumentLanguage, DocumentVersion +from common.extensions import db from .document_forms import AddDocumentForm -from ..utils.middleware import mw_before_request +from common.utils.middleware import mw_before_request +from eveai_workers.tasks import create_embeddings + document_bp = Blueprint('document_bp', __name__, url_prefix='/document') @@ -68,11 +70,17 @@ def add_document(): if error is None: flash('Document added successfully.', 'success') upload_file_for_version(new_doc_vers, file, extension) + create_embeddings.delay(tenant_id=session['tenant']['id'], + document_version_id=new_doc_vers.id, + default_embedding_model=session['default_embedding_model']) + current_app.logger.info(f'Document processing started for tenant {session["tenant"]["id"]}, ' + f'Document Version {new_doc_vers.id}') + print('Processing should start soon') else: flash('Error adding document.', 'error') current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {error}') - # return render_template('document/add_document.html', form=form) + return render_template('document/add_document.html', form=form) @document_bp.route('/documents', methods=['GET', 'POST']) diff --git a/eveai_app/views/user_forms.py b/eveai_app/views/user_forms.py index 54c1876..41dda86 100644 --- a/eveai_app/views/user_forms.py +++ b/eveai_app/views/user_forms.py @@ -3,7 +3,7 @@ from flask_wtf import FlaskForm from wtforms import (StringField, PasswordField, BooleanField, SubmitField, EmailField, IntegerField, DateField, SelectField, SelectMultipleField, FieldList, FormField) from wtforms.validators import DataRequired, Length, Email, NumberRange, Optional -from ..models.user import User, Role +from common.models import Role class TenantForm(FlaskForm): diff --git a/eveai_app/views/user_views.py b/eveai_app/views/user_views.py index f9b1637..7e28888 100644 --- a/eveai_app/views/user_views.py +++ b/eveai_app/views/user_views.py @@ -4,10 +4,10 @@ from datetime import datetime as dt, timezone as tz from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app from flask_security import hash_password, roles_required, roles_accepted -from ..models.user import User, Tenant, Role -from ..extensions import db +from common.models import User, Tenant, Role +from common.extensions import db from .user_forms import TenantForm, CreateUserForm, EditUserForm -from ..utils.database import Database +from common.utils.database import Database user_bp = Blueprint('user_bp', __name__, url_prefix='/user') diff --git a/eveai_app/worker/celery_utils.py b/eveai_app/worker/celery_utils.py deleted file mode 100644 index 8b6fe4b..0000000 --- a/eveai_app/worker/celery_utils.py +++ /dev/null @@ -1,9 +0,0 @@ -def init_celery(celery, app): - celery.conf.update(app.config) # Load all configurations form Flask app including Queue settings - - class ContextTask(celery.Task): - def __call__(self, *args, **kwargs): - with app.app_context(): - return self.run(*args, **kwargs) - - celery.Task = ContextTask diff --git a/eveai_app/worker/tasks.py b/eveai_app/worker/tasks.py deleted file mode 100644 index 973d224..0000000 --- a/eveai_app/worker/tasks.py +++ /dev/null @@ -1,59 +0,0 @@ -from datetime import datetime as dt, timezone as tz -from flask import current_app -from langchain_mistralai import MistralAIEmbeddings -from langchain_openai import OpenAIEmbeddings -from langchain_community.document_loaders.pdf import PyPDFLoader -from langchain_community.vectorstores.chroma import Chroma -from langchain_text_splitters import CharacterTextSplitter -import os - -from eveai_app import celery -from ..utils.database import Database -from ..models.document import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI -from .. import db - - -@celery.task(name='create_embeddings', queue='embeddings') -def create_embeddings(tenant_id, document_version_id, embedding_model_def): - current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} ' - f'with model {embedding_model_def}') - Database(tenant_id).switch_schema() - document_version = DocumentVersion.query.get(document_version_id) - if document_version is None: - current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. ' - f'Document version {document_version_id} not found') - return - db.session.add(document_version) - - # start processing - document_version.processing = True - document_version.processing_started_at = dt.now(tz.utc) - db.session.commit() - - embedding_provider = embedding_model_def.rsplit('.', 1)[0] - embedding_model = embedding_model_def.rsplit('.', 1)[1] - # define embedding variables - match (embedding_provider, embedding_model): - case ('openai', 'text-embedding-3-small'): - embedding_model = EmbeddingSmallOpenAI() - case ('mistral', 'text-embedding-3-small'): - embedding_model = EmbeddingMistral() - - match document_version.file_type: - case 'pdf': - pdf_file = os.path.join(current_app.config['UPLOAD_FOLDER'], - document_version.file_location, - document_version.file_path) - loader = PyPDFLoader(pdf_file) - - # We - text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) - documents = text_splitter.split_documents(loader.load()) - - pass - - -@celery.task(name='ask_eveAI', queue='llm_interactions') -def ask_eve_ai(query): - # Interaction logic with LLMs like GPT (Langchain API calls, etc.) - pass diff --git a/eveai_workers/celery_utils.py b/eveai_workers/celery_utils.py new file mode 100644 index 0000000..fd436d0 --- /dev/null +++ b/eveai_workers/celery_utils.py @@ -0,0 +1,23 @@ +from .tasks import create_embeddings +from celery import Celery, Task + + +def init_celery(app): + class ContextTask(Task): + def __call__(self, *args, **kwargs): + with app.app_context(): + return self.run(*args, **kwargs) + + celery_app = Celery(app.import_name, task_cls=ContextTask) + + celery_app.conf.broker_url = app.config.get('CELERY_BROKER_URL') + celery_app.conf.result_backend = app.config.get('CELERY_RESULT_BACKEND') + celery_app.conf.accept_content = app.config.get('CELERY_ACCEPT_CONTENT') + celery_app.conf.task_serializer = app.config.get('CELERY_TASK_SERIALIZER') + celery_app.conf.timezone = app.config.get('CELERY_TIMEZONE') + celery_app.conf.enable_utc = app.config.get('CELERY_ENABLE_UTC') + + celery_app.set_default() + + app.extensions['celery'] = celery_app + diff --git a/eveai_workers/tasks.py b/eveai_workers/tasks.py new file mode 100644 index 0000000..1b99f88 --- /dev/null +++ b/eveai_workers/tasks.py @@ -0,0 +1,67 @@ +from datetime import datetime as dt, timezone as tz +from flask import current_app +from langchain_community.document_loaders.unstructured import UnstructuredAPIFileLoader +import os +from celery import shared_task + +from common.utils.database import Database +from common.models import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI +from eveai_app import db + + +@shared_task(name='create_embeddings', queue='embeddings') +def create_embeddings(tenant_id, document_version_id, default_embedding_model): + current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} ' + f'with model {default_embedding_model}') + + # Ensure we are working in the correct database schema + Database(tenant_id).switch_schema() + + # Retrieve document version to process + document_version = DocumentVersion.query.get(document_version_id) + if document_version is None: + current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. ' + f'Document version {document_version_id} not found') + return + db.session.add(document_version) + + # start processing + document_version.processing = True + document_version.processing_started_at = dt.now(tz.utc) + db.session.commit() + + embedding_provider = default_embedding_model.rsplit('.', 1)[0] + embedding_model = default_embedding_model.rsplit('.', 1)[1] + # define embedding variables + match (embedding_provider, embedding_model): + case ('openai', 'text-embedding-3-small'): + embedding_model = EmbeddingSmallOpenAI() + case ('mistral', 'text-embedding-3-small'): + embedding_model = EmbeddingMistral() + + match document_version.file_type: + case 'pdf': + url = current_app.config.get('UNSTRUCTURED_FULL_URL') + api_key = current_app.config.get('UNSTRUCTURED_API_KEY') + file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], + document_version.file_location, + document_version.file_path) + with open(file_path, 'rb') as f: + loader = UnstructuredAPIFileLoader(f, + url=url, + api_key=api_key, + mode='elements', + strategy='hi-res', + include_page_breaks=True, + unique_element_ids=True, + chunking_strategy='by_title', + max_characters=3000, + ) + documents = loader.load() + print(documents) + + +@shared_task(name='ask_eve_ai', queue='llm_interactions') +def ask_eve_ai(query): + # Interaction logic with LLMs like GPT (Langchain API calls, etc.) + pass diff --git a/migrations/tenant/env.py b/migrations/tenant/env.py index 1d23729..1ba7dc2 100644 --- a/migrations/tenant/env.py +++ b/migrations/tenant/env.py @@ -6,7 +6,7 @@ from flask import current_app from alembic import context from sqlalchemy import NullPool, engine_from_config, text -from eveai_app.models.user import Tenant +from common.models import Tenant import pgvector from pgvector.sqlalchemy import Vector diff --git a/scripts/run_celery.py b/scripts/run_celery.py new file mode 100644 index 0000000..8a7a44c --- /dev/null +++ b/scripts/run_celery.py @@ -0,0 +1,5 @@ +from eveai_app import create_app + +flask_app = create_app() +celery_app = flask_app.extensions['celery'] +print(flask_app.extensions) diff --git a/app.py b/scripts/run_flask_server.py similarity index 89% rename from app.py rename to scripts/run_flask_server.py index 2ae2397..a3f8786 100644 --- a/app.py +++ b/scripts/run_flask_server.py @@ -1,8 +1,9 @@ from eveai_app import create_app from gevent.pywsgi import WSGIServer + app = create_app() -print(__name__) +celery_app = app.extensions['celery'] if __name__ == '__main__': print("Server starting on port 5000") diff --git a/scripts/start_embedding_queue.sh b/scripts/start_embedding_queue.sh new file mode 100755 index 0000000..d3e6e9b --- /dev/null +++ b/scripts/start_embedding_queue.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +source .venv/bin/activate +celery -A app.celery_app worker --loglevel=info -Q embeddings \ No newline at end of file diff --git a/scripts/start_flower.sh b/scripts/start_flower.sh new file mode 100755 index 0000000..cae94e2 --- /dev/null +++ b/scripts/start_flower.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +source .venv/bin/activate +celery -A app.celery_app flower