Refactoring part 1
Some changes for workers, but stopped due to refactoring
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from ..extensions import db
|
||||
from common.extensions import db
|
||||
from .user import User, Tenant
|
||||
from pgvector.sqlalchemy import Vector
|
||||
|
||||
0
common/models/interaction.py
Normal file
0
common/models/interaction.py
Normal file
@@ -1,4 +1,4 @@
|
||||
from ..extensions import db
|
||||
from common.extensions import db
|
||||
from flask_security import UserMixin, RoleMixin
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
import sqlalchemy as sa
|
||||
0
common/utils/__init__.py
Normal file
0
common/utils/__init__.py
Normal file
@@ -6,7 +6,7 @@ from sqlalchemy.exc import InternalError
|
||||
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||
from flask import current_app
|
||||
|
||||
from ..extensions import db, migrate
|
||||
from common.extensions import db, migrate
|
||||
|
||||
|
||||
class Database:
|
||||
@@ -6,7 +6,6 @@ for handling tenant requests
|
||||
from flask_security import current_user
|
||||
from flask import session
|
||||
|
||||
from ..models.user import User, Tenant
|
||||
from .database import Database
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from flask import session
|
||||
from ..models.user import User, Tenant
|
||||
from common.models import User, Tenant
|
||||
|
||||
|
||||
# Definition of Trigger Handlers
|
||||
@@ -10,3 +10,9 @@ def set_tenant_session_data(sender, user, **kwargs):
|
||||
session['default_embedding_model'] = tenant.default_embedding_model
|
||||
session['default_llm_model'] = tenant.default_llm_model
|
||||
|
||||
|
||||
def clear_tenant_session_data(sender, user, **kwargs):
    """Drop every tenant-specific entry from the Flask session.

    Uses the same ``(sender, user, **kwargs)`` handler signature as
    ``set_tenant_session_data``; none of the arguments are read. Keys that
    are not present in the session are silently ignored.
    """
    tenant_session_keys = (
        'tenant',
        'default_language',
        'default_embedding_model',
        'default_llm_model',
    )
    for stale_key in tenant_session_keys:
        session.pop(stale_key, None)
|
||||
0
config/__init__.py
Normal file
0
config/__init__.py
Normal file
@@ -8,13 +8,15 @@ class Config(object):
|
||||
DEBUG = False
|
||||
DEVELOPMENT = False
|
||||
SECRET_KEY = '97867c1491bea5ee6a8e8436eb11bf2ba6a69ff53ab1b17ecba450d0f2e572e1'
|
||||
SESSION_COOKIE_SECURE = True
|
||||
SESSION_COOKIE_HTTPONLY = True
|
||||
|
||||
# WTF_CSRF_ENABLED = True
|
||||
|
||||
# flask-security-too settings
|
||||
SECURITY_PASSWORD_SALT = '228614859439123264035565568761433607235'
|
||||
# REMEMBER_COOKIE_SAMESITE = 'strict'
|
||||
# SESSION_COOKIE_SAMESITE = 'strict'
|
||||
REMEMBER_COOKIE_SAMESITE = 'strict'
|
||||
SESSION_COOKIE_SAMESITE = 'strict'
|
||||
SECURITY_CONFIRMABLE = True
|
||||
SECURITY_TRACKABLE = True
|
||||
SECURITY_PASSWORD_COMPLEXITY_CHECKER = 'zxcvbn'
|
||||
@@ -22,6 +24,7 @@ class Config(object):
|
||||
SECURITY_RECOVERABLE = True
|
||||
SECURITY_EMAIL_SENDER = "eveai_super@flow-it.net"
|
||||
PERMANENT_SESSION_LIFETIME = timedelta(minutes=60)
|
||||
SESSION_REFRESH_EACH_REQUEST = True
|
||||
|
||||
# flask-mailman settings
|
||||
MAIL_SERVER = 'mail.flow-it.net'
|
||||
@@ -5,14 +5,13 @@ from flask_security import SQLAlchemyUserDatastore
|
||||
from flask_security.signals import user_authenticated
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
import logging.config
|
||||
from celery import Celery
|
||||
|
||||
from .extensions import db, migrate, bootstrap, security, mail, login_manager, cors
|
||||
from .models.user import User, Tenant, Role
|
||||
from .models.document import Document, DocumentLanguage, DocumentVersion
|
||||
from .logging_config import LOGGING
|
||||
from .utils.security import set_tenant_session_data
|
||||
from .worker.celery_utils import init_celery
|
||||
from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors
|
||||
from common.models.user import User, Role
|
||||
from config.logging_config import LOGGING
|
||||
from common.utils.security import set_tenant_session_data
|
||||
from .errors import register_error_handlers
|
||||
from eveai_workers.celery_utils import init_celery
|
||||
|
||||
|
||||
def create_app(config_file=None):
|
||||
@@ -20,7 +19,7 @@ def create_app(config_file=None):
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1)
|
||||
|
||||
if config_file is None:
|
||||
app.config.from_object('config.DevConfig')
|
||||
app.config.from_object('config.config.DevConfig')
|
||||
else:
|
||||
app.config.from_object(config_file)
|
||||
|
||||
@@ -31,6 +30,10 @@ def create_app(config_file=None):
|
||||
|
||||
logging.config.dictConfig(LOGGING)
|
||||
register_extensions(app)
|
||||
|
||||
# Initialize celery
|
||||
init_celery(app)
|
||||
|
||||
# Setup Flask-Security-Too
|
||||
user_datastore = SQLAlchemyUserDatastore(db, User, Role)
|
||||
security.init_app(app, user_datastore)
|
||||
@@ -39,6 +42,10 @@ def create_app(config_file=None):
|
||||
# Register Blueprints
|
||||
register_blueprints(app)
|
||||
|
||||
# Register Error Handlers
|
||||
register_error_handlers(app)
|
||||
|
||||
# Debugging settings
|
||||
if app.config['DEBUG'] is True:
|
||||
app.logger.setLevel(logging.DEBUG)
|
||||
mail_logger = logging.getLogger('flask_mailman')
|
||||
@@ -79,17 +86,3 @@ def register_api(app):
|
||||
# from . import api
|
||||
# app.register_blueprint(api.bp, url_prefix='/api')
|
||||
|
||||
|
||||
def create_celery_app(config_file=None):
|
||||
app = Flask(__name__)
|
||||
if config_file is None:
|
||||
app.config.from_object('config.DevConfig')
|
||||
else:
|
||||
app.config.from_object(config_file)
|
||||
|
||||
celery = Celery(app.import_name)
|
||||
init_celery(celery, app)
|
||||
return celery
|
||||
|
||||
|
||||
celery = create_celery_app()
|
||||
|
||||
22
eveai_app/errors.py
Normal file
22
eveai_app/errors.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from flask import render_template, request, jsonify
|
||||
|
||||
|
||||
def not_found_error(error):
    """Build the response for a 404 error.

    Clients that accept JSON but not HTML get a JSON body
    ``{'error': 'Not found'}``; everyone else gets the rendered
    ``error/404.html`` template. Both variants carry status code 404.
    ``error`` is accepted for the handler signature but not inspected.
    """
    accepted = request.accept_mimetypes
    # Equivalent to "not (accepts JSON and not HTML)".
    if accepted.accept_html or not accepted.accept_json:
        return render_template('error/404.html'), 404

    payload = jsonify({'error': 'Not found'})
    payload.status_code = 404
    return payload
|
||||
|
||||
|
||||
def internal_server_error(error):
    """Build the response for a 500 error.

    Clients that accept JSON but not HTML get a JSON body
    ``{'error': 'Internal server error'}``; everyone else gets the rendered
    ``error/500.html`` template. Both variants carry status code 500.
    ``error`` is accepted for the handler signature but not inspected.
    """
    accepted = request.accept_mimetypes
    # Equivalent to "not (accepts JSON and not HTML)".
    if accepted.accept_html or not accepted.accept_json:
        return render_template('error/500.html'), 500

    payload = jsonify({'error': 'Internal server error'})
    payload.status_code = 500
    return payload
|
||||
|
||||
|
||||
def register_error_handlers(app):
    """Register this module's 404 and 500 handlers on ``app``."""
    handlers_by_status = (
        (404, not_found_error),
        (500, internal_server_error),
    )
    for status_code, handler in handlers_by_status:
        app.register_error_handler(status_code, handler)
|
||||
@@ -20,7 +20,7 @@
|
||||
<span>
|
||||
<div class="container mb-4">
|
||||
<div class="row mt-lg-n12 mt-md-n12 mt-n12 justify-content-center">
|
||||
<div class="col-xl-8 col-lg-5 col-md-7 mx-auto">
|
||||
{% block content_class %}<div class="col-xl-8 col-lg-5 col-md-7 mx-auto">{% endblock %}
|
||||
<div class="card mt-8">
|
||||
<div class="card-header p-0 position-relative mt-n4 mx-3 z-index-2">
|
||||
<div class="bg-gradient-success shadow-success border-radius-lg py-3 pe-1 text-center py-4">
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
{% block content_title %}Documents{% endblock %}
|
||||
{% block content_description %}View Documents for Tenant{% endblock %}
|
||||
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto">{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
|
||||
9
eveai_app/templates/error/404.html
Normal file
9
eveai_app/templates/error/404.html
Normal file
@@ -0,0 +1,9 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Error 404{% endblock %}
|
||||
|
||||
{% block content_title %}File not Found{% endblock %}
|
||||
{% block content_description %}Something unexpected happened!{% endblock %}
|
||||
{% block content %}
|
||||
<p><a href="{{ url_for('basic_bp.index') }}">Return home</a></p>
|
||||
{% endblock %}
|
||||
9
eveai_app/templates/error/500.html
Normal file
9
eveai_app/templates/error/500.html
Normal file
@@ -0,0 +1,9 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Error 500{% endblock %}
|
||||
|
||||
{% block content_title %}Internal Server error{% endblock %}
|
||||
{% block content_description %}Something unexpected happened! The administrator has been notified.{% endblock %}
|
||||
{% block content %}
|
||||
<p><a href="{{ url_for('basic_bp.index') }}">Return home</a></p>
|
||||
{% endblock %}
|
||||
@@ -1,15 +1,17 @@
|
||||
import os
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app
|
||||
from flask_security import hash_password, roles_required, roles_accepted, current_user
|
||||
from flask_security import roles_accepted, current_user
|
||||
from sqlalchemy import desc
|
||||
from sqlalchemy.orm import joinedload
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from ..models.document import Document, DocumentLanguage, DocumentVersion
|
||||
from ..extensions import db
|
||||
from common.models import Document, DocumentLanguage, DocumentVersion
|
||||
from common.extensions import db
|
||||
from .document_forms import AddDocumentForm
|
||||
from ..utils.middleware import mw_before_request
|
||||
from common.utils.middleware import mw_before_request
|
||||
from eveai_workers.tasks import create_embeddings
|
||||
|
||||
|
||||
document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
|
||||
|
||||
@@ -68,11 +70,17 @@ def add_document():
|
||||
if error is None:
|
||||
flash('Document added successfully.', 'success')
|
||||
upload_file_for_version(new_doc_vers, file, extension)
|
||||
create_embeddings.delay(tenant_id=session['tenant']['id'],
|
||||
document_version_id=new_doc_vers.id,
|
||||
default_embedding_model=session['default_embedding_model'])
|
||||
current_app.logger.info(f'Document processing started for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}')
|
||||
print('Processing should start soon')
|
||||
else:
|
||||
flash('Error adding document.', 'error')
|
||||
current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {error}')
|
||||
|
||||
# return render_template('document/add_document.html', form=form)
|
||||
return render_template('document/add_document.html', form=form)
|
||||
|
||||
|
||||
@document_bp.route('/documents', methods=['GET', 'POST'])
|
||||
|
||||
@@ -3,7 +3,7 @@ from flask_wtf import FlaskForm
|
||||
from wtforms import (StringField, PasswordField, BooleanField, SubmitField, EmailField, IntegerField, DateField,
|
||||
SelectField, SelectMultipleField, FieldList, FormField)
|
||||
from wtforms.validators import DataRequired, Length, Email, NumberRange, Optional
|
||||
from ..models.user import User, Role
|
||||
from common.models import Role
|
||||
|
||||
|
||||
class TenantForm(FlaskForm):
|
||||
|
||||
@@ -4,10 +4,10 @@ from datetime import datetime as dt, timezone as tz
|
||||
from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app
|
||||
from flask_security import hash_password, roles_required, roles_accepted
|
||||
|
||||
from ..models.user import User, Tenant, Role
|
||||
from ..extensions import db
|
||||
from common.models import User, Tenant, Role
|
||||
from common.extensions import db
|
||||
from .user_forms import TenantForm, CreateUserForm, EditUserForm
|
||||
from ..utils.database import Database
|
||||
from common.utils.database import Database
|
||||
|
||||
user_bp = Blueprint('user_bp', __name__, url_prefix='/user')
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
def init_celery(celery, app):
|
||||
celery.conf.update(app.config) # Load all configurations form Flask app including Queue settings
|
||||
|
||||
class ContextTask(celery.Task):
|
||||
def __call__(self, *args, **kwargs):
|
||||
with app.app_context():
|
||||
return self.run(*args, **kwargs)
|
||||
|
||||
celery.Task = ContextTask
|
||||
@@ -1,59 +0,0 @@
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
from flask import current_app
|
||||
from langchain_mistralai import MistralAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.document_loaders.pdf import PyPDFLoader
|
||||
from langchain_community.vectorstores.chroma import Chroma
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
import os
|
||||
|
||||
from eveai_app import celery
|
||||
from ..utils.database import Database
|
||||
from ..models.document import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI
|
||||
from .. import db
|
||||
|
||||
|
||||
@celery.task(name='create_embeddings', queue='embeddings')
|
||||
def create_embeddings(tenant_id, document_version_id, embedding_model_def):
|
||||
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} '
|
||||
f'with model {embedding_model_def}')
|
||||
Database(tenant_id).switch_schema()
|
||||
document_version = DocumentVersion.query.get(document_version_id)
|
||||
if document_version is None:
|
||||
current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. '
|
||||
f'Document version {document_version_id} not found')
|
||||
return
|
||||
db.session.add(document_version)
|
||||
|
||||
# start processing
|
||||
document_version.processing = True
|
||||
document_version.processing_started_at = dt.now(tz.utc)
|
||||
db.session.commit()
|
||||
|
||||
embedding_provider = embedding_model_def.rsplit('.', 1)[0]
|
||||
embedding_model = embedding_model_def.rsplit('.', 1)[1]
|
||||
# define embedding variables
|
||||
match (embedding_provider, embedding_model):
|
||||
case ('openai', 'text-embedding-3-small'):
|
||||
embedding_model = EmbeddingSmallOpenAI()
|
||||
case ('mistral', 'text-embedding-3-small'):
|
||||
embedding_model = EmbeddingMistral()
|
||||
|
||||
match document_version.file_type:
|
||||
case 'pdf':
|
||||
pdf_file = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location,
|
||||
document_version.file_path)
|
||||
loader = PyPDFLoader(pdf_file)
|
||||
|
||||
# We
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
documents = text_splitter.split_documents(loader.load())
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@celery.task(name='ask_eveAI', queue='llm_interactions')
|
||||
def ask_eve_ai(query):
|
||||
# Interaction logic with LLMs like GPT (Langchain API calls, etc.)
|
||||
pass
|
||||
23
eveai_workers/celery_utils.py
Normal file
23
eveai_workers/celery_utils.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from .tasks import create_embeddings
|
||||
from celery import Celery, Task
|
||||
|
||||
|
||||
def init_celery(app):
    """Create a Celery application bound to the Flask ``app``.

    The Celery app uses a task base class that runs every task body inside
    ``app.app_context()``. It is made the default Celery app and stored in
    ``app.extensions['celery']``.

    Args:
        app: The Flask application whose config supplies the ``CELERY_*``
            settings.

    Returns:
        The configured Celery application (also available afterwards as
        ``app.extensions['celery']``).
    """
    class ContextTask(Task):
        """Task base class that executes ``run`` within the Flask app context."""

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_app = Celery(app.import_name, task_cls=ContextTask)

    # Flask config key -> Celery setting name.
    setting_map = (
        ('CELERY_BROKER_URL', 'broker_url'),
        ('CELERY_RESULT_BACKEND', 'result_backend'),
        ('CELERY_ACCEPT_CONTENT', 'accept_content'),
        ('CELERY_TASK_SERIALIZER', 'task_serializer'),
        ('CELERY_TIMEZONE', 'timezone'),
        ('CELERY_ENABLE_UTC', 'enable_utc'),
    )
    for flask_key, celery_setting in setting_map:
        # Only override settings the Flask config actually defines: assigning
        # the None returned by a missing key would clobber Celery's own
        # defaults (e.g. enable_utc, task_serializer).
        if flask_key in app.config:
            setattr(celery_app.conf, celery_setting, app.config[flask_key])

    celery_app.set_default()

    app.extensions['celery'] = celery_app
    return celery_app
|
||||
|
||||
67
eveai_workers/tasks.py
Normal file
67
eveai_workers/tasks.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
from flask import current_app
|
||||
from langchain_community.document_loaders.unstructured import UnstructuredAPIFileLoader
|
||||
import os
|
||||
from celery import shared_task
|
||||
|
||||
from common.utils.database import Database
|
||||
from common.models import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI
|
||||
from eveai_app import db
|
||||
|
||||
|
||||
@shared_task(name='create_embeddings', queue='embeddings')
|
||||
def create_embeddings(tenant_id, document_version_id, default_embedding_model):
|
||||
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} '
|
||||
f'with model {default_embedding_model}')
|
||||
|
||||
# Ensure we are working in the correct database schema
|
||||
Database(tenant_id).switch_schema()
|
||||
|
||||
# Retrieve document version to process
|
||||
document_version = DocumentVersion.query.get(document_version_id)
|
||||
if document_version is None:
|
||||
current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. '
|
||||
f'Document version {document_version_id} not found')
|
||||
return
|
||||
db.session.add(document_version)
|
||||
|
||||
# start processing
|
||||
document_version.processing = True
|
||||
document_version.processing_started_at = dt.now(tz.utc)
|
||||
db.session.commit()
|
||||
|
||||
embedding_provider = default_embedding_model.rsplit('.', 1)[0]
|
||||
embedding_model = default_embedding_model.rsplit('.', 1)[1]
|
||||
# define embedding variables
|
||||
match (embedding_provider, embedding_model):
|
||||
case ('openai', 'text-embedding-3-small'):
|
||||
embedding_model = EmbeddingSmallOpenAI()
|
||||
case ('mistral', 'text-embedding-3-small'):
|
||||
embedding_model = EmbeddingMistral()
|
||||
|
||||
match document_version.file_type:
|
||||
case 'pdf':
|
||||
url = current_app.config.get('UNSTRUCTURED_FULL_URL')
|
||||
api_key = current_app.config.get('UNSTRUCTURED_API_KEY')
|
||||
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location,
|
||||
document_version.file_path)
|
||||
with open(file_path, 'rb') as f:
|
||||
loader = UnstructuredAPIFileLoader(f,
|
||||
url=url,
|
||||
api_key=api_key,
|
||||
mode='elements',
|
||||
strategy='hi-res',
|
||||
include_page_breaks=True,
|
||||
unique_element_ids=True,
|
||||
chunking_strategy='by_title',
|
||||
max_characters=3000,
|
||||
)
|
||||
documents = loader.load()
|
||||
print(documents)
|
||||
|
||||
|
||||
@shared_task(name='ask_eve_ai', queue='llm_interactions')
def ask_eve_ai(query):
    """Placeholder task for LLM interactions; currently ignores ``query``.

    Returns:
        None - no interaction logic is implemented yet.
    """
    # Interaction logic with LLMs like GPT (Langchain API calls, etc.)
    return None
|
||||
@@ -6,7 +6,7 @@ from flask import current_app
|
||||
from alembic import context
|
||||
from sqlalchemy import NullPool, engine_from_config, text
|
||||
|
||||
from eveai_app.models.user import Tenant
|
||||
from common.models import Tenant
|
||||
|
||||
import pgvector
|
||||
from pgvector.sqlalchemy import Vector
|
||||
|
||||
5
scripts/run_celery.py
Normal file
5
scripts/run_celery.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from eveai_app import create_app
|
||||
|
||||
flask_app = create_app()
|
||||
celery_app = flask_app.extensions['celery']
|
||||
print(flask_app.extensions)
|
||||
@@ -1,8 +1,9 @@
|
||||
from eveai_app import create_app
|
||||
from gevent.pywsgi import WSGIServer
|
||||
|
||||
|
||||
app = create_app()
|
||||
print(__name__)
|
||||
celery_app = app.extensions['celery']
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("Server starting on port 5000")
|
||||
3
scripts/start_embedding_queue.sh
Executable file
3
scripts/start_embedding_queue.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
source .venv/bin/activate
|
||||
celery -A app.celery_app worker --loglevel=info -Q embeddings
|
||||
3
scripts/start_flower.sh
Executable file
3
scripts/start_flower.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
source .venv/bin/activate
|
||||
celery -A app.celery_app flower
|
||||
Reference in New Issue
Block a user