Refactoring part 1
Some changes for workers, but stopped due to refactoring
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from ..extensions import db
|
from common.extensions import db
|
||||||
from .user import User, Tenant
|
from .user import User, Tenant
|
||||||
from pgvector.sqlalchemy import Vector
|
from pgvector.sqlalchemy import Vector
|
||||||
|
|
||||||
0
common/models/interaction.py
Normal file
0
common/models/interaction.py
Normal file
@@ -1,4 +1,4 @@
|
|||||||
from ..extensions import db
|
from common.extensions import db
|
||||||
from flask_security import UserMixin, RoleMixin
|
from flask_security import UserMixin, RoleMixin
|
||||||
from sqlalchemy.dialects.postgresql import ARRAY
|
from sqlalchemy.dialects.postgresql import ARRAY
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
0
common/utils/__init__.py
Normal file
0
common/utils/__init__.py
Normal file
@@ -6,7 +6,7 @@ from sqlalchemy.exc import InternalError
|
|||||||
from sqlalchemy.orm import sessionmaker, scoped_session
|
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
|
|
||||||
from ..extensions import db, migrate
|
from common.extensions import db, migrate
|
||||||
|
|
||||||
|
|
||||||
class Database:
|
class Database:
|
||||||
@@ -6,7 +6,6 @@ for handling tenant requests
|
|||||||
from flask_security import current_user
|
from flask_security import current_user
|
||||||
from flask import session
|
from flask import session
|
||||||
|
|
||||||
from ..models.user import User, Tenant
|
|
||||||
from .database import Database
|
from .database import Database
|
||||||
|
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from flask import session
|
from flask import session
|
||||||
from ..models.user import User, Tenant
|
from common.models import User, Tenant
|
||||||
|
|
||||||
|
|
||||||
# Definition of Trigger Handlers
|
# Definition of Trigger Handlers
|
||||||
@@ -10,3 +10,9 @@ def set_tenant_session_data(sender, user, **kwargs):
|
|||||||
session['default_embedding_model'] = tenant.default_embedding_model
|
session['default_embedding_model'] = tenant.default_embedding_model
|
||||||
session['default_llm_model'] = tenant.default_llm_model
|
session['default_llm_model'] = tenant.default_llm_model
|
||||||
|
|
||||||
|
|
||||||
|
def clear_tenant_session_data(sender, user, **kwargs):
    """Remove the tenant-related entries from the Flask session.

    The ``sender``, ``user`` and ``**kwargs`` arguments are accepted but not
    used. NOTE(review): the signature mirrors ``set_tenant_session_data``, so
    this is presumably meant to be connected to a Flask-Security signal --
    confirm where it gets registered.
    """
    tenant_session_keys = (
        'tenant',
        'default_language',
        'default_embedding_model',
        'default_llm_model',
    )
    for session_key in tenant_session_keys:
        # A default of None makes the removal a no-op when the key is absent.
        session.pop(session_key, None)
|
||||||
0
config/__init__.py
Normal file
0
config/__init__.py
Normal file
@@ -8,13 +8,15 @@ class Config(object):
|
|||||||
DEBUG = False
|
DEBUG = False
|
||||||
DEVELOPMENT = False
|
DEVELOPMENT = False
|
||||||
SECRET_KEY = '97867c1491bea5ee6a8e8436eb11bf2ba6a69ff53ab1b17ecba450d0f2e572e1'
|
SECRET_KEY = '97867c1491bea5ee6a8e8436eb11bf2ba6a69ff53ab1b17ecba450d0f2e572e1'
|
||||||
|
SESSION_COOKIE_SECURE = True
|
||||||
|
SESSION_COOKIE_HTTPONLY = True
|
||||||
|
|
||||||
# WTF_CSRF_ENABLED = True
|
# WTF_CSRF_ENABLED = True
|
||||||
|
|
||||||
# flask-security-too settings
|
# flask-security-too settings
|
||||||
SECURITY_PASSWORD_SALT = '228614859439123264035565568761433607235'
|
SECURITY_PASSWORD_SALT = '228614859439123264035565568761433607235'
|
||||||
# REMEMBER_COOKIE_SAMESITE = 'strict'
|
REMEMBER_COOKIE_SAMESITE = 'strict'
|
||||||
# SESSION_COOKIE_SAMESITE = 'strict'
|
SESSION_COOKIE_SAMESITE = 'strict'
|
||||||
SECURITY_CONFIRMABLE = True
|
SECURITY_CONFIRMABLE = True
|
||||||
SECURITY_TRACKABLE = True
|
SECURITY_TRACKABLE = True
|
||||||
SECURITY_PASSWORD_COMPLEXITY_CHECKER = 'zxcvbn'
|
SECURITY_PASSWORD_COMPLEXITY_CHECKER = 'zxcvbn'
|
||||||
@@ -22,6 +24,7 @@ class Config(object):
|
|||||||
SECURITY_RECOVERABLE = True
|
SECURITY_RECOVERABLE = True
|
||||||
SECURITY_EMAIL_SENDER = "eveai_super@flow-it.net"
|
SECURITY_EMAIL_SENDER = "eveai_super@flow-it.net"
|
||||||
PERMANENT_SESSION_LIFETIME = timedelta(minutes=60)
|
PERMANENT_SESSION_LIFETIME = timedelta(minutes=60)
|
||||||
|
SESSION_REFRESH_EACH_REQUEST = True
|
||||||
|
|
||||||
# flask-mailman settings
|
# flask-mailman settings
|
||||||
MAIL_SERVER = 'mail.flow-it.net'
|
MAIL_SERVER = 'mail.flow-it.net'
|
||||||
@@ -5,14 +5,13 @@ from flask_security import SQLAlchemyUserDatastore
|
|||||||
from flask_security.signals import user_authenticated
|
from flask_security.signals import user_authenticated
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
import logging.config
|
import logging.config
|
||||||
from celery import Celery
|
|
||||||
|
|
||||||
from .extensions import db, migrate, bootstrap, security, mail, login_manager, cors
|
from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors
|
||||||
from .models.user import User, Tenant, Role
|
from common.models.user import User, Role
|
||||||
from .models.document import Document, DocumentLanguage, DocumentVersion
|
from config.logging_config import LOGGING
|
||||||
from .logging_config import LOGGING
|
from common.utils.security import set_tenant_session_data
|
||||||
from .utils.security import set_tenant_session_data
|
from .errors import register_error_handlers
|
||||||
from .worker.celery_utils import init_celery
|
from eveai_workers.celery_utils import init_celery
|
||||||
|
|
||||||
|
|
||||||
def create_app(config_file=None):
|
def create_app(config_file=None):
|
||||||
@@ -20,7 +19,7 @@ def create_app(config_file=None):
|
|||||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1)
|
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1)
|
||||||
|
|
||||||
if config_file is None:
|
if config_file is None:
|
||||||
app.config.from_object('config.DevConfig')
|
app.config.from_object('config.config.DevConfig')
|
||||||
else:
|
else:
|
||||||
app.config.from_object(config_file)
|
app.config.from_object(config_file)
|
||||||
|
|
||||||
@@ -31,6 +30,10 @@ def create_app(config_file=None):
|
|||||||
|
|
||||||
logging.config.dictConfig(LOGGING)
|
logging.config.dictConfig(LOGGING)
|
||||||
register_extensions(app)
|
register_extensions(app)
|
||||||
|
|
||||||
|
# Initialize celery
|
||||||
|
init_celery(app)
|
||||||
|
|
||||||
# Setup Flask-Security-Too
|
# Setup Flask-Security-Too
|
||||||
user_datastore = SQLAlchemyUserDatastore(db, User, Role)
|
user_datastore = SQLAlchemyUserDatastore(db, User, Role)
|
||||||
security.init_app(app, user_datastore)
|
security.init_app(app, user_datastore)
|
||||||
@@ -39,6 +42,10 @@ def create_app(config_file=None):
|
|||||||
# Register Blueprints
|
# Register Blueprints
|
||||||
register_blueprints(app)
|
register_blueprints(app)
|
||||||
|
|
||||||
|
# Register Error Handlers
|
||||||
|
register_error_handlers(app)
|
||||||
|
|
||||||
|
# Debugging settings
|
||||||
if app.config['DEBUG'] is True:
|
if app.config['DEBUG'] is True:
|
||||||
app.logger.setLevel(logging.DEBUG)
|
app.logger.setLevel(logging.DEBUG)
|
||||||
mail_logger = logging.getLogger('flask_mailman')
|
mail_logger = logging.getLogger('flask_mailman')
|
||||||
@@ -79,17 +86,3 @@ def register_api(app):
|
|||||||
# from . import api
|
# from . import api
|
||||||
# app.register_blueprint(api.bp, url_prefix='/api')
|
# app.register_blueprint(api.bp, url_prefix='/api')
|
||||||
|
|
||||||
|
|
||||||
def create_celery_app(config_file=None):
|
|
||||||
app = Flask(__name__)
|
|
||||||
if config_file is None:
|
|
||||||
app.config.from_object('config.DevConfig')
|
|
||||||
else:
|
|
||||||
app.config.from_object(config_file)
|
|
||||||
|
|
||||||
celery = Celery(app.import_name)
|
|
||||||
init_celery(celery, app)
|
|
||||||
return celery
|
|
||||||
|
|
||||||
|
|
||||||
celery = create_celery_app()
|
|
||||||
|
|||||||
22
eveai_app/errors.py
Normal file
22
eveai_app/errors.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
from flask import render_template, request, jsonify
|
||||||
|
|
||||||
|
|
||||||
|
def not_found_error(error):
    """Build the response for an HTTP 404 error.

    Clients that accept JSON but not HTML receive a JSON body; every other
    client receives the rendered ``error/404.html`` template. Both variants
    carry status code 404. The ``error`` argument is not inspected.
    """
    accepted = request.accept_mimetypes
    wants_json_only = accepted.accept_json and not accepted.accept_html
    if not wants_json_only:
        return render_template('error/404.html'), 404

    json_response = jsonify({'error': 'Not found'})
    json_response.status_code = 404
    return json_response
|
||||||
|
|
||||||
|
|
||||||
|
def internal_server_error(error):
    """Build the response for an HTTP 500 error.

    Clients that accept JSON but not HTML receive a JSON body; every other
    client receives the rendered ``error/500.html`` template. Both variants
    carry status code 500. The ``error`` argument is not inspected.
    """
    accepted = request.accept_mimetypes
    wants_json_only = accepted.accept_json and not accepted.accept_html
    if not wants_json_only:
        return render_template('error/500.html'), 500

    json_response = jsonify({'error': 'Internal server error'})
    json_response.status_code = 500
    return json_response
|
||||||
|
|
||||||
|
|
||||||
|
def register_error_handlers(app):
    """Register the 404 and 500 error handlers on ``app``."""
    handlers_by_status = (
        (404, not_found_error),
        (500, internal_server_error),
    )
    for status_code, handler in handlers_by_status:
        app.register_error_handler(status_code, handler)
|
||||||
@@ -20,7 +20,7 @@
|
|||||||
<span>
|
<span>
|
||||||
<div class="container mb-4">
|
<div class="container mb-4">
|
||||||
<div class="row mt-lg-n12 mt-md-n12 mt-n12 justify-content-center">
|
<div class="row mt-lg-n12 mt-md-n12 mt-n12 justify-content-center">
|
||||||
<div class="col-xl-8 col-lg-5 col-md-7 mx-auto">
|
{% block content_class %}<div class="col-xl-8 col-lg-5 col-md-7 mx-auto">{% endblock %}
|
||||||
<div class="card mt-8">
|
<div class="card mt-8">
|
||||||
<div class="card-header p-0 position-relative mt-n4 mx-3 z-index-2">
|
<div class="card-header p-0 position-relative mt-n4 mx-3 z-index-2">
|
||||||
<div class="bg-gradient-success shadow-success border-radius-lg py-3 pe-1 text-center py-4">
|
<div class="bg-gradient-success shadow-success border-radius-lg py-3 pe-1 text-center py-4">
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
{% block content_title %}Documents{% endblock %}
|
{% block content_title %}Documents{% endblock %}
|
||||||
{% block content_description %}View Documents for Tenant{% endblock %}
|
{% block content_description %}View Documents for Tenant{% endblock %}
|
||||||
|
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto">{% endblock %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="container">
|
<div class="container">
|
||||||
|
|||||||
9
eveai_app/templates/error/404.html
Normal file
9
eveai_app/templates/error/404.html
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Error 404{% endblock %}
|
||||||
|
|
||||||
|
{% block content_title %}File not Found{% endblock %}
|
||||||
|
{% block content_description %}Something unexpected happened!{% endblock %}
|
||||||
|
{% block content %}
|
||||||
|
<p><a href="{{ url_for('basic_bp.index') }}">Return home</a></p>
|
||||||
|
{% endblock %}
|
||||||
9
eveai_app/templates/error/500.html
Normal file
9
eveai_app/templates/error/500.html
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Error 500{% endblock %}
|
||||||
|
|
||||||
|
{% block content_title %}Internal Server error{% endblock %}
|
||||||
|
{% block content_description %}Something unexpected happened! The administrator has been notified.{% endblock %}
|
||||||
|
{% block content %}
|
||||||
|
<p><a href="{{ url_for('basic_bp.index') }}">Return home</a></p>
|
||||||
|
{% endblock %}
|
||||||
@@ -1,15 +1,17 @@
|
|||||||
import os
|
import os
|
||||||
from datetime import datetime as dt, timezone as tz
|
from datetime import datetime as dt, timezone as tz
|
||||||
from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app
|
from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app
|
||||||
from flask_security import hash_password, roles_required, roles_accepted, current_user
|
from flask_security import roles_accepted, current_user
|
||||||
from sqlalchemy import desc
|
from sqlalchemy import desc
|
||||||
from sqlalchemy.orm import joinedload
|
from sqlalchemy.orm import joinedload
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
from ..models.document import Document, DocumentLanguage, DocumentVersion
|
from common.models import Document, DocumentLanguage, DocumentVersion
|
||||||
from ..extensions import db
|
from common.extensions import db
|
||||||
from .document_forms import AddDocumentForm
|
from .document_forms import AddDocumentForm
|
||||||
from ..utils.middleware import mw_before_request
|
from common.utils.middleware import mw_before_request
|
||||||
|
from eveai_workers.tasks import create_embeddings
|
||||||
|
|
||||||
|
|
||||||
document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
|
document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
|
||||||
|
|
||||||
@@ -68,11 +70,17 @@ def add_document():
|
|||||||
if error is None:
|
if error is None:
|
||||||
flash('Document added successfully.', 'success')
|
flash('Document added successfully.', 'success')
|
||||||
upload_file_for_version(new_doc_vers, file, extension)
|
upload_file_for_version(new_doc_vers, file, extension)
|
||||||
|
create_embeddings.delay(tenant_id=session['tenant']['id'],
|
||||||
|
document_version_id=new_doc_vers.id,
|
||||||
|
default_embedding_model=session['default_embedding_model'])
|
||||||
|
current_app.logger.info(f'Document processing started for tenant {session["tenant"]["id"]}, '
|
||||||
|
f'Document Version {new_doc_vers.id}')
|
||||||
|
print('Processing should start soon')
|
||||||
else:
|
else:
|
||||||
flash('Error adding document.', 'error')
|
flash('Error adding document.', 'error')
|
||||||
current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {error}')
|
current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {error}')
|
||||||
|
|
||||||
# return render_template('document/add_document.html', form=form)
|
return render_template('document/add_document.html', form=form)
|
||||||
|
|
||||||
|
|
||||||
@document_bp.route('/documents', methods=['GET', 'POST'])
|
@document_bp.route('/documents', methods=['GET', 'POST'])
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from flask_wtf import FlaskForm
|
|||||||
from wtforms import (StringField, PasswordField, BooleanField, SubmitField, EmailField, IntegerField, DateField,
|
from wtforms import (StringField, PasswordField, BooleanField, SubmitField, EmailField, IntegerField, DateField,
|
||||||
SelectField, SelectMultipleField, FieldList, FormField)
|
SelectField, SelectMultipleField, FieldList, FormField)
|
||||||
from wtforms.validators import DataRequired, Length, Email, NumberRange, Optional
|
from wtforms.validators import DataRequired, Length, Email, NumberRange, Optional
|
||||||
from ..models.user import User, Role
|
from common.models import Role
|
||||||
|
|
||||||
|
|
||||||
class TenantForm(FlaskForm):
|
class TenantForm(FlaskForm):
|
||||||
|
|||||||
@@ -4,10 +4,10 @@ from datetime import datetime as dt, timezone as tz
|
|||||||
from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app
|
from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app
|
||||||
from flask_security import hash_password, roles_required, roles_accepted
|
from flask_security import hash_password, roles_required, roles_accepted
|
||||||
|
|
||||||
from ..models.user import User, Tenant, Role
|
from common.models import User, Tenant, Role
|
||||||
from ..extensions import db
|
from common.extensions import db
|
||||||
from .user_forms import TenantForm, CreateUserForm, EditUserForm
|
from .user_forms import TenantForm, CreateUserForm, EditUserForm
|
||||||
from ..utils.database import Database
|
from common.utils.database import Database
|
||||||
|
|
||||||
user_bp = Blueprint('user_bp', __name__, url_prefix='/user')
|
user_bp = Blueprint('user_bp', __name__, url_prefix='/user')
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
def init_celery(celery, app):
|
|
||||||
celery.conf.update(app.config) # Load all configurations form Flask app including Queue settings
|
|
||||||
|
|
||||||
class ContextTask(celery.Task):
|
|
||||||
def __call__(self, *args, **kwargs):
|
|
||||||
with app.app_context():
|
|
||||||
return self.run(*args, **kwargs)
|
|
||||||
|
|
||||||
celery.Task = ContextTask
|
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
from datetime import datetime as dt, timezone as tz
|
|
||||||
from flask import current_app
|
|
||||||
from langchain_mistralai import MistralAIEmbeddings
|
|
||||||
from langchain_openai import OpenAIEmbeddings
|
|
||||||
from langchain_community.document_loaders.pdf import PyPDFLoader
|
|
||||||
from langchain_community.vectorstores.chroma import Chroma
|
|
||||||
from langchain_text_splitters import CharacterTextSplitter
|
|
||||||
import os
|
|
||||||
|
|
||||||
from eveai_app import celery
|
|
||||||
from ..utils.database import Database
|
|
||||||
from ..models.document import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI
|
|
||||||
from .. import db
|
|
||||||
|
|
||||||
|
|
||||||
@celery.task(name='create_embeddings', queue='embeddings')
|
|
||||||
def create_embeddings(tenant_id, document_version_id, embedding_model_def):
|
|
||||||
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} '
|
|
||||||
f'with model {embedding_model_def}')
|
|
||||||
Database(tenant_id).switch_schema()
|
|
||||||
document_version = DocumentVersion.query.get(document_version_id)
|
|
||||||
if document_version is None:
|
|
||||||
current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. '
|
|
||||||
f'Document version {document_version_id} not found')
|
|
||||||
return
|
|
||||||
db.session.add(document_version)
|
|
||||||
|
|
||||||
# start processing
|
|
||||||
document_version.processing = True
|
|
||||||
document_version.processing_started_at = dt.now(tz.utc)
|
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
embedding_provider = embedding_model_def.rsplit('.', 1)[0]
|
|
||||||
embedding_model = embedding_model_def.rsplit('.', 1)[1]
|
|
||||||
# define embedding variables
|
|
||||||
match (embedding_provider, embedding_model):
|
|
||||||
case ('openai', 'text-embedding-3-small'):
|
|
||||||
embedding_model = EmbeddingSmallOpenAI()
|
|
||||||
case ('mistral', 'text-embedding-3-small'):
|
|
||||||
embedding_model = EmbeddingMistral()
|
|
||||||
|
|
||||||
match document_version.file_type:
|
|
||||||
case 'pdf':
|
|
||||||
pdf_file = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
|
||||||
document_version.file_location,
|
|
||||||
document_version.file_path)
|
|
||||||
loader = PyPDFLoader(pdf_file)
|
|
||||||
|
|
||||||
# We
|
|
||||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
|
||||||
documents = text_splitter.split_documents(loader.load())
|
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@celery.task(name='ask_eveAI', queue='llm_interactions')
|
|
||||||
def ask_eve_ai(query):
|
|
||||||
# Interaction logic with LLMs like GPT (Langchain API calls, etc.)
|
|
||||||
pass
|
|
||||||
23
eveai_workers/celery_utils.py
Normal file
23
eveai_workers/celery_utils.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from .tasks import create_embeddings
|
||||||
|
from celery import Celery, Task
|
||||||
|
|
||||||
|
|
||||||
|
def init_celery(app):
    """Create a Celery application bound to the Flask application ``app``.

    The Celery app is named after ``app.import_name``, made the default
    Celery app and stored in ``app.extensions['celery']``. Every task runs
    inside ``app.app_context()``.

    Args:
        app: The Flask application whose config supplies the ``CELERY_*``
            settings.

    Returns:
        The configured Celery application (also available afterwards as
        ``app.extensions['celery']``).
    """

    class ContextTask(Task):
        """Task base class that executes ``run`` inside the Flask app context."""

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_app = Celery(app.import_name, task_cls=ContextTask)

    # Celery setting name -> Flask config key it is read from.
    setting_sources = {
        'broker_url': 'CELERY_BROKER_URL',
        'result_backend': 'CELERY_RESULT_BACKEND',
        'accept_content': 'CELERY_ACCEPT_CONTENT',
        'task_serializer': 'CELERY_TASK_SERIALIZER',
        'timezone': 'CELERY_TIMEZONE',
        'enable_utc': 'CELERY_ENABLE_UTC',
    }
    for setting_name, config_key in setting_sources.items():
        configured_value = app.config.get(config_key)
        # Only copy values that are actually configured: assigning None would
        # replace Celery's own default (e.g. enable_utc would become falsy).
        if configured_value is not None:
            setattr(celery_app.conf, setting_name, configured_value)

    celery_app.set_default()

    app.extensions['celery'] = celery_app
    return celery_app
|
||||||
|
|
||||||
67
eveai_workers/tasks.py
Normal file
67
eveai_workers/tasks.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
from datetime import datetime as dt, timezone as tz
|
||||||
|
from flask import current_app
|
||||||
|
from langchain_community.document_loaders.unstructured import UnstructuredAPIFileLoader
|
||||||
|
import os
|
||||||
|
from celery import shared_task
|
||||||
|
|
||||||
|
from common.utils.database import Database
|
||||||
|
from common.models import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI
|
||||||
|
from eveai_app import db
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task(name='create_embeddings', queue='embeddings')
|
||||||
|
def create_embeddings(tenant_id, document_version_id, default_embedding_model):
|
||||||
|
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} '
|
||||||
|
f'with model {default_embedding_model}')
|
||||||
|
|
||||||
|
# Ensure we are working in the correct database schema
|
||||||
|
Database(tenant_id).switch_schema()
|
||||||
|
|
||||||
|
# Retrieve document version to process
|
||||||
|
document_version = DocumentVersion.query.get(document_version_id)
|
||||||
|
if document_version is None:
|
||||||
|
current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. '
|
||||||
|
f'Document version {document_version_id} not found')
|
||||||
|
return
|
||||||
|
db.session.add(document_version)
|
||||||
|
|
||||||
|
# start processing
|
||||||
|
document_version.processing = True
|
||||||
|
document_version.processing_started_at = dt.now(tz.utc)
|
||||||
|
db.session.commit()
|
||||||
|
|
||||||
|
embedding_provider = default_embedding_model.rsplit('.', 1)[0]
|
||||||
|
embedding_model = default_embedding_model.rsplit('.', 1)[1]
|
||||||
|
# define embedding variables
|
||||||
|
match (embedding_provider, embedding_model):
|
||||||
|
case ('openai', 'text-embedding-3-small'):
|
||||||
|
embedding_model = EmbeddingSmallOpenAI()
|
||||||
|
case ('mistral', 'text-embedding-3-small'):
|
||||||
|
embedding_model = EmbeddingMistral()
|
||||||
|
|
||||||
|
match document_version.file_type:
|
||||||
|
case 'pdf':
|
||||||
|
url = current_app.config.get('UNSTRUCTURED_FULL_URL')
|
||||||
|
api_key = current_app.config.get('UNSTRUCTURED_API_KEY')
|
||||||
|
file_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||||
|
document_version.file_location,
|
||||||
|
document_version.file_path)
|
||||||
|
with open(file_path, 'rb') as f:
|
||||||
|
loader = UnstructuredAPIFileLoader(f,
|
||||||
|
url=url,
|
||||||
|
api_key=api_key,
|
||||||
|
mode='elements',
|
||||||
|
strategy='hi-res',
|
||||||
|
include_page_breaks=True,
|
||||||
|
unique_element_ids=True,
|
||||||
|
chunking_strategy='by_title',
|
||||||
|
max_characters=3000,
|
||||||
|
)
|
||||||
|
documents = loader.load()
|
||||||
|
print(documents)
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task(name='ask_eve_ai', queue='llm_interactions')
def ask_eve_ai(query):
    """Placeholder task for LLM interactions; currently does nothing.

    The ``query`` argument is accepted but not used yet.
    """
    # Interaction logic with LLMs like GPT (Langchain API calls, etc.)
    return None
|
||||||
@@ -6,7 +6,7 @@ from flask import current_app
|
|||||||
from alembic import context
|
from alembic import context
|
||||||
from sqlalchemy import NullPool, engine_from_config, text
|
from sqlalchemy import NullPool, engine_from_config, text
|
||||||
|
|
||||||
from eveai_app.models.user import Tenant
|
from common.models import Tenant
|
||||||
|
|
||||||
import pgvector
|
import pgvector
|
||||||
from pgvector.sqlalchemy import Vector
|
from pgvector.sqlalchemy import Vector
|
||||||
|
|||||||
5
scripts/run_celery.py
Normal file
5
scripts/run_celery.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
from eveai_app import create_app
|
||||||
|
|
||||||
|
flask_app = create_app()
|
||||||
|
celery_app = flask_app.extensions['celery']
|
||||||
|
print(flask_app.extensions)
|
||||||
@@ -1,8 +1,9 @@
|
|||||||
from eveai_app import create_app
|
from eveai_app import create_app
|
||||||
from gevent.pywsgi import WSGIServer
|
from gevent.pywsgi import WSGIServer
|
||||||
|
|
||||||
|
|
||||||
app = create_app()
|
app = create_app()
|
||||||
print(__name__)
|
celery_app = app.extensions['celery']
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print("Server starting on port 5000")
|
print("Server starting on port 5000")
|
||||||
3
scripts/start_embedding_queue.sh
Executable file
3
scripts/start_embedding_queue.sh
Executable file
@@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
source .venv/bin/activate
|
||||||
|
celery -A app.celery_app worker --loglevel=info -Q embeddings
|
||||||
3
scripts/start_flower.sh
Executable file
3
scripts/start_flower.sh
Executable file
@@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
source .venv/bin/activate
|
||||||
|
celery -A app.celery_app flower
|
||||||
Reference in New Issue
Block a user