Refactoring part 1

Some changes for workers, but stopped due to refactoring
This commit is contained in:
Josako
2024-05-06 21:30:07 +02:00
parent d925477e68
commit 8e5ad5f312
34 changed files with 193 additions and 109 deletions

0
common/__init__.py Normal file
View File

17
common/extensions.py Normal file
View File

@@ -0,0 +1,17 @@
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
from flask_bootstrap import Bootstrap
from flask_security import Security
from flask_mailman import Mail
from flask_login import LoginManager
from flask_cors import CORS
# Create extensions
# Every extension object is constructed here without a Flask application, so
# other modules can import these shared instances (e.g. `db`) directly.
# NOTE(review): binding them to an app (typically via `init_app`) is not shown
# in this module — confirm it happens in the application factory.
db = SQLAlchemy()
migrate = Migrate()
bootstrap = Bootstrap()
security = Security()
mail = Mail()
login_manager = LoginManager()
cors = CORS()

View File

@@ -0,0 +1,3 @@

105
common/models/document.py Normal file
View File

@@ -0,0 +1,105 @@
from common.extensions import db
from .user import User, Tenant
from pgvector.sqlalchemy import Vector
class Document(db.Model):
    """A named document that belongs to one tenant.

    Language-specific variants are reachable through ``languages``; each
    ``DocumentLanguage`` gets a ``document`` back-reference to this row.
    """
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
    # Both bounds are nullable, i.e. a document may have no validity limits.
    valid_from = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.DateTime, nullable=True)

    # Versioning Information
    # created_at / updated_at are filled by the database (server-side now());
    # updated_at is additionally refreshed on every UPDATE.
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    languages = db.relationship('DocumentLanguage', backref='document', lazy=True)

    def __repr__(self):
        """Return a short debug representation: id and name."""
        return f"<Document {self.id}: {self.name}>"
class DocumentLanguage(db.Model):
    """One language variant of a ``Document``.

    Holds all ``DocumentVersion`` rows for that language and an optional
    pointer to the version currently considered the latest.
    """
    id = db.Column(db.Integer, primary_key=True)
    document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False)
    language = db.Column(db.String(2), nullable=False)
    # Referenced by table name because DocumentVersion is defined further down.
    latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    # There are two foreign-key paths between this table and document_version
    # (DocumentVersion.doc_lang_id and latest_version_id), so each relationship
    # names the foreign key it follows explicitly.
    versions = db.relationship(
        'DocumentVersion',
        backref='document_language',
        lazy='joined',
        foreign_keys='DocumentVersion.doc_lang_id'
    )
    latest_version = db.relationship(
        'DocumentVersion',
        uselist=False,
        foreign_keys=[latest_version_id]
    )

    def __repr__(self):
        """Return a short debug representation: parent document id and language."""
        return f"<DocumentLanguage {self.document_id}.{self.language}>"
class DocumentVersion(db.Model):
    """A single stored version of a document in one language.

    Carries the source reference (URL and/or stored file), audit columns and
    the state of the processing run for this version.
    """
    id = db.Column(db.Integer, primary_key=True)
    doc_lang_id = db.Column(db.Integer, db.ForeignKey(DocumentLanguage.id), nullable=False)
    url = db.Column(db.String(200), nullable=True)
    file_location = db.Column(db.String(255), nullable=True)
    file_name = db.Column(db.String(200), nullable=True)
    file_type = db.Column(db.String(20), nullable=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id))
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Processing Information
    processing = db.Column(db.Boolean, nullable=False, default=False)
    processing_started_at = db.Column(db.DateTime, nullable=True)
    processing_finished_at = db.Column(db.DateTime, nullable=True)
    processing_error = db.Column(db.String(255), nullable=True)

    # Relations
    embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)

    def __repr__(self):
        """Return ``<DocumentVersion <document id>.<language>.<version id>>``."""
        # The original format string closed the bracket before the version id,
        # yielding e.g. "<DocumentVersion 3.en>.7>"; the stray ">" is removed.
        doc_lang = self.document_language
        return f"<DocumentVersion {doc_lang.document_id}.{doc_lang.language}.{self.id}>"

    def calc_file_location(self):
        """Return the relative storage folder: ``<tenant id>/<document id>/<language>``."""
        return f"{self.document_language.document.tenant_id}/{self.document_language.document.id}/{self.document_language.language}"

    def calc_file_name(self):
        """Return the storage file name: ``<version id>.<file type>``."""
        return f"{self.id}.{self.file_type}"
class EmbeddingMistral(db.Model):
    """A 1024-dimensional embedding vector attached to a ``DocumentVersion``."""
    id = db.Column(db.Integer, primary_key=True)
    doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
    # New rows are active unless stated otherwise.
    active = db.Column(db.Boolean, nullable=False, default=True)

    # 1024 is the MISTRAL Embedding dimension.
    # If another embedding model is chosen, this dimension may need to be changed.
    embedding = db.Column(Vector(1024), nullable=False)
class EmbeddingSmallOpenAI(db.Model):
    """A 1536-dimensional embedding vector attached to a ``DocumentVersion``."""
    id = db.Column(db.Integer, primary_key=True)
    doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
    # New rows are active unless stated otherwise.
    active = db.Column(db.Boolean, nullable=False, default=True)

    # 1536 is the OpenAI Small Embedding dimension.
    # If another embedding model is chosen, this dimension may need to be changed.
    embedding = db.Column(Vector(1536), nullable=False)

View File

115
common/models/user.py Normal file
View File

@@ -0,0 +1,115 @@
from common.extensions import db
from flask_security import UserMixin, RoleMixin
from sqlalchemy.dialects.postgresql import ARRAY
import sqlalchemy as sa
class Tenant(db.Model):
    """Tenant model"""
    __bind_key__ = 'public'
    __table_args__ = {'schema': 'public'}

    # Audit timestamps, maintained by the database
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())

    # Company details
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(80), unique=True, nullable=False)
    website = db.Column(db.String(255), nullable=True)

    # Languages: one default plus the list of permitted ones
    default_language = db.Column(db.String(2), nullable=True)
    allowed_languages = db.Column(ARRAY(sa.String(2)), nullable=True)

    # Embedding / LLM model selection
    default_embedding_model = db.Column(db.String(50), nullable=True)
    allowed_embedding_models = db.Column(ARRAY(sa.String(50)), nullable=True)
    default_llm_model = db.Column(db.String(50), nullable=True)
    allowed_llm_models = db.Column(ARRAY(sa.String(50)), nullable=True)

    # License window and usage allowance
    license_start_date = db.Column(db.Date, nullable=True)
    license_end_date = db.Column(db.Date, nullable=True)
    allowed_monthly_interactions = db.Column(db.Integer, nullable=True)

    # Users of this tenant; each User gets a `tenant` back-reference
    users = db.relationship('User', backref='tenant')

    def __repr__(self):
        """Return a short debug representation: id and name."""
        return f"<Tenant {self.id}: {self.name}>"

    def to_dict(self):
        """Return the tenant's settings as a plain dict keyed by column name.

        The audit timestamps and the ``users`` relation are not included.
        """
        exported_fields = (
            'id',
            'name',
            'website',
            'default_language',
            'allowed_languages',
            'default_embedding_model',
            'allowed_embedding_models',
            'default_llm_model',
            'allowed_llm_models',
            'license_start_date',
            'license_end_date',
            'allowed_monthly_interactions',
        )
        return {field: getattr(self, field) for field in exported_fields}
class Role(db.Model, RoleMixin):
    """A named role, stored in the ``public`` schema.

    Linked to users through ``RolesUsers``; the ``users`` back-reference is
    declared on ``User.roles``.
    """
    __bind_key__ = 'public'
    __table_args__ = {'schema': 'public'}

    id = db.Column(db.Integer(), primary_key=True)
    name = db.Column(db.String(80), unique=True)
    description = db.Column(db.String(255))
class RolesUsers(db.Model):
    """Association table linking users to roles (composite primary key).

    Rows are removed by the database when the referenced user or role is
    deleted (``ondelete='CASCADE'``).
    """
    __bind_key__ = 'public'
    __table_args__ = {'schema': 'public'}

    user_id = db.Column(db.Integer(), db.ForeignKey('public.user.id', ondelete='CASCADE'), primary_key=True)
    role_id = db.Column(db.Integer(), db.ForeignKey('public.role.id', ondelete='CASCADE'), primary_key=True)
class User(db.Model, UserMixin):
    """User model"""
    __bind_key__ = 'public'
    __table_args__ = {'schema': 'public'}

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())

    # User Information
    id = db.Column(db.Integer, primary_key=True)
    user_name = db.Column(db.String(80), unique=True, nullable=False)
    email = db.Column(db.String(255), unique=True, nullable=False)
    password = db.Column(db.String(255), nullable=False)
    first_name = db.Column(db.String(80), nullable=False)
    last_name = db.Column(db.String(80), nullable=False)
    # NOTE(review): both `is_active` and `active` are stored. Flask-Security's
    # UserMixin appears to derive `is_active` from `active`; this column
    # overrides that — confirm which flag is meant to be authoritative.
    is_active = db.Column(db.Boolean, default=True)
    active = db.Column(db.Boolean)
    fs_uniquifier = db.Column(db.String(255), unique=True, nullable=False)
    confirmed_at = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.Date, nullable=True)

    # Security Trackable Information
    last_login_at = db.Column(db.DateTime, nullable=True)
    current_login_at = db.Column(db.DateTime, nullable=True)
    last_login_ip = db.Column(db.String(255), nullable=True)
    current_login_ip = db.Column(db.String(255), nullable=True)
    login_count = db.Column(db.Integer, nullable=False, default=0)

    # Relations
    roles = db.relationship('Role', secondary=RolesUsers.__table__, backref=db.backref('users', lazy='dynamic'))
    tenant_id = db.Column(db.Integer, db.ForeignKey('public.tenant.id'), nullable=False)

    def __repr__(self):
        """Return a short debug representation containing the user name."""
        return '<User %r>' % self.user_name

    def has_roles(self, *args):
        """Return True if the user holds at least one of the given roles.

        Role names may be passed individually (``has_roles('A', 'B')``) or as
        collections (``has_roles(['A', 'B'])``); both forms can be mixed.
        Previously a list argument was compared against each role name as a
        whole and therefore never matched.
        """
        wanted = []
        for arg in args:
            # Flatten list-like arguments; anything else is taken as one name.
            if isinstance(arg, (list, tuple, set, frozenset)):
                wanted.extend(arg)
            else:
                wanted.append(arg)
        return any(role.name in wanted for role in self.roles)

0
common/utils/__init__.py Normal file
View File

12
common/utils/app_hooks.py Normal file
View File

@@ -0,0 +1,12 @@
from flask import session
def make_session_permanent():
    """Mark the current Flask session as permanent.

    Takes no arguments and returns nothing; it only sets ``session.permanent``.
    NOTE(review): presumably registered as a before-request hook — the
    registration is not shown in this module.
    """
    session.permanent = True # Refresh the session timeout on every request
    # Disabled draft of an expired-session check, kept for reference:
    # if 'user_id' in session and session.get('was_authenticated'):
    #     if session.modified:  # Check if the session was modified
    #         session['was_authenticated'] = True
    #     else:
    #         session.pop('user_id', None)  # Clear session
    #         session.pop('was_authenticated', None)
    #         return redirect(url_for('login'))  # Redirect to login page if session expired

91
common/utils/database.py Normal file
View File

@@ -0,0 +1,91 @@
"""Database related functions"""
from os import popen
from sqlalchemy import text
from sqlalchemy.schema import CreateSchema
from sqlalchemy.exc import InternalError
from sqlalchemy.orm import sessionmaker, scoped_session
from flask import current_app
from common.extensions import db, migrate
class Database:
    """used for managing tenant databases related operations

    Each tenant owns one PostgreSQL schema whose name is the string form of
    the tenant identifier given to the constructor.
    """

    def __init__(self, tenant: str) -> None:
        """Remember the schema name for *tenant* (converted with ``str``)."""
        self.schema = str(tenant)

    def get_engine(self):
        """create new schema engine

        Returns the shared engine with tables that have no explicit schema
        translated to this tenant's schema.
        """
        return db.engine.execution_options(
            schema_translate_map={None: self.schema}
        )

    def get_session(self):
        """To get session of tenant/public database schema for quick use

        returns:
            session: session of tenant/public database schema
        """
        return scoped_session(
            sessionmaker(bind=self.get_engine(), expire_on_commit=True)
        )

    def create_schema(self):
        """create new database schema, mostly used on tenant creation

        An ``InternalError`` is rolled back and logged instead of propagated.
        """
        try:
            db.session.execute(CreateSchema(self.schema))
            # The pgvector extension is registered in PostgreSQL under the
            # name "vector". The schema name is quoted like everywhere else in
            # this class, since a purely numeric name is not a valid bare
            # identifier.
            db.session.execute(
                text(f'CREATE EXTENSION IF NOT EXISTS vector SCHEMA "{self.schema}"')
            )
            db.session.commit()
        except InternalError as e:
            db.session.rollback()
            db.session.close()
            current_app.logger.error(f"Error creating schema {self.schema}: {e.args}")

    def create_tables(self):
        """create tables in for schema"""
        db.metadata.create_all(self.get_engine())

    def switch_schema(self):
        """switch between tenant/public database schema"""
        db.session.execute(text(f'set search_path to "{self.schema}"'))
        db.session.commit()

    def migrate_tenant_schema(self):
        """migrate tenant database schema for new tenant

        Creates ``alembic_version`` in the tenant schema and stamps it with
        the current head revision of ``migrations/tenant``.

        raises:
            RuntimeError: if the head revision cannot be read from the
                ``flask db heads`` output.
        """
        # Get the current revision for a database.
        # NOTE: using popen may have a minor performance impact on the application
        # you can store it in a different table in public schema and use it from there
        # may be a faster approach
        # last_revision = heads(directory="migrations/tenant", verbose=True, resolve_dependencies=False)
        with popen(".venv/bin/flask db heads -d migrations/tenant") as pipe:
            heads_output = pipe.read()
        revision_lines = [line for line in heads_output.splitlines() if line.strip()]
        if not revision_lines:
            # Fail with a clear message instead of an IndexError further down.
            raise RuntimeError(
                "Could not determine the latest tenant migration revision: "
                "'flask db heads' produced no output"
            )
        current_app.logger.debug("flask db heads output: %s", heads_output)
        last_revision = revision_lines[-1].split(" ")[0]

        session = self.get_session()
        try:
            # creating revision table in tenant schema
            session.execute(
                text(
                    f'CREATE TABLE "{self.schema}".alembic_version (version_num '
                    "VARCHAR(32) NOT NULL)"
                )
            )
            session.commit()
            # Insert last revision to alembic_version table
            session.execute(
                text(
                    f'INSERT INTO "{self.schema}".alembic_version (version_num) '
                    "VALUES (:version)"
                ),
                {"version": last_revision},
            )
            session.commit()
        finally:
            # Release the connection even when one of the statements fails.
            session.close()

    def create_tenant_schema(self):
        """create tenant used for creating new schema and its tables"""
        self.create_schema()
        self.create_tables()
        self.migrate_tenant_schema()

View File

@@ -0,0 +1,28 @@
"""Middleware for the API
for handling tenant requests
"""
from flask_security import current_user
from flask import session
from .database import Database
def mw_before_request():
    """Before request
    switch tenant schema

    Returns ``None`` when the schema was switched, otherwise a
    ``(body, 403)`` tuple describing why access was refused.
    """
    # A session without tenant data must reach the 403 below rather than
    # fail with a KeyError.
    tenant_id = (session.get('tenant') or {}).get('id')
    if not tenant_id:
        return {"message": "You are not logged into any tenant"}, 403

    # user = User.query.get(current_user.id)
    # has_roles takes role names as positional arguments; wrapping the name in
    # a list made the super-user check always evaluate to False.
    if current_user.has_roles('Super User') or current_user.tenant_id == tenant_id:
        Database(tenant_id).switch_schema()
        return None
    return {"message": "You are not a member of this tenant"}, 403

18
common/utils/security.py Normal file
View File

@@ -0,0 +1,18 @@
from flask import session
from common.models import User, Tenant
# Definition of Trigger Handlers
def set_tenant_session_data(sender, user, **kwargs):
    """Copy the user's tenant settings into the Flask session.

    Stores the full tenant dict under ``'tenant'`` and the three tenant
    defaults under their own keys. ``sender`` and ``kwargs`` are unused.
    NOTE(review): the signature suggests a signal handler (e.g. on login) —
    the connection is not shown here.
    """
    tenant = Tenant.query.filter_by(id=user.tenant_id).first()
    session['tenant'] = tenant.to_dict()
    for attr in ('default_language', 'default_embedding_model', 'default_llm_model'):
        session[attr] = getattr(tenant, attr)
def clear_tenant_session_data(sender, user, **kwargs):
    """Remove all tenant-related keys from the Flask session.

    Keys that are absent are skipped silently. All arguments are unused.
    """
    tenant_keys = (
        'tenant',
        'default_language',
        'default_embedding_model',
        'default_llm_model',
    )
    for key in tenant_keys:
        session.pop(key, None)