- Refined entitlements to work with MiB for both embeddings and storage

- Improved DocumentVersion storage attributes to reflect Minio settings
- Added size to DocumentVersions to easily calculate usage
- License / LicenseTier forms and views added
This commit is contained in:
Josako
2024-10-07 14:17:44 +02:00
parent f638860e90
commit 9782e31ae5
31 changed files with 1416 additions and 83 deletions

View File

@@ -0,0 +1,94 @@
"""DocumentVersion update to bucket_name, object_name and file_size to better reflet Minio reality
Revision ID: 322d3cf1f17b
Revises: 711a09a77680
Create Date: 2024-10-07 07:45:19.014017
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy import text
from flask import current_app
# revision identifiers, used by Alembic.
revision = '322d3cf1f17b'
down_revision = '711a09a77680'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - Add new fields ###
op.add_column('document_version', sa.Column('bucket_name', sa.String(length=255), nullable=True))
op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True))
op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True))
# ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
from common.models.document import DocumentVersion
from common.extensions import minio_client
from minio.error import S3Error
# Create a connection
connection = op.get_bind()
session = Session(bind=connection)
# Get the current schema name (which should be the tenant ID)
current_schema = connection.execute(text("SELECT current_schema()")).scalar()
tenant_id = int(current_schema)
doc_versions = session.query(DocumentVersion).all()
for doc_version in doc_versions:
try:
object_name = minio_client.generate_object_name(doc_version.doc_id,
doc_version.language,
doc_version.id,
doc_version.file_name)
bucket_name = minio_client.generate_bucket_name(tenant_id)
doc_version.object_name = object_name
doc_version.bucket_name = bucket_name
try:
stat = minio_client.client.stat_object(
bucket_name=bucket_name,
object_name=object_name
)
doc_version.file_size = stat.size / 1048576
current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
except S3Error as e:
if e.code == "NoSuchKey":
current_app.logger.warning(
f"Object {doc_version.file_location} not found in bucket {doc_version.bucket_name}. Skipping.")
continue # Move to the next item
else:
raise e # Handle other types of S3 errors
except Exception as e:
session.rollback()
current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
f"Tenant {tenant_id}. Error: {str(e)}")
try:
session.commit()
current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
except Exception as e:
session.rollback()
current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
# ### commands auto generated by Alembic - Remove old fields ###
# op.drop_column('document_version', 'file_location')
# op.drop_column('document_version', 'file_name')
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('document_version', sa.Column('file_name', sa.VARCHAR(length=200), autoincrement=False, nullable=True))
op.add_column('document_version', sa.Column('file_location', sa.VARCHAR(length=255), autoincrement=False, nullable=True))
op.drop_column('document_version', 'file_size')
op.drop_column('document_version', 'object_name')
op.drop_column('document_version', 'bucket_name')
# ### end Alembic commands ###