- Add Catalog Concept to Document Domain

- Create Catalog views
- Modify document stack creation
This commit is contained in:
Josako
2024-10-14 13:56:23 +02:00
parent 0f4558d775
commit 270479c77d
10 changed files with 339 additions and 102 deletions

View File

@@ -2,12 +2,48 @@ from common.extensions import db
from .user import User, Tenant from .user import User, Tenant
from pgvector.sqlalchemy import Vector from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import ARRAY
import sqlalchemy as sa
class Catalog(db.Model):
    """A catalog that documents can be attached to (see ``Document.catalog_id``).

    Besides its name and description, a catalog carries the settings grouped
    below: embedding, embedding search, chat and tuning variables, followed by
    versioning information.
    """
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)

    # Embedding variables
    # The list defaults are callables so every new row gets its own list
    # instead of sharing (and possibly mutating) one module-level object.
    html_tags = db.Column(ARRAY(sa.String(10)), nullable=True,
                          default=lambda: ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
    html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=lambda: ['p', 'li'])
    html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
    html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
    html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)

    min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
    max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)

    # Embedding search variables
    es_k = db.Column(db.Integer, nullable=True, default=5)
    es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)

    # Chat variables
    chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
    chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)

    # Tuning enablers
    embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
    rag_tuning = db.Column(db.Boolean, nullable=True, default=False)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Document(db.Model): class Document(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
name = db.Column(db.String(100), nullable=False) name = db.Column(db.String(100), nullable=False)
tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
valid_from = db.Column(db.DateTime, nullable=True) valid_from = db.Column(db.DateTime, nullable=True)
valid_to = db.Column(db.DateTime, nullable=True) valid_to = db.Column(db.DateTime, nullable=True)

View File

@@ -34,7 +34,7 @@ class Tenant(db.Model):
embedding_model = db.Column(db.String(50), nullable=True) embedding_model = db.Column(db.String(50), nullable=True)
llm_model = db.Column(db.String(50), nullable=True) llm_model = db.Column(db.String(50), nullable=True)
# Embedding variables # Embedding variables ==> To be removed once all migrations (dev + prod) have been done
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li']) html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li']) html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True) html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)

View File

@@ -17,7 +17,8 @@ from ..models.user import Tenant
def create_document_stack(api_input, file, filename, extension, tenant_id): def create_document_stack(api_input, file, filename, extension, tenant_id):
# Create the Document # Create the Document
new_doc = create_document(api_input, filename, tenant_id) catalog_id = int(api_input.get('catalog_id'))
new_doc = create_document(api_input, filename, catalog_id)
db.session.add(new_doc) db.session.add(new_doc)
# Create the DocumentVersion # Create the DocumentVersion
@@ -45,7 +46,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
return new_doc, new_doc_vers return new_doc, new_doc_vers
def create_document(form, filename, tenant_id): def create_document(form, filename, catalog_id):
new_doc = Document() new_doc = Document()
if form['name'] == '': if form['name'] == '':
new_doc.name = filename.rsplit('.', 1)[0] new_doc.name = filename.rsplit('.', 1)[0]
@@ -56,7 +57,7 @@ def create_document(form, filename, tenant_id):
new_doc.valid_from = form['valid_from'] new_doc.valid_from = form['valid_from']
else: else:
new_doc.valid_from = dt.now(tz.utc) new_doc.valid_from = dt.now(tz.utc)
new_doc.tenant_id = tenant_id new_doc.catalog_id = catalog_id
set_logging_information(new_doc, dt.now(tz.utc)) set_logging_information(new_doc, dt.now(tz.utc))
return new_doc return new_doc

View File

@@ -33,6 +33,7 @@ document_ns = Namespace('documents', description='Document related operations')
# Define models for request parsing and response serialization # Define models for request parsing and response serialization
upload_parser = reqparse.RequestParser() upload_parser = reqparse.RequestParser()
upload_parser.add_argument('catalog_id', location='form', type=int, required=True, help='The catalog to add the file to')
upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload') upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload')
upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document') upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document')
upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document') upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document')
@@ -75,6 +76,7 @@ class AddDocument(Resource):
validate_file_type(extension) validate_file_type(extension)
api_input = { api_input = {
'catalog_id': args.get('catalog_id'),
'name': args.get('name') or filename, 'name': args.get('name') or filename,
'language': args.get('language'), 'language': args.get('language'),
'user_context': args.get('user_context'), 'user_context': args.get('user_context'),
@@ -102,6 +104,7 @@ class AddDocument(Resource):
# Models for AddURL # Models for AddURL
add_url_model = document_ns.model('AddURL', { add_url_model = document_ns.model('AddURL', {
'catalog_id': fields.Integer(required='True', description='ID of the catalog the URL needs to be added to'),
'url': fields.String(required=True, description='URL of the document to add'), 'url': fields.String(required=True, description='URL of the document to add'),
'name': fields.String(required=False, description='Name of the document'), 'name': fields.String(required=False, description='Name of the document'),
'language': fields.String(required=True, description='Language of the document'), 'language': fields.String(required=True, description='Language of the document'),
@@ -138,6 +141,7 @@ class AddURL(Resource):
file_content, filename, extension = process_url(args['url'], tenant_id) file_content, filename, extension = process_url(args['url'], tenant_id)
api_input = { api_input = {
'catalog_id': args['catlog_id'],
'url': args['url'], 'url': args['url'],
'name': args.get('name') or filename, 'name': args.get('name') or filename,
'language': args['language'], 'language': args['language'],

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{# Catalog registration page: renders the form object passed in as `form`. #}
{% block title %}Catalog Registration{% endblock %}
{% block content_title %}Register Catalog{% endblock %}
{% block content_description %}Define a new catalog of documents in Evie's Library{% endblock %}
{% block content %}
{# No action attribute: the form is posted back to the URL that rendered it. #}
<form method="post">
{{ form.hidden_tag() }}
{# Both lists are empty, so no field is disabled or excluded. #}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{# Every field of the form is rendered through the render_field macro. #}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Register Catalog</button>
</form>
{% endblock %}
{# The footer block is intentionally left empty. #}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{# Catalog overview page: a selectable table of catalogs plus the actions that apply to the selected row. #}
{% block title %}Catalogs{% endblock %}
{% block content_title %}Catalogs{% endblock %}
{% block content_description %}View Catalogs for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
    <form method="POST" action="{{ url_for('document_bp.handle_catalog_selection') }}">
        {{ render_selectable_table(headers=["Catalog ID", "Name"], rows=rows, selectable=True, id="catalogsTable") }}
        {# The value of the clicked button is posted as `action` and decides what happens to the selected catalog. #}
        <div class="form-group mt-3">
            <button type="submit" name="action" value="set_session_catalog" class="btn btn-primary">Set Session Catalog</button>
            <button type="submit" name="action" value="edit_catalog" class="btn btn-secondary">Edit Catalog</button>
        </div>
    </form>
</div>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.catalogs') }}
{% endblock %}

View File

@@ -0,0 +1,25 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{# Catalog edit page: renders the form object passed in as `form`. #}
{% block title %}Edit Catalog{% endblock %}
{% block content_title %}Edit Catalog{% endblock %}
{% block content_description %}Edit a catalog of documents in Evie's Library.
When you change chunking or embedding information, you'll need to manually refresh the library if you want immediate impact.
{% endblock %}
{% block content %}
{# No action attribute: the form is posted back to the URL that rendered it. #}
<form method="post">
    {{ form.hidden_tag() }}
    {# Both lists are empty, so no field is disabled or excluded. #}
    {% set disabled_fields = [] %}
    {% set exclude_fields = [] %}
    {% for field in form %}
        {{ render_field(field, disabled_fields, exclude_fields) }}
    {% endfor %}
    <button type="submit" class="btn btn-primary">Save Catalog</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -1,8 +1,8 @@
from flask import session, current_app from flask import session, current_app
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from wtforms import (StringField, BooleanField, SubmitField, DateField, from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, FloatField, SelectMultipleField,
SelectField, FieldList, FormField, TextAreaField, URLField) SelectField, FieldList, FormField, TextAreaField, URLField)
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError, NumberRange
from flask_wtf.file import FileField, FileAllowed, FileRequired from flask_wtf.file import FileField, FileAllowed, FileRequired
import json import json
@@ -23,6 +23,36 @@ def validate_json(form, field):
raise ValidationError('Invalid JSON format') raise ValidationError('Invalid JSON format')
class CatalogForm(FlaskForm):
    """Form used to register and edit a catalog.

    The HTML related settings are entered as comma-separated text; the
    remaining fields are numeric or boolean settings with a default value.
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # HTML Embedding Variables (comma-separated lists)
    # The default no longer contains an empty entry between 'li' and 'tbody'.
    html_tags = StringField('HTML Tags', validators=[DataRequired()],
                            default='p, h1, h2, h3, h4, h5, h6, li, tbody, tr, td')
    html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
                                default='p, li')
    html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
    html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
    html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])

    # Optional() comes first so an empty input ends validation before the
    # range check is attempted.
    min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[Optional(), NumberRange(min=0)],
                                  default=2000)
    max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[Optional(), NumberRange(min=0)],
                                  default=3000)

    # Embedding Search variables
    es_k = IntegerField('Limit for Searching Embeddings (5)',
                        default=5,
                        validators=[NumberRange(min=0)])
    # NOTE(review): the Catalog model column defaults to 0.7 while this form
    # defaults to 0.5 - confirm which value is intended.
    es_similarity_threshold = FloatField('Similarity Threshold for Searching Embeddings (0.5)',
                                         default=0.5,
                                         validators=[NumberRange(min=0, max=1)])

    # Chat Variables
    chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
    chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])

    # Tuning variables
    embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
    rag_tuning = BooleanField('Enable RAG Tuning', default=False)
class AddDocumentForm(FlaskForm): class AddDocumentForm(FlaskForm):
file = FileField('File', validators=[FileRequired(), allowed_file]) file = FileField('File', validators=[FileRequired(), allowed_file])
name = StringField('Name', validators=[Length(max=100)]) name = StringField('Name', validators=[Length(max=100)])

View File

@@ -1,6 +1,7 @@
import ast import ast
from datetime import datetime as dt, timezone as tz from datetime import datetime as dt, timezone as tz
from babel.messages.setuptools_frontend import update_catalog
from flask import request, redirect, flash, render_template, Blueprint, session, current_app from flask import request, redirect, flash, render_template, Blueprint, session, current_app
from flask_security import roles_accepted, current_user from flask_security import roles_accepted, current_user
from sqlalchemy import desc from sqlalchemy import desc
@@ -12,14 +13,15 @@ from urllib.parse import urlparse, unquote
import io import io
import json import json
from common.models.document import Document, DocumentVersion from common.models.document import Document, DocumentVersion, Catalog
from common.extensions import db, minio_client from common.extensions import db, minio_client
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \ from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
process_multiple_urls, get_documents_list, edit_document, \ process_multiple_urls, get_documents_list, edit_document, \
edit_document_version, refresh_document edit_document_version, refresh_document
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \ from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
EveAIDoubleURLException EveAIDoubleURLException
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \
CatalogForm
from common.utils.middleware import mw_before_request from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for from common.utils.nginx_utils import prefixed_url_for
@@ -52,6 +54,95 @@ def before_request():
raise raise
@document_bp.route('/catalog', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalog():
    """Register a new catalog.

    A GET request renders the empty registration form. A POST request
    validates the form, stores a new ``Catalog`` and reports the outcome with
    a flash message. The registration page is rendered in every case.
    """
    form = CatalogForm()
    if form.validate_on_submit():
        tenant_id = session.get('tenant').get('id')
        new_catalog = Catalog()
        form.populate_obj(new_catalog)
        # The form collects these settings as comma-separated text, while the
        # Catalog model declares them as ARRAY columns: convert to lists.
        for field_name in ('html_tags', 'html_end_tags', 'html_included_elements',
                           'html_excluded_elements', 'html_excluded_classes'):
            raw_value = getattr(form, field_name).data
            items = [item.strip() for item in raw_value.split(',')] if raw_value else []
            setattr(new_catalog, field_name, [item for item in items if item])
        set_logging_information(new_catalog, dt.now(tz.utc))

        try:
            db.session.add(new_catalog)
            db.session.commit()
            flash('Catalog successfully added!', 'success')
            current_app.logger.info(f'Catalog {new_catalog.name} successfully added for tenant {tenant_id}!')
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to add catalog. Error: {e}', 'danger')
            current_app.logger.error(f'Failed to add catalog {new_catalog.name} '
                                     f'for tenant {tenant_id}. Error: {str(e)}')
    elif request.method == 'POST':
        # Only complain about missing input after a submission, not when the
        # page is first opened.
        flash('Please fill in all required fields.', 'information')

    # The template iterates over `form`, so it has to be passed along.
    return render_template('document/catalog.html', form=form)
@document_bp.route('/catalogs', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalogs():
    """Render a paginated overview of the catalogs, ordered by id.

    GET is accepted because this view is the target of redirects and of the
    pagination links. Paging is controlled by the ``page`` (default 1) and
    ``per_page`` (default 10) query parameters.
    """
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 10, type=int)

    pagination = Catalog.query.order_by(Catalog.id).paginate(page=page, per_page=per_page)

    # Prepare the table data expected by the selectable-table macro
    rows = prepare_table_for_macro(pagination.items, [('id', ''), ('name', '')])

    return render_template('document/catalogs.html', rows=rows, pagination=pagination)
@document_bp.route('/handle_catalog_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_catalog_selection():
    """Apply the posted ``action`` to the catalog selected in the overview.

    ``set_session_catalog`` stores the catalog id and name in the session;
    ``edit_catalog`` redirects to the edit page. Every other outcome,
    including a missing or unreadable selection, returns to the overview.
    Responds with 404 when the selected catalog does not exist.
    """
    overview_url = prefixed_url_for('document_bp.catalogs')

    catalog_identification = request.form.get('selected_row')
    if not catalog_identification:
        # Nothing was selected: literal_eval(None) would raise.
        flash('Please select a catalog first.', 'information')
        return redirect(overview_url)

    try:
        catalog_id = ast.literal_eval(catalog_identification).get('value')
    except (ValueError, SyntaxError, AttributeError):
        # The posted value is not a dict literal carrying a 'value' entry.
        flash('Invalid catalog selection.', 'danger')
        return redirect(overview_url)

    action = request.form['action']
    catalog = Catalog.query.get_or_404(catalog_id)

    if action == 'set_session_catalog':
        session['catalog_id'] = catalog_id
        session['catalog_name'] = catalog.name
    elif action == 'edit_catalog':
        return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))

    return redirect(overview_url)
@document_bp.route('/catalog/<int:catalog_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_catalog(catalog_id):
    """Edit an existing catalog.

    A GET request renders the form pre-filled from the catalog. A valid POST
    stores the changes and redirects to the catalog overview, whether or not
    the commit succeeded (the outcome is flashed). Responds with 404 when the
    catalog does not exist.
    """
    # The form edits these settings as comma-separated text, while the
    # Catalog model declares them as ARRAY columns.
    array_fields = ('html_tags', 'html_end_tags', 'html_included_elements',
                    'html_excluded_elements', 'html_excluded_classes')

    catalog = Catalog.query.get_or_404(catalog_id)
    form = CatalogForm(obj=catalog)
    tenant_id = session.get('tenant').get('id')

    if request.method == 'GET':
        # Show the stored lists as comma-separated text instead of a raw list.
        for field_name in array_fields:
            stored = getattr(catalog, field_name)
            getattr(form, field_name).data = ', '.join(stored) if stored else ''

    if form.validate_on_submit():
        form.populate_obj(catalog)
        # Convert the submitted text back into lists before saving.
        for field_name in array_fields:
            raw_value = getattr(form, field_name).data
            items = [item.strip() for item in raw_value.split(',')] if raw_value else []
            setattr(catalog, field_name, [item for item in items if item])
        update_logging_information(catalog, dt.now(tz.utc))

        try:
            db.session.add(catalog)
            db.session.commit()
            flash('Catalog successfully updated!', 'success')
            current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to update catalog. Error: {e}', 'danger')
            current_app.logger.error(f'Failed to update catalog {catalog_id} for tenant {tenant_id}. Error: {str(e)}')

        return redirect(prefixed_url_for('document_bp.catalogs'))
    else:
        form_validation_failed(request, form)

    return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
@document_bp.route('/add_document', methods=['GET', 'POST']) @document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def add_document(): def add_document():
@@ -60,6 +151,7 @@ def add_document():
if form.validate_on_submit(): if form.validate_on_submit():
try: try:
tenant_id = session['tenant']['id'] tenant_id = session['tenant']['id']
catalog_id = session['catalog_id']
file = form.file.data file = form.file.data
filename = secure_filename(file.filename) filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower() extension = filename.rsplit('.', 1)[1].lower()
@@ -68,6 +160,7 @@ def add_document():
current_app.logger.debug(f'Language on form: {form.language.data}') current_app.logger.debug(f'Language on form: {form.language.data}')
api_input = { api_input = {
'catalog_id': catalog_id,
'name': form.name.data, 'name': form.name.data,
'language': form.language.data, 'language': form.language.data,
'user_context': form.user_context.data, 'user_context': form.user_context.data,
@@ -100,11 +193,13 @@ def add_url():
if form.validate_on_submit(): if form.validate_on_submit():
try: try:
tenant_id = session['tenant']['id'] tenant_id = session['tenant']['id']
catalog_id = session['catalog_id']
url = form.url.data url = form.url.data
file_content, filename, extension = process_url(url, tenant_id) file_content, filename, extension = process_url(url, tenant_id)
api_input = { api_input = {
'catalog_id': catalog_id,
'name': form.name.data or filename, 'name': form.name.data or filename,
'url': url, 'url': url,
'language': form.language.data, 'language': form.language.data,
@@ -397,47 +492,47 @@ def fetch_html(url):
return response.content return response.content
def prepare_document_data(docs): # def prepare_document_data(docs):
rows = [] # rows = []
for doc in docs: # for doc in docs:
doc_row = [{'value': doc.name, 'class': '', 'type': 'text'}, # doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
{'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}] # {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
# Document basic details # # Document basic details
if doc.valid_from: # if doc.valid_from:
doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'}) # doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
else: # else:
doc_row.append({'value': '', 'class': '', 'type': 'text'}) # doc_row.append({'value': '', 'class': '', 'type': 'text'})
#
# Nested languages and versions # # Nested languages and versions
languages_rows = [] # languages_rows = []
for lang in doc.languages: # for lang in doc.languages:
lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}] # lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
#
# Latest version details if available (should be available ;-) ) # # Latest version details if available (should be available ;-) )
if lang.latest_version: # if lang.latest_version:
lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"), # lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
'class': '', 'type': 'text'}) # 'class': '', 'type': 'text'})
if lang.latest_version.url: # if lang.latest_version.url:
lang_row.append({'value': lang.latest_version.url, # lang_row.append({'value': lang.latest_version.url,
'class': '', 'type': 'link', 'href': lang.latest_version.url}) # 'class': '', 'type': 'link', 'href': lang.latest_version.url})
else: # else:
lang_row.append({'value': '', 'class': '', 'type': 'text'}) # lang_row.append({'value': '', 'class': '', 'type': 'text'})
#
if lang.latest_version.file_name: # if lang.latest_version.object_name:
lang_row.append({'value': lang.latest_version.file_name, 'class': '', 'type': 'text'}) # lang_row.append({'value': lang.latest_version.object_name, 'class': '', 'type': 'text'})
else: # else:
lang_row.append({'value': '', 'class': '', 'type': 'text'}) # lang_row.append({'value': '', 'class': '', 'type': 'text'})
#
if lang.latest_version.file_type: # if lang.latest_version.file_type:
lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'}) # lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
else: # else:
lang_row.append({'value': '', 'class': '', 'type': 'text'}) # lang_row.append({'value': '', 'class': '', 'type': 'text'})
# Include other details as necessary # # Include other details as necessary
#
languages_rows.append(lang_row) # languages_rows.append(lang_row)
#
doc_row.append({'is_group': True, 'colspan': '5', # doc_row.append({'is_group': True, 'colspan': '5',
'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'], # 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
'sub_rows': languages_rows}) # 'sub_rows': languages_rows})
rows.append(doc_row) # rows.append(doc_row)
return rows # return rows

View File

@@ -26,55 +26,55 @@ def upgrade():
op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True)) op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True))
op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True)) op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True))
# ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ### # # ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
from common.models.document import DocumentVersion # from common.models.document import DocumentVersion
from common.extensions import minio_client # from common.extensions import minio_client
from minio.error import S3Error # from minio.error import S3Error
#
# Create a connection # # Create a connection
connection = op.get_bind() # connection = op.get_bind()
session = Session(bind=connection) # session = Session(bind=connection)
#
# Get the current schema name (which should be the tenant ID) # # Get the current schema name (which should be the tenant ID)
current_schema = connection.execute(text("SELECT current_schema()")).scalar() # current_schema = connection.execute(text("SELECT current_schema()")).scalar()
tenant_id = int(current_schema) # tenant_id = int(current_schema)
#
doc_versions = session.query(DocumentVersion).all() # doc_versions = session.query(DocumentVersion).all()
for doc_version in doc_versions: # for doc_version in doc_versions:
try: # try:
object_name = minio_client.generate_object_name(doc_version.doc_id, # object_name = minio_client.generate_object_name(doc_version.doc_id,
doc_version.language, # doc_version.language,
doc_version.id, # doc_version.id,
doc_version.file_name) # doc_version.file_name)
bucket_name = minio_client.generate_bucket_name(tenant_id) # bucket_name = minio_client.generate_bucket_name(tenant_id)
doc_version.object_name = object_name # doc_version.object_name = object_name
doc_version.bucket_name = bucket_name # doc_version.bucket_name = bucket_name
#
try: # try:
stat = minio_client.client.stat_object( # stat = minio_client.client.stat_object(
bucket_name=bucket_name, # bucket_name=bucket_name,
object_name=object_name # object_name=object_name
) # )
doc_version.file_size = stat.size / 1048576 # doc_version.file_size = stat.size / 1048576
current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}") # current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
except S3Error as e: # except S3Error as e:
if e.code == "NoSuchKey": # if e.code == "NoSuchKey":
current_app.logger.warning( # current_app.logger.warning(
f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.") # f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
continue # Move to the next item # continue # Move to the next item
else: # else:
raise e # Handle other types of S3 errors # raise e # Handle other types of S3 errors
except Exception as e: # except Exception as e:
session.rollback() # session.rollback()
current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for " # current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
f"Tenant {tenant_id}. Error: {str(e)}") # f"Tenant {tenant_id}. Error: {str(e)}")
#
try: # try:
session.commit() # session.commit()
current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}") # current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
except Exception as e: # except Exception as e:
session.rollback() # session.rollback()
current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}") # current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
# ### commands auto generated by Alembic - Remove old fields ### # ### commands auto generated by Alembic - Remove old fields ###
# op.drop_column('document_version', 'file_location') # op.drop_column('document_version', 'file_location')