diff --git a/common/models/document.py b/common/models/document.py
index 1eb9243..e1a2535 100644
--- a/common/models/document.py
+++ b/common/models/document.py
@@ -2,12 +2,48 @@ from common.extensions import db
from .user import User, Tenant
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.dialects.postgresql import ARRAY
+import sqlalchemy as sa
+
+
+class Catalog(db.Model):
+ """A named catalog that documents can reference, carrying its own embedding, search and chat settings."""
+ id = db.Column(db.Integer, primary_key=True)
+ name = db.Column(db.String(50), nullable=False)
+ description = db.Column(db.Text, nullable=True)
+
+ # Embedding variables: lists of short strings stored as PostgreSQL arrays.
+ # NOTE(review): these look like the per-catalog counterparts of the Tenant columns with the same names — confirm.
+ html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
+ html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
+ html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
+ html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
+ html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
+
+ # Chunk size bounds used when embedding (units are not visible here — presumably characters; TODO confirm).
+ min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
+ max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
+
+ # Embedding search variables: result limit (es_k) and similarity threshold.
+ es_k = db.Column(db.Integer, nullable=True, default=5)
+ es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
+
+ # Chat variables: temperatures for chats with and without RAG.
+ chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
+ chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
+
+ # Tuning enablers: both flags default to False.
+ embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
+ rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
+
+ # Versioning Information: timestamps are filled in by the database (server_default / onupdate),
+ # the *_by columns reference the User who created / last updated the row.
+ created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
+ created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
+ updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
+ updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Document(db.Model):
id = db.Column(db.Integer, primary_key=True)
+ catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
name = db.Column(db.String(100), nullable=False)
- tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
valid_from = db.Column(db.DateTime, nullable=True)
valid_to = db.Column(db.DateTime, nullable=True)
diff --git a/common/models/user.py b/common/models/user.py
index 93b5d4e..f703e59 100644
--- a/common/models/user.py
+++ b/common/models/user.py
@@ -34,7 +34,7 @@ class Tenant(db.Model):
embedding_model = db.Column(db.String(50), nullable=True)
llm_model = db.Column(db.String(50), nullable=True)
- # Embedding variables
+ # Embedding variables ==> To be removed once all migrations (dev + prod) have been done
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
diff --git a/common/utils/document_utils.py b/common/utils/document_utils.py
index f73fb7b..dbe701c 100644
--- a/common/utils/document_utils.py
+++ b/common/utils/document_utils.py
@@ -17,7 +17,8 @@ from ..models.user import Tenant
def create_document_stack(api_input, file, filename, extension, tenant_id):
# Create the Document
- new_doc = create_document(api_input, filename, tenant_id)
+ catalog_id = int(api_input.get('catalog_id'))
+ new_doc = create_document(api_input, filename, catalog_id)
db.session.add(new_doc)
# Create the DocumentVersion
@@ -45,7 +46,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
return new_doc, new_doc_vers
-def create_document(form, filename, tenant_id):
+def create_document(form, filename, catalog_id):
new_doc = Document()
if form['name'] == '':
new_doc.name = filename.rsplit('.', 1)[0]
@@ -56,7 +57,7 @@ def create_document(form, filename, tenant_id):
new_doc.valid_from = form['valid_from']
else:
new_doc.valid_from = dt.now(tz.utc)
- new_doc.tenant_id = tenant_id
+ new_doc.catalog_id = catalog_id
set_logging_information(new_doc, dt.now(tz.utc))
return new_doc
diff --git a/eveai_api/api/document_api.py b/eveai_api/api/document_api.py
index 5e4468d..befcbb3 100644
--- a/eveai_api/api/document_api.py
+++ b/eveai_api/api/document_api.py
@@ -33,6 +33,7 @@ document_ns = Namespace('documents', description='Document related operations')
# Define models for request parsing and response serialization
upload_parser = reqparse.RequestParser()
+upload_parser.add_argument('catalog_id', location='form', type=int, required=True, help='The catalog to add the file to')
upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload')
upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document')
upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document')
@@ -75,6 +76,7 @@ class AddDocument(Resource):
validate_file_type(extension)
api_input = {
+ 'catalog_id': args.get('catalog_id'),
'name': args.get('name') or filename,
'language': args.get('language'),
'user_context': args.get('user_context'),
@@ -102,6 +104,7 @@ class AddDocument(Resource):
# Models for AddURL
add_url_model = document_ns.model('AddURL', {
+ 'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
'url': fields.String(required=True, description='URL of the document to add'),
'name': fields.String(required=False, description='Name of the document'),
'language': fields.String(required=True, description='Language of the document'),
@@ -138,6 +141,7 @@ class AddURL(Resource):
file_content, filename, extension = process_url(args['url'], tenant_id)
api_input = {
+ 'catalog_id': args['catalog_id'],
'url': args['url'],
'name': args.get('name') or filename,
'language': args['language'],
diff --git a/eveai_app/templates/document/catalog.html b/eveai_app/templates/document/catalog.html
new file mode 100644
index 0000000..4b4ba7e
--- /dev/null
+++ b/eveai_app/templates/document/catalog.html
@@ -0,0 +1,23 @@
+{% extends 'base.html' %}
+{% from "macros.html" import render_field %}
+
+{% block title %}Catalog Registration{% endblock %}
+
+{% block content_title %}Register Catalog{% endblock %}
+{% block content_description %}Define a new catalog of documents in Evie's Library{% endblock %}
+
+{% block content %}
+
+{% endblock %}
+
+{% block content_footer %}
+
+{% endblock %}
diff --git a/eveai_app/templates/document/catalogs.html b/eveai_app/templates/document/catalogs.html
new file mode 100644
index 0000000..513c216
--- /dev/null
+++ b/eveai_app/templates/document/catalogs.html
@@ -0,0 +1,23 @@
+{% extends 'base.html' %}
+{% from 'macros.html' import render_selectable_table, render_pagination %}
+
+{% block title %}Catalogs{% endblock %}
+
+{% block content_title %}Catalogs{% endblock %}
+{% block content_description %}View Catalogs for Tenant{% endblock %}
+{% block content_class %}{% endblock %}
+
+{% block content %}
+
+{% endblock %}
+
+{% block content_footer %}
+ {{ render_pagination(pagination, 'document_bp.catalogs') }}
+{% endblock %}
\ No newline at end of file
diff --git a/eveai_app/templates/document/edit_catalog.html b/eveai_app/templates/document/edit_catalog.html
new file mode 100644
index 0000000..f205e4c
--- /dev/null
+++ b/eveai_app/templates/document/edit_catalog.html
@@ -0,0 +1,25 @@
+{% extends 'base.html' %}
+{% from "macros.html" import render_field %}
+
+{% block title %}Edit Catalog{% endblock %}
+
+{% block content_title %}Edit Catalog{% endblock %}
+{% block content_description %}Edit a catalog of documents in Evie's Library.
+When you change chunking or embedding information, you'll need to manually refresh the library if you want immediate impact.
+{% endblock %}
+
+{% block content %}
+
+{% endblock %}
+
+{% block content_footer %}
+
+{% endblock %}
diff --git a/eveai_app/views/document_forms.py b/eveai_app/views/document_forms.py
index d734f34..87427e1 100644
--- a/eveai_app/views/document_forms.py
+++ b/eveai_app/views/document_forms.py
@@ -1,8 +1,8 @@
from flask import session, current_app
from flask_wtf import FlaskForm
-from wtforms import (StringField, BooleanField, SubmitField, DateField,
+from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, FloatField, SelectMultipleField,
SelectField, FieldList, FormField, TextAreaField, URLField)
-from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError
+from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError, NumberRange
from flask_wtf.file import FileField, FileAllowed, FileRequired
import json
@@ -23,6 +23,36 @@ def validate_json(form, field):
raise ValidationError('Invalid JSON format')
+class CatalogForm(FlaskForm):
+    """Form used to register or edit a catalog.
+
+    The HTML-related fields are plain text inputs holding comma-separated values
+    (see their defaults); the numeric fields carry range validators.
+    """
+    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
+    description = TextAreaField('Description', validators=[Optional()])
+    # HTML Embedding Variables (comma-separated lists entered as text)
+    # The default no longer contains an empty entry between 'li' and 'tbody'.
+    html_tags = StringField('HTML Tags', validators=[DataRequired()],
+                            default='p, h1, h2, h3, h4, h5, h6, li, tbody, tr, td')
+    html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
+                                default='p, li')
+    html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
+    html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
+    html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
+    min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
+                                  default=2000)
+    max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
+                                  default=3000)
+    # Embedding Search variables
+    es_k = IntegerField('Limit for Searching Embeddings (5)',
+                        default=5,
+                        validators=[NumberRange(min=0)])
+    # NOTE(review): this form defaults to 0.5 while the Catalog model column defaults to 0.7 —
+    # confirm which value is intended.
+    es_similarity_threshold = FloatField('Similarity Threshold for Searching Embeddings (0.5)',
+                                         default=0.5,
+                                         validators=[NumberRange(min=0, max=1)])
+    # Chat Variables
+    chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
+    chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])
+    # Tuning variables
+    embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
+    rag_tuning = BooleanField('Enable RAG Tuning', default=False)
+
+
class AddDocumentForm(FlaskForm):
file = FileField('File', validators=[FileRequired(), allowed_file])
name = StringField('Name', validators=[Length(max=100)])
diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py
index f31d511..2d432e6 100644
--- a/eveai_app/views/document_views.py
+++ b/eveai_app/views/document_views.py
@@ -1,6 +1,7 @@
import ast
from datetime import datetime as dt, timezone as tz
+# NOTE(review): `update_catalog` is not referenced anywhere in the visible changes; this looks like an
+# accidental IDE auto-import — confirm and remove.
+from babel.messages.setuptools_frontend import update_catalog
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
from flask_security import roles_accepted, current_user
from sqlalchemy import desc
@@ -12,14 +13,15 @@ from urllib.parse import urlparse, unquote
import io
import json
-from common.models.document import Document, DocumentVersion
+from common.models.document import Document, DocumentVersion, Catalog
from common.extensions import db, minio_client
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
process_multiple_urls, get_documents_list, edit_document, \
edit_document_version, refresh_document
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
EveAIDoubleURLException
-from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
+from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \
+ CatalogForm
from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for
@@ -52,6 +54,95 @@ def before_request():
raise
+@document_bp.route('/catalog', methods=['GET', 'POST'])
+@roles_accepted('Super User', 'Tenant Admin')
+def catalog():
+    """Show the catalog registration form and create a Catalog from a valid submission.
+
+    The outcome of a submission is reported through flash messages and the log;
+    the registration page is rendered again in every case.
+    """
+    form = CatalogForm()
+
+    if form.validate_on_submit():
+        tenant_id = session.get('tenant').get('id')
+        new_catalog = Catalog()
+        form.populate_obj(new_catalog)
+        # The html_* form fields are comma-separated strings while the model columns are arrays:
+        # store lists of trimmed, non-empty entries instead of the raw string.
+        for field_name in ('html_tags', 'html_end_tags', 'html_included_elements',
+                           'html_excluded_elements', 'html_excluded_classes'):
+            raw_value = getattr(form, field_name).data
+            items = [item.strip() for item in raw_value.split(',')] if raw_value else []
+            setattr(new_catalog, field_name, [item for item in items if item])
+        set_logging_information(new_catalog, dt.now(tz.utc))
+
+        try:
+            db.session.add(new_catalog)
+            db.session.commit()
+            flash('Catalog successfully added!', 'success')
+            current_app.logger.info(f'Catalog {new_catalog.name} successfully added for tenant {tenant_id}!')
+        except SQLAlchemyError as e:
+            db.session.rollback()
+            flash(f'Failed to add catalog. Error: {e}', 'danger')
+            current_app.logger.error(f'Failed to add catalog {new_catalog.name} '
+                                     f'for tenant {tenant_id}. Error: {str(e)}')
+    elif request.method == 'POST':
+        # Only report missing fields after a submission, not when the page is first opened.
+        flash('Please fill in all required fields.', 'information')
+
+    # The template needs the form object to render its fields.
+    return render_template('document/catalog.html', form=form)
+
+
+@document_bp.route('/catalogs', methods=['GET', 'POST'])
+@roles_accepted('Super User', 'Tenant Admin')
+def catalogs():
+    """Render a paginated list of catalogs, ordered by id.
+
+    GET must be accepted: the view is the target of redirects from the other catalog
+    views, and the page / per_page values are read from the query string.
+    """
+    page = request.args.get('page', 1, type=int)
+    per_page = request.args.get('per_page', 10, type=int)
+
+    query = Catalog.query.order_by(Catalog.id)
+
+    pagination = query.paginate(page=page, per_page=per_page)
+    the_catalogs = pagination.items
+
+    # prepare table data
+    rows = prepare_table_for_macro(the_catalogs, [('id', ''), ('name', '')])
+
+    # Render the catalogs in a template
+    return render_template('document/catalogs.html', rows=rows, pagination=pagination)
+
+
+@document_bp.route('/handle_catalog_selection', methods=['POST'])
+@roles_accepted('Super User', 'Tenant Admin')
+def handle_catalog_selection():
+    """Dispatch the action chosen for the catalog selected in the catalogs table.
+
+    'set_session_catalog' stores the catalog id and name in the session,
+    'edit_catalog' redirects to the edit view; anything else returns to the list.
+    Responds with 404 when the selected catalog does not exist.
+    """
+    catalog_identification = request.form.get('selected_row')
+    try:
+        catalog_id = ast.literal_eval(catalog_identification).get('value')
+    except (ValueError, SyntaxError, TypeError, AttributeError):
+        # Nothing was selected, or the posted value is not a dict literal: return to the
+        # list instead of failing with a server error.
+        flash('Please select a catalog first.', 'danger')
+        return redirect(prefixed_url_for('document_bp.catalogs'))
+    action = request.form['action']
+    catalog = Catalog.query.get_or_404(catalog_id)
+
+    if action == 'set_session_catalog':
+        session['catalog_id'] = catalog_id
+        session['catalog_name'] = catalog.name
+    elif action == 'edit_catalog':
+        return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))
+
+    return redirect(prefixed_url_for('document_bp.catalogs'))
+
+
+@document_bp.route('/catalog/<int:catalog_id>', methods=['GET', 'POST'])
+@roles_accepted('Super User', 'Tenant Admin')
+def edit_catalog(catalog_id):
+    """Show the edit form for one catalog and store a valid submission.
+
+    Responds with 404 when the catalog does not exist. After a valid submission the
+    user is redirected to the catalogs list, whether or not the commit succeeded
+    (the result is reported through a flash message).
+    """
+    catalog = Catalog.query.get_or_404(catalog_id)
+    form = CatalogForm(obj=catalog)
+    tenant_id = session.get('tenant').get('id')
+    # These values are arrays on the model but comma-separated strings on the form.
+    array_fields = ('html_tags', 'html_end_tags', 'html_included_elements',
+                    'html_excluded_elements', 'html_excluded_classes')
+
+    if request.method == 'GET':
+        # Show the stored lists as comma-separated text instead of a Python list representation.
+        for field_name in array_fields:
+            getattr(form, field_name).data = ', '.join(getattr(catalog, field_name) or [])
+
+    if request.method == 'POST' and form.validate_on_submit():
+        form.populate_obj(catalog)
+        # Convert the submitted text back into lists of trimmed, non-empty entries.
+        for field_name in array_fields:
+            raw_value = getattr(form, field_name).data
+            items = [item.strip() for item in raw_value.split(',')] if raw_value else []
+            setattr(catalog, field_name, [item for item in items if item])
+        update_logging_information(catalog, dt.now(tz.utc))
+        try:
+            db.session.add(catalog)
+            db.session.commit()
+            flash('Catalog updated successfully!', 'success')
+            current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
+        except SQLAlchemyError as e:
+            db.session.rollback()
+            flash(f'Failed to update catalog. Error: {e}', 'danger')
+            current_app.logger.error(f'Failed to update catalog {catalog_id} for tenant {tenant_id}. Error: {str(e)}')
+
+        return redirect(prefixed_url_for('document_bp.catalogs'))
+    else:
+        form_validation_failed(request, form)
+
+    return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
+
+
@document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_document():
@@ -60,6 +151,7 @@ def add_document():
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
+ catalog_id = session['catalog_id']
file = form.file.data
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
@@ -68,6 +160,7 @@ def add_document():
current_app.logger.debug(f'Language on form: {form.language.data}')
api_input = {
+ 'catalog_id': catalog_id,
'name': form.name.data,
'language': form.language.data,
'user_context': form.user_context.data,
@@ -100,11 +193,13 @@ def add_url():
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
+ catalog_id = session['catalog_id']
url = form.url.data
file_content, filename, extension = process_url(url, tenant_id)
api_input = {
+ 'catalog_id': catalog_id,
'name': form.name.data or filename,
'url': url,
'language': form.language.data,
@@ -397,47 +492,47 @@ def fetch_html(url):
return response.content
-def prepare_document_data(docs):
- rows = []
- for doc in docs:
- doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
- {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
- # Document basic details
- if doc.valid_from:
- doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
- else:
- doc_row.append({'value': '', 'class': '', 'type': 'text'})
-
- # Nested languages and versions
- languages_rows = []
- for lang in doc.languages:
- lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
-
- # Latest version details if available (should be available ;-) )
- if lang.latest_version:
- lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
- 'class': '', 'type': 'text'})
- if lang.latest_version.url:
- lang_row.append({'value': lang.latest_version.url,
- 'class': '', 'type': 'link', 'href': lang.latest_version.url})
- else:
- lang_row.append({'value': '', 'class': '', 'type': 'text'})
-
- if lang.latest_version.file_name:
- lang_row.append({'value': lang.latest_version.file_name, 'class': '', 'type': 'text'})
- else:
- lang_row.append({'value': '', 'class': '', 'type': 'text'})
-
- if lang.latest_version.file_type:
- lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
- else:
- lang_row.append({'value': '', 'class': '', 'type': 'text'})
- # Include other details as necessary
-
- languages_rows.append(lang_row)
-
- doc_row.append({'is_group': True, 'colspan': '5',
- 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
- 'sub_rows': languages_rows})
- rows.append(doc_row)
- return rows
+# def prepare_document_data(docs):
+# rows = []
+# for doc in docs:
+# doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
+# {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
+# # Document basic details
+# if doc.valid_from:
+# doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
+# else:
+# doc_row.append({'value': '', 'class': '', 'type': 'text'})
+#
+# # Nested languages and versions
+# languages_rows = []
+# for lang in doc.languages:
+# lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
+#
+# # Latest version details if available (should be available ;-) )
+# if lang.latest_version:
+# lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
+# 'class': '', 'type': 'text'})
+# if lang.latest_version.url:
+# lang_row.append({'value': lang.latest_version.url,
+# 'class': '', 'type': 'link', 'href': lang.latest_version.url})
+# else:
+# lang_row.append({'value': '', 'class': '', 'type': 'text'})
+#
+# if lang.latest_version.object_name:
+# lang_row.append({'value': lang.latest_version.object_name, 'class': '', 'type': 'text'})
+# else:
+# lang_row.append({'value': '', 'class': '', 'type': 'text'})
+#
+# if lang.latest_version.file_type:
+# lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
+# else:
+# lang_row.append({'value': '', 'class': '', 'type': 'text'})
+# # Include other details as necessary
+#
+# languages_rows.append(lang_row)
+#
+# doc_row.append({'is_group': True, 'colspan': '5',
+# 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
+# 'sub_rows': languages_rows})
+# rows.append(doc_row)
+# return rows
diff --git a/migrations/tenant/versions/322d3cf1f17b_documentversion_update_to_bucket_name_.py b/migrations/tenant/versions/322d3cf1f17b_documentversion_update_to_bucket_name_.py
index 583638d..673404b 100644
--- a/migrations/tenant/versions/322d3cf1f17b_documentversion_update_to_bucket_name_.py
+++ b/migrations/tenant/versions/322d3cf1f17b_documentversion_update_to_bucket_name_.py
@@ -26,55 +26,55 @@ def upgrade():
op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True))
op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True))
- # ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
- from common.models.document import DocumentVersion
- from common.extensions import minio_client
- from minio.error import S3Error
-
- # Create a connection
- connection = op.get_bind()
- session = Session(bind=connection)
-
- # Get the current schema name (which should be the tenant ID)
- current_schema = connection.execute(text("SELECT current_schema()")).scalar()
- tenant_id = int(current_schema)
-
- doc_versions = session.query(DocumentVersion).all()
- for doc_version in doc_versions:
- try:
- object_name = minio_client.generate_object_name(doc_version.doc_id,
- doc_version.language,
- doc_version.id,
- doc_version.file_name)
- bucket_name = minio_client.generate_bucket_name(tenant_id)
- doc_version.object_name = object_name
- doc_version.bucket_name = bucket_name
-
- try:
- stat = minio_client.client.stat_object(
- bucket_name=bucket_name,
- object_name=object_name
- )
- doc_version.file_size = stat.size / 1048576
- current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
- except S3Error as e:
- if e.code == "NoSuchKey":
- current_app.logger.warning(
- f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
- continue # Move to the next item
- else:
- raise e # Handle other types of S3 errors
- except Exception as e:
- session.rollback()
- current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
- f"Tenant {tenant_id}. Error: {str(e)}")
-
- try:
- session.commit()
- current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
- except Exception as e:
- session.rollback()
- current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
+ # # ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
+ # from common.models.document import DocumentVersion
+ # from common.extensions import minio_client
+ # from minio.error import S3Error
+ #
+ # # Create a connection
+ # connection = op.get_bind()
+ # session = Session(bind=connection)
+ #
+ # # Get the current schema name (which should be the tenant ID)
+ # current_schema = connection.execute(text("SELECT current_schema()")).scalar()
+ # tenant_id = int(current_schema)
+ #
+ # doc_versions = session.query(DocumentVersion).all()
+ # for doc_version in doc_versions:
+ # try:
+ # object_name = minio_client.generate_object_name(doc_version.doc_id,
+ # doc_version.language,
+ # doc_version.id,
+ # doc_version.file_name)
+ # bucket_name = minio_client.generate_bucket_name(tenant_id)
+ # doc_version.object_name = object_name
+ # doc_version.bucket_name = bucket_name
+ #
+ # try:
+ # stat = minio_client.client.stat_object(
+ # bucket_name=bucket_name,
+ # object_name=object_name
+ # )
+ # doc_version.file_size = stat.size / 1048576
+ # current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
+ # except S3Error as e:
+ # if e.code == "NoSuchKey":
+ # current_app.logger.warning(
+ # f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
+ # continue # Move to the next item
+ # else:
+ # raise e # Handle other types of S3 errors
+ # except Exception as e:
+ # session.rollback()
+ # current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
+ # f"Tenant {tenant_id}. Error: {str(e)}")
+ #
+ # try:
+ # session.commit()
+ # current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
+ # except Exception as e:
+ # session.rollback()
+ # current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
# ### commands auto generated by Alembic - Remove old fields ###
# op.drop_column('document_version', 'file_location')