- Add Catalog Concept to Document Domain
- Create Catalog views
- Modify document stack creation
This commit is contained in:
@@ -2,12 +2,48 @@ from common.extensions import db
|
||||
from .user import User, Tenant
|
||||
from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
class Catalog(db.Model):
    """Catalog record that documents can be attached to.

    Besides its name and description, a catalog stores the settings grouped
    below: HTML parsing / chunking parameters, embedding-search parameters,
    chat temperatures, tuning switches and audit columns.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)

    # Embedding variables
    html_tags = db.Column(
        ARRAY(sa.String(10)),
        default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'],
        nullable=True,
    )
    html_end_tags = db.Column(
        ARRAY(sa.String(10)),
        default=['p', 'li'],
        nullable=True,
    )
    html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
    html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
    html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)

    min_chunk_size = db.Column(db.Integer, default=2000, nullable=True)
    max_chunk_size = db.Column(db.Integer, default=3000, nullable=True)

    # Embedding search variables
    es_k = db.Column(db.Integer, default=5, nullable=True)
    es_similarity_threshold = db.Column(db.Float, default=0.7, nullable=True)

    # Chat variables
    chat_RAG_temperature = db.Column(db.Float, default=0.3, nullable=True)
    chat_no_RAG_temperature = db.Column(db.Float, default=0.5, nullable=True)

    # Tuning enablers
    embed_tuning = db.Column(db.Boolean, default=False, nullable=True)
    rag_tuning = db.Column(db.Boolean, default=False, nullable=True)

    # Versioning Information
    # created_at / updated_at are filled in by the database (server_default);
    # updated_at is additionally refreshed on every UPDATE via onupdate.
    created_at = db.Column(db.DateTime, server_default=db.func.now(), nullable=False)
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        server_default=db.func.now(),
        onupdate=db.func.now(),
        nullable=False,
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
|
||||
|
||||
|
||||
class Document(db.Model):
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
|
||||
name = db.Column(db.String(100), nullable=False)
|
||||
tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
|
||||
valid_from = db.Column(db.DateTime, nullable=True)
|
||||
valid_to = db.Column(db.DateTime, nullable=True)
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class Tenant(db.Model):
|
||||
embedding_model = db.Column(db.String(50), nullable=True)
|
||||
llm_model = db.Column(db.String(50), nullable=True)
|
||||
|
||||
# Embedding variables
|
||||
# Embedding variables ==> To be removed once all migrations (dev + prod) have been done
|
||||
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
|
||||
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
|
||||
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
|
||||
|
||||
@@ -17,7 +17,8 @@ from ..models.user import Tenant
|
||||
|
||||
def create_document_stack(api_input, file, filename, extension, tenant_id):
|
||||
# Create the Document
|
||||
new_doc = create_document(api_input, filename, tenant_id)
|
||||
catalog_id = int(api_input.get('catalog_id'))
|
||||
new_doc = create_document(api_input, filename, catalog_id)
|
||||
db.session.add(new_doc)
|
||||
|
||||
# Create the DocumentVersion
|
||||
@@ -45,7 +46,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
|
||||
return new_doc, new_doc_vers
|
||||
|
||||
|
||||
def create_document(form, filename, tenant_id):
|
||||
def create_document(form, filename, catalog_id):
|
||||
new_doc = Document()
|
||||
if form['name'] == '':
|
||||
new_doc.name = filename.rsplit('.', 1)[0]
|
||||
@@ -56,7 +57,7 @@ def create_document(form, filename, tenant_id):
|
||||
new_doc.valid_from = form['valid_from']
|
||||
else:
|
||||
new_doc.valid_from = dt.now(tz.utc)
|
||||
new_doc.tenant_id = tenant_id
|
||||
new_doc.catalog_id = catalog_id
|
||||
set_logging_information(new_doc, dt.now(tz.utc))
|
||||
|
||||
return new_doc
|
||||
|
||||
@@ -33,6 +33,7 @@ document_ns = Namespace('documents', description='Document related operations')
|
||||
|
||||
# Define models for request parsing and response serialization
|
||||
upload_parser = reqparse.RequestParser()
|
||||
upload_parser.add_argument('catalog_id', location='form', type=int, required=True, help='The catalog to add the file to')
|
||||
upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload')
|
||||
upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document')
|
||||
upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document')
|
||||
@@ -75,6 +76,7 @@ class AddDocument(Resource):
|
||||
validate_file_type(extension)
|
||||
|
||||
api_input = {
|
||||
'catalog_id': args.get('catalog_id'),
|
||||
'name': args.get('name') or filename,
|
||||
'language': args.get('language'),
|
||||
'user_context': args.get('user_context'),
|
||||
@@ -102,6 +104,7 @@ class AddDocument(Resource):
|
||||
|
||||
# Models for AddURL
|
||||
add_url_model = document_ns.model('AddURL', {
|
||||
'catalog_id': fields.Integer(required='True', description='ID of the catalog the URL needs to be added to'),
|
||||
'url': fields.String(required=True, description='URL of the document to add'),
|
||||
'name': fields.String(required=False, description='Name of the document'),
|
||||
'language': fields.String(required=True, description='Language of the document'),
|
||||
@@ -138,6 +141,7 @@ class AddURL(Resource):
|
||||
file_content, filename, extension = process_url(args['url'], tenant_id)
|
||||
|
||||
api_input = {
|
||||
'catalog_id': args['catlog_id'],
|
||||
'url': args['url'],
|
||||
'name': args.get('name') or filename,
|
||||
'language': args['language'],
|
||||
|
||||
23
eveai_app/templates/document/catalog.html
Normal file
23
eveai_app/templates/document/catalog.html
Normal file
@@ -0,0 +1,23 @@
|
||||
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{# Catalog registration page: renders every field of `form` and posts back to the same URL. #}

{% block title %}Catalog Registration{% endblock %}

{% block content_title %}Register Catalog{% endblock %}
{% block content_description %}Define a new catalog of documents in Evie's Library{% endblock %}

{% block content %}
<form method="post">
    {{ form.hidden_tag() }}
    {# Both lists are empty here and are handed unchanged to render_field for each field. #}
    {% set disabled_fields = [] %}
    {% set exclude_fields = [] %}
    {% for field in form %}
        {{ render_field(field, disabled_fields, exclude_fields) }}
    {% endfor %}
    <button type="submit" class="btn btn-primary">Register Catalog</button>
</form>
{% endblock %}

{# The footer block is intentionally left empty. #}
{% block content_footer %}

{% endblock %}
|
||||
23
eveai_app/templates/document/catalogs.html
Normal file
23
eveai_app/templates/document/catalogs.html
Normal file
@@ -0,0 +1,23 @@
|
||||
{% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{# Catalog overview page: selectable table of catalogs with the actions handled by handle_catalog_selection. #}

{% block title %}Catalogs{% endblock %}

{% block content_title %}Catalogs{% endblock %}
{% block content_description %}View Catalogs for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}

{% block content %}
<div class="container">
    <form method="POST" action="{{ url_for('document_bp.handle_catalog_selection') }}">
        {{ render_selectable_table(headers=["Catalog ID", "Name"], rows=rows, selectable=True, id="catalogsTable") }}
        <div class="form-group mt-3">
            {# The `action` value of the clicked button selects the branch taken by the handler. #}
            <button type="submit" name="action" value="set_session_catalog" class="btn btn-primary">Set Session Catalog</button>
            <button type="submit" name="action" value="edit_catalog" class="btn btn-primary">Edit Catalog</button>
        </div>
    </form>
</div>
{% endblock %}

{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.catalogs') }}
{% endblock %}
|
||||
25
eveai_app/templates/document/edit_catalog.html
Normal file
25
eveai_app/templates/document/edit_catalog.html
Normal file
@@ -0,0 +1,25 @@
|
||||
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{# Catalog edit page: renders every field of `form` and posts back to the same URL. #}

{% block title %}Edit Catalog{% endblock %}

{% block content_title %}Edit Catalog{% endblock %}
{% block content_description %}Edit a catalog of documents in Evie's Library.
When you change chunking or embedding information, you'll need to manually refresh the library if you want immediate impact.
{% endblock %}

{% block content %}
<form method="post">
    {{ form.hidden_tag() }}
    {# Both lists are empty here and are handed unchanged to render_field for each field. #}
    {% set disabled_fields = [] %}
    {% set exclude_fields = [] %}
    {% for field in form %}
        {{ render_field(field, disabled_fields, exclude_fields) }}
    {% endfor %}
    <button type="submit" class="btn btn-primary">Save Catalog</button>
</form>
{% endblock %}

{# The footer block is intentionally left empty. #}
{% block content_footer %}

{% endblock %}
|
||||
@@ -1,8 +1,8 @@
|
||||
from flask import session, current_app
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import (StringField, BooleanField, SubmitField, DateField,
|
||||
from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, FloatField, SelectMultipleField,
|
||||
SelectField, FieldList, FormField, TextAreaField, URLField)
|
||||
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError
|
||||
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError, NumberRange
|
||||
from flask_wtf.file import FileField, FileAllowed, FileRequired
|
||||
import json
|
||||
|
||||
@@ -23,6 +23,36 @@ def validate_json(form, field):
|
||||
raise ValidationError('Invalid JSON format')
|
||||
|
||||
|
||||
class CatalogForm(FlaskForm):
    """Form used to register and edit a catalog.

    The HTML fields are entered as comma-separated text. Numeric fields show
    their default value in the label.
    """

    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # HTML Embedding Variables
    # The default previously contained an empty entry ("li, , tbody"), which
    # yields a blank tag once the value is split on commas.
    # NOTE(review): a tag such as "table" may have been intended there - confirm.
    html_tags = StringField('HTML Tags', validators=[DataRequired()],
                            default='p, h1, h2, h3, h4, h5, h6, li, tbody, tr, td')
    html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
                                default='p, li')
    html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
    html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
    html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
    min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
                                  default=2000)
    max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
                                  default=3000)

    # Embedding Search variables
    es_k = IntegerField('Limit for Searching Embeddings (5)',
                        default=5,
                        validators=[NumberRange(min=0)])
    # NOTE(review): the Catalog model column defaults to 0.7 while this form
    # defaults to 0.5 - confirm which value is intended.
    es_similarity_threshold = FloatField('Similarity Threshold for Searching Embeddings (0.5)',
                                         default=0.5,
                                         validators=[NumberRange(min=0, max=1)])

    # Chat Variables
    chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
    chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])

    # Tuning variables
    embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
    rag_tuning = BooleanField('Enable RAG Tuning', default=False)
|
||||
|
||||
|
||||
class AddDocumentForm(FlaskForm):
|
||||
file = FileField('File', validators=[FileRequired(), allowed_file])
|
||||
name = StringField('Name', validators=[Length(max=100)])
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import ast
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
|
||||
from babel.messages.setuptools_frontend import update_catalog
|
||||
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
|
||||
from flask_security import roles_accepted, current_user
|
||||
from sqlalchemy import desc
|
||||
@@ -12,14 +13,15 @@ from urllib.parse import urlparse, unquote
|
||||
import io
|
||||
import json
|
||||
|
||||
from common.models.document import Document, DocumentVersion
|
||||
from common.models.document import Document, DocumentVersion, Catalog
|
||||
from common.extensions import db, minio_client
|
||||
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
|
||||
process_multiple_urls, get_documents_list, edit_document, \
|
||||
edit_document_version, refresh_document
|
||||
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
|
||||
EveAIDoubleURLException
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \
|
||||
CatalogForm
|
||||
from common.utils.middleware import mw_before_request
|
||||
from common.utils.celery_utils import current_celery
|
||||
from common.utils.nginx_utils import prefixed_url_for
|
||||
@@ -52,6 +54,95 @@ def before_request():
|
||||
raise
|
||||
|
||||
|
||||
@document_bp.route('/catalog', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalog():
    """Show the catalog registration form and create a Catalog on a valid POST.

    Returns:
        The rendered 'document/catalog.html' template, always with the form.
    """
    form = CatalogForm()

    if form.validate_on_submit():
        tenant_id = session.get('tenant').get('id')
        new_catalog = Catalog()
        form.populate_obj(new_catalog)

        # The form delivers these values as comma-separated text, whereas the
        # model stores them in ARRAY columns: convert to lists of non-empty,
        # stripped items before persisting.
        for field_name in ('html_tags', 'html_end_tags', 'html_included_elements',
                           'html_excluded_elements', 'html_excluded_classes'):
            raw_value = getattr(form, field_name).data
            items = [item.strip() for item in raw_value.split(',') if item.strip()] if raw_value else []
            setattr(new_catalog, field_name, items)

        set_logging_information(new_catalog, dt.now(tz.utc))

        try:
            db.session.add(new_catalog)
            db.session.commit()
            flash('Catalog successfully added!', 'success')
            current_app.logger.info(f'Catalog {new_catalog.name} successfully added for tenant {tenant_id}!')
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to add catalog. Error: {e}', 'danger')
            current_app.logger.error(f'Failed to add catalog {new_catalog.name} '
                                     f'for tenant {tenant_id}. Error: {str(e)}')
    elif request.method == 'POST':
        # Only complain about missing input after an actual submission, not
        # when the empty form is first displayed.
        flash('Please fill in all required fields.', 'information')

    # The template iterates over `form`, so it must be passed in.
    return render_template('document/catalog.html', form=form)
|
||||
|
||||
|
||||
@document_bp.route('/catalogs', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalogs():
    """Render a paginated, selectable table of catalogs ordered by id.

    GET is accepted because other views redirect to this endpoint and the
    template builds its pagination links against it.

    Query parameters:
        page: page number to show (default 1).
        per_page: number of catalogs per page (default 10).
    """
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 10, type=int)

    query = Catalog.query.order_by(Catalog.id)

    pagination = query.paginate(page=page, per_page=per_page)
    the_catalogs = pagination.items

    # prepare table data
    rows = prepare_table_for_macro(the_catalogs, [('id', ''), ('name', '')])

    # Render the catalogs in a template
    return render_template('document/catalogs.html', rows=rows, pagination=pagination)
|
||||
|
||||
|
||||
@document_bp.route('/handle_catalog_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_catalog_selection():
    """Dispatch the action chosen for the catalog selected in the catalogs table.

    Reads 'selected_row' (a dict literal carrying the catalog id under
    'value') and 'action' from the posted form.

    Returns:
        A redirect to the edit view for 'edit_catalog', otherwise a redirect
        back to the catalog list.
    """
    catalog_identification = request.form.get('selected_row')
    if not catalog_identification:
        # Submitting without a selected row would otherwise crash in
        # ast.literal_eval(None).
        flash('Please select a catalog first.', 'warning')
        return redirect(prefixed_url_for('document_bp.catalogs'))

    catalog_id = ast.literal_eval(catalog_identification).get('value')
    action = request.form['action']
    selected_catalog = Catalog.query.get_or_404(catalog_id)

    if action == 'set_session_catalog':
        session['catalog_id'] = catalog_id
        session['catalog_name'] = selected_catalog.name
    elif action == 'edit_catalog':
        return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))

    return redirect(prefixed_url_for('document_bp.catalogs'))
|
||||
|
||||
|
||||
@document_bp.route('/catalog/<int:catalog_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_catalog(catalog_id):
    """Show the edit form for a catalog and persist changes on a valid POST.

    Args:
        catalog_id: primary key of the catalog to edit; 404 when not found.

    Returns:
        A redirect to the catalog list after a valid submission, otherwise
        the rendered 'document/edit_catalog.html' template.
    """
    catalog = Catalog.query.get_or_404(catalog_id)
    form = CatalogForm(obj=catalog)
    tenant_id = session.get('tenant').get('id')

    # These values are ARRAY columns on the model but comma-separated text on
    # the form, so they are converted in both directions.
    list_fields = ('html_tags', 'html_end_tags', 'html_included_elements',
                   'html_excluded_elements', 'html_excluded_classes')

    if request.method == 'GET':
        for field_name in list_fields:
            stored = getattr(catalog, field_name)
            getattr(form, field_name).data = ', '.join(stored) if stored else ''

    if request.method == 'POST' and form.validate_on_submit():
        form.populate_obj(catalog)
        for field_name in list_fields:
            raw_value = getattr(form, field_name).data
            items = [item.strip() for item in raw_value.split(',') if item.strip()] if raw_value else []
            setattr(catalog, field_name, items)

        update_logging_information(catalog, dt.now(tz.utc))
        try:
            db.session.add(catalog)
            db.session.commit()
            flash('Catalog successfully updated!', 'success')
            current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to update catalog. Error: {e}', 'danger')
            current_app.logger.error(f'Failed to update catalog {catalog_id} for tenant {tenant_id}. Error: {str(e)}')

        return redirect(prefixed_url_for('document_bp.catalogs'))
    else:
        form_validation_failed(request, form)

    return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
|
||||
|
||||
|
||||
@document_bp.route('/add_document', methods=['GET', 'POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def add_document():
|
||||
@@ -60,6 +151,7 @@ def add_document():
|
||||
if form.validate_on_submit():
|
||||
try:
|
||||
tenant_id = session['tenant']['id']
|
||||
catalog_id = session['catalog_id']
|
||||
file = form.file.data
|
||||
filename = secure_filename(file.filename)
|
||||
extension = filename.rsplit('.', 1)[1].lower()
|
||||
@@ -68,6 +160,7 @@ def add_document():
|
||||
|
||||
current_app.logger.debug(f'Language on form: {form.language.data}')
|
||||
api_input = {
|
||||
'catalog_id': catalog_id,
|
||||
'name': form.name.data,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
@@ -100,11 +193,13 @@ def add_url():
|
||||
if form.validate_on_submit():
|
||||
try:
|
||||
tenant_id = session['tenant']['id']
|
||||
catalog_id = session['catalog_id']
|
||||
url = form.url.data
|
||||
|
||||
file_content, filename, extension = process_url(url, tenant_id)
|
||||
|
||||
api_input = {
|
||||
'catalog_id': catalog_id,
|
||||
'name': form.name.data or filename,
|
||||
'url': url,
|
||||
'language': form.language.data,
|
||||
@@ -397,47 +492,47 @@ def fetch_html(url):
|
||||
return response.content
|
||||
|
||||
|
||||
def prepare_document_data(docs):
    """Convert documents into nested row structures for table rendering.

    Each document yields one row: name, creation timestamp, valid-from date
    (empty string when unset) and a trailing group cell holding one sub-row
    per language. A language sub-row starts with the language code and, when
    a latest version exists, continues with that version's creation
    timestamp, URL (as a link cell), file name and file type.

    Args:
        docs: iterable of document objects exposing ``name``, ``created_at``,
            ``valid_from`` and ``languages``.

    Returns:
        A list of rows, each row being a list of cell dictionaries.
    """

    def text_cell(value):
        # Plain text cell with an empty CSS class.
        return {'value': value, 'class': '', 'type': 'text'}

    table_rows = []
    for document in docs:
        valid_from = document.valid_from.strftime("%Y-%m-%d") if document.valid_from else ''
        row = [
            text_cell(document.name),
            text_cell(document.created_at.strftime("%Y-%m-%d %H:%M:%S")),
            text_cell(valid_from),
        ]

        # Nested languages and versions
        sub_rows = []
        for language in document.languages:
            cells = [text_cell(language.language)]
            version = language.latest_version

            # Without a latest version the sub-row carries only the language.
            if version:
                cells.append(text_cell(version.created_at.strftime("%Y-%m-%d %H:%M:%S")))
                if version.url:
                    cells.append({'value': version.url,
                                  'class': '', 'type': 'link', 'href': version.url})
                else:
                    cells.append(text_cell(''))
                cells.append(text_cell(version.file_name if version.file_name else ''))
                cells.append(text_cell(version.file_type if version.file_type else ''))

            sub_rows.append(cells)

        row.append({'is_group': True, 'colspan': '5',
                    'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
                    'sub_rows': sub_rows})
        table_rows.append(row)
    return table_rows
|
||||
# def prepare_document_data(docs):
|
||||
# rows = []
|
||||
# for doc in docs:
|
||||
# doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
|
||||
# {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
|
||||
# # Document basic details
|
||||
# if doc.valid_from:
|
||||
# doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
|
||||
# else:
|
||||
# doc_row.append({'value': '', 'class': '', 'type': 'text'})
|
||||
#
|
||||
# # Nested languages and versions
|
||||
# languages_rows = []
|
||||
# for lang in doc.languages:
|
||||
# lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
|
||||
#
|
||||
# # Latest version details if available (should be available ;-) )
|
||||
# if lang.latest_version:
|
||||
# lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
# 'class': '', 'type': 'text'})
|
||||
# if lang.latest_version.url:
|
||||
# lang_row.append({'value': lang.latest_version.url,
|
||||
# 'class': '', 'type': 'link', 'href': lang.latest_version.url})
|
||||
# else:
|
||||
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
|
||||
#
|
||||
# if lang.latest_version.object_name:
|
||||
# lang_row.append({'value': lang.latest_version.object_name, 'class': '', 'type': 'text'})
|
||||
# else:
|
||||
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
|
||||
#
|
||||
# if lang.latest_version.file_type:
|
||||
# lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
|
||||
# else:
|
||||
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
|
||||
# # Include other details as necessary
|
||||
#
|
||||
# languages_rows.append(lang_row)
|
||||
#
|
||||
# doc_row.append({'is_group': True, 'colspan': '5',
|
||||
# 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
|
||||
# 'sub_rows': languages_rows})
|
||||
# rows.append(doc_row)
|
||||
# return rows
|
||||
|
||||
@@ -26,55 +26,55 @@ def upgrade():
|
||||
op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True))
|
||||
op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True))
|
||||
|
||||
# ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
|
||||
from common.models.document import DocumentVersion
|
||||
from common.extensions import minio_client
|
||||
from minio.error import S3Error
|
||||
|
||||
# Create a connection
|
||||
connection = op.get_bind()
|
||||
session = Session(bind=connection)
|
||||
|
||||
# Get the current schema name (which should be the tenant ID)
|
||||
current_schema = connection.execute(text("SELECT current_schema()")).scalar()
|
||||
tenant_id = int(current_schema)
|
||||
|
||||
doc_versions = session.query(DocumentVersion).all()
|
||||
for doc_version in doc_versions:
|
||||
try:
|
||||
object_name = minio_client.generate_object_name(doc_version.doc_id,
|
||||
doc_version.language,
|
||||
doc_version.id,
|
||||
doc_version.file_name)
|
||||
bucket_name = minio_client.generate_bucket_name(tenant_id)
|
||||
doc_version.object_name = object_name
|
||||
doc_version.bucket_name = bucket_name
|
||||
|
||||
try:
|
||||
stat = minio_client.client.stat_object(
|
||||
bucket_name=bucket_name,
|
||||
object_name=object_name
|
||||
)
|
||||
doc_version.file_size = stat.size / 1048576
|
||||
current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
|
||||
except S3Error as e:
|
||||
if e.code == "NoSuchKey":
|
||||
current_app.logger.warning(
|
||||
f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
|
||||
continue # Move to the next item
|
||||
else:
|
||||
raise e # Handle other types of S3 errors
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
|
||||
f"Tenant {tenant_id}. Error: {str(e)}")
|
||||
|
||||
try:
|
||||
session.commit()
|
||||
current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
|
||||
# # ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
|
||||
# from common.models.document import DocumentVersion
|
||||
# from common.extensions import minio_client
|
||||
# from minio.error import S3Error
|
||||
#
|
||||
# # Create a connection
|
||||
# connection = op.get_bind()
|
||||
# session = Session(bind=connection)
|
||||
#
|
||||
# # Get the current schema name (which should be the tenant ID)
|
||||
# current_schema = connection.execute(text("SELECT current_schema()")).scalar()
|
||||
# tenant_id = int(current_schema)
|
||||
#
|
||||
# doc_versions = session.query(DocumentVersion).all()
|
||||
# for doc_version in doc_versions:
|
||||
# try:
|
||||
# object_name = minio_client.generate_object_name(doc_version.doc_id,
|
||||
# doc_version.language,
|
||||
# doc_version.id,
|
||||
# doc_version.file_name)
|
||||
# bucket_name = minio_client.generate_bucket_name(tenant_id)
|
||||
# doc_version.object_name = object_name
|
||||
# doc_version.bucket_name = bucket_name
|
||||
#
|
||||
# try:
|
||||
# stat = minio_client.client.stat_object(
|
||||
# bucket_name=bucket_name,
|
||||
# object_name=object_name
|
||||
# )
|
||||
# doc_version.file_size = stat.size / 1048576
|
||||
# current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
|
||||
# except S3Error as e:
|
||||
# if e.code == "NoSuchKey":
|
||||
# current_app.logger.warning(
|
||||
# f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
|
||||
# continue # Move to the next item
|
||||
# else:
|
||||
# raise e # Handle other types of S3 errors
|
||||
# except Exception as e:
|
||||
# session.rollback()
|
||||
# current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
|
||||
# f"Tenant {tenant_id}. Error: {str(e)}")
|
||||
#
|
||||
# try:
|
||||
# session.commit()
|
||||
# current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
|
||||
# except Exception as e:
|
||||
# session.rollback()
|
||||
# current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
|
||||
|
||||
# ### commands auto generated by Alembic - Remove old fields ###
|
||||
# op.drop_column('document_version', 'file_location')
|
||||
|
||||
Reference in New Issue
Block a user