- Improvements to enable deployment in the cloud, mainly changing file access to Minio

- Improvements on RAG logging, and some debugging in that area
This commit is contained in:
Josako
2024-08-01 17:35:54 +02:00
parent 88ca04136d
commit 64cf8df3a9
19 changed files with 617 additions and 206 deletions

View File

@@ -6,7 +6,8 @@ from flask_security.signals import user_authenticated
from werkzeug.middleware.proxy_fix import ProxyFix
import logging.config
from common.extensions import db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session
from common.extensions import (db, migrate, bootstrap, security, mail, login_manager, cors, kms_client, csrf, session,
minio_client)
from common.models.user import User, Role, Tenant, TenantDomain
import common.models.interaction
from config.logging_config import LOGGING
@@ -102,6 +103,7 @@ def register_extensions(app):
cors.init_app(app)
kms_client.init_app(app)
session.init_app(app)
minio_client.init_app(app)
# Register Blueprints

View File

@@ -14,9 +14,10 @@ import requests
from requests.exceptions import SSLError
from urllib.parse import urlparse
import io
from minio.error import S3Error
from common.models.document import Document, DocumentVersion
from common.extensions import db
from common.extensions import db, minio_client
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
AddURLsForm
from common.utils.middleware import mw_before_request
@@ -558,33 +559,34 @@ def upload_file_for_version(doc_vers, file, extension):
doc_vers.file_name = doc_vers.calc_file_name()
doc_vers.file_location = doc_vers.calc_file_location()
upload_path = os.path.join(current_app.config['UPLOAD_FOLDER'], doc_vers.file_location)
if not os.path.exists(upload_path):
os.makedirs(upload_path, exist_ok=True)
if isinstance(file, FileStorage):
file.save(os.path.join(upload_path, doc_vers.file_name))
elif isinstance(file, io.BytesIO):
# It's a BytesIO object, handle accordingly
# Example: write content to a file manually
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
f.write(file.getvalue())
elif isinstance(file, str):
# It's a string, handle accordingly
with open(os.path.join(upload_path, doc_vers.file_name), 'w') as f:
f.write(file)
else:
raise TypeError('Unsupported file type.')
# Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
tenant_id = session['tenant']['id']
minio_client.create_tenant_bucket(tenant_id)
try:
minio_client.upload_document_file(
tenant_id,
doc_vers.doc_id,
doc_vers.language,
doc_vers.id,
doc_vers.file_name,
file
)
db.session.commit()
current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
f'document version {doc_vers.id} while uploading file.')
except S3Error as e:
db.session.rollback()
flash('Error saving document to MinIO.', 'error')
current_app.logger.error(
f'Error saving document to MinIO for tenant {tenant_id}: {e}')
raise
except SQLAlchemyError as e:
db.session.rollback()
flash('Error saving document.', 'error')
flash('Error saving document metadata.', 'error')
current_app.logger.error(
f'Error saving document for tenant {session["tenant"]["id"]} while uploading file: {e}')
current_app.logger.info(f'Succesfully saved document for tenant {session['tenant']['id']} for '
f'document version {doc_vers.id} while uploading file.')
f'Error saving document metadata for tenant {tenant_id}: {e}')
raise
def fetch_html(url):

View File

@@ -8,7 +8,7 @@ from sqlalchemy.exc import SQLAlchemyError
import ast
from common.models.user import User, Tenant, Role, TenantDomain
from common.extensions import db, kms_client, security
from common.extensions import db, kms_client, security, minio_client
from common.utils.security_utils import send_confirmation_email, send_reset_email
from .user_forms import TenantForm, CreateUserForm, EditUserForm, TenantDomainForm
from common.utils.database import Database
@@ -61,12 +61,13 @@ def tenant():
# rag_tuning=form.rag_tuning.data)
# Handle Embedding Variables
new_tenant.html_tags = form.html_tags.data.split(',') if form.html_tags.data else []
new_tenant.html_end_tags = form.html_end_tags.data.split(',') if form.html_end_tags.data else []
new_tenant.html_included_elements = form.html_included_elements.data.split(
',') if form.html_included_elements.data else []
new_tenant.html_excluded_elements = form.html_excluded_elements.data.split(
',') if form.html_excluded_elements.data else []
new_tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
new_tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
if form.html_end_tags.data else []
new_tenant.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
if form.html_included_elements.data else []
new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
if form.html_excluded_elements.data else []
current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
f'html_end_tags: {new_tenant.html_end_tags},'
@@ -87,11 +88,17 @@ def tenant():
flash(f'Failed to add tenant to database. Error: {str(e)}')
return render_template('user/tenant.html', form=form)
# Create schema for new tenant
current_app.logger.info(f"Successfully created tenant {new_tenant.id} in Database")
flash(f"Successfully created tenant {new_tenant.id} in Database")
# Create schema for new tenant
current_app.logger.info(f"Creating schema for tenant {new_tenant.id}")
Database(new_tenant.id).create_tenant_schema()
# Create MinIO bucket for new tenant
current_app.logger.info(f"Creating MinIO bucket for tenant {new_tenant.id}")
minio_client.create_tenant_bucket(new_tenant.id)
return redirect(prefixed_url_for('basic_bp.index'))
else:
form_validation_failed(request, form)