- Improvements to enable deployment in the cloud, mainly moving file access from the local filesystem to MinIO

- Improvements to RAG logging, plus some debugging in that area
This commit is contained in:
Josako
2024-08-01 17:35:54 +02:00
parent 88ca04136d
commit 64cf8df3a9
19 changed files with 617 additions and 206 deletions

View File

@@ -14,9 +14,10 @@ import requests
from requests.exceptions import SSLError
from urllib.parse import urlparse
import io
from minio.error import S3Error
from common.models.document import Document, DocumentVersion
from common.extensions import db
from common.extensions import db, minio_client
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
AddURLsForm
from common.utils.middleware import mw_before_request
@@ -558,33 +559,34 @@ def upload_file_for_version(doc_vers, file, extension):
doc_vers.file_name = doc_vers.calc_file_name()
doc_vers.file_location = doc_vers.calc_file_location()
upload_path = os.path.join(current_app.config['UPLOAD_FOLDER'], doc_vers.file_location)
if not os.path.exists(upload_path):
os.makedirs(upload_path, exist_ok=True)
if isinstance(file, FileStorage):
file.save(os.path.join(upload_path, doc_vers.file_name))
elif isinstance(file, io.BytesIO):
# It's a BytesIO object, handle accordingly
# Example: write content to a file manually
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
f.write(file.getvalue())
elif isinstance(file, str):
# It's a string, handle accordingly
with open(os.path.join(upload_path, doc_vers.file_name), 'w') as f:
f.write(file)
else:
raise TypeError('Unsupported file type.')
# Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
tenant_id = session['tenant']['id']
minio_client.create_tenant_bucket(tenant_id)
try:
minio_client.upload_document_file(
tenant_id,
doc_vers.doc_id,
doc_vers.language,
doc_vers.id,
doc_vers.file_name,
file
)
db.session.commit()
current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
f'document version {doc_vers.id} while uploading file.')
except S3Error as e:
db.session.rollback()
flash('Error saving document to MinIO.', 'error')
current_app.logger.error(
f'Error saving document to MinIO for tenant {tenant_id}: {e}')
raise
except SQLAlchemyError as e:
db.session.rollback()
flash('Error saving document.', 'error')
flash('Error saving document metadata.', 'error')
current_app.logger.error(
f'Error saving document for tenant {session["tenant"]["id"]} while uploading file: {e}')
current_app.logger.info(f'Succesfully saved document for tenant {session['tenant']['id']} for '
f'document version {doc_vers.id} while uploading file.')
f'Error saving document metadata for tenant {tenant_id}: {e}')
raise
def fetch_html(url):