- Modernized authentication with the introduction of TenantProject

- Created a base mail template
- Adapted and improved the document API to use catalogs and processors
- Adapted eveai_sync to the new authentication mechanism and to the use of catalogs and processors
This commit is contained in:
Josako
2024-11-21 17:24:33 +01:00
parent 4c009949b3
commit 7702a6dfcc
72 changed files with 2338 additions and 503 deletions

View File

@@ -3,27 +3,35 @@ from datetime import datetime as dt, timezone as tz
from sqlalchemy import desc
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.utils import secure_filename
from common.models.document import Document, DocumentVersion
from common.models.document import Document, DocumentVersion, Catalog
from common.extensions import db, minio_client
from common.utils.celery_utils import current_celery
from flask import current_app
from flask_security import current_user
import requests
from urllib.parse import urlparse, unquote
from urllib.parse import urlparse, unquote, urlunparse
import os
from .eveai_exceptions import EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType
from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion)
from ..models.user import Tenant
def create_document_stack(api_input, file, filename, extension, tenant_id):
# Create the Document
catalog_id = int(api_input.get('catalog_id'))
catalog = Catalog.query.get(catalog_id)
if not catalog:
raise EveAIInvalidCatalog(tenant_id, catalog_id)
new_doc = create_document(api_input, filename, catalog_id)
db.session.add(new_doc)
url = api_input.get('url', '')
if url != '':
url = cope_with_local_url(api_input.get('url', ''))
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, tenant_id,
api_input.get('url', ''),
url,
api_input.get('sub_file_type', ''),
api_input.get('language', 'en'),
api_input.get('user_context', ''),
@@ -65,7 +73,8 @@ def create_document(form, filename, catalog_id):
return new_doc
def create_version_for_document(document, tenant_id, url, sub_file_type, language, user_context, user_metadata, catalog_properties):
def create_version_for_document(document, tenant_id, url, sub_file_type, language, user_context, user_metadata,
catalog_properties):
new_doc_vers = DocumentVersion()
if url != '':
new_doc_vers.url = url
@@ -167,6 +176,8 @@ def get_extension_from_content_type(content_type):
def process_url(url, tenant_id):
url = cope_with_local_url(url)
response = requests.head(url, allow_redirects=True)
content_type = response.headers.get('Content-Type', '').split(';')[0]
@@ -198,38 +209,6 @@ def process_url(url, tenant_id):
return file_content, filename, extension
def process_multiple_urls(urls, tenant_id, api_input):
    """Create a document stack and start an embedding task for each URL.

    Each URL is handled independently: a failure on one URL is logged and
    recorded in the result list, and processing continues with the next URL.

    Args:
        urls: Iterable of URLs to fetch and register.
        tenant_id: Tenant identifier, passed through to ``process_url``,
            ``create_document_stack`` and ``start_embedding_task``.
        api_input: Mapping with the shared document input. It is copied per
            URL and the copy's ``'url'`` and ``'name'`` entries are overridden.

    Returns:
        A list with one dict per URL. On success the dict holds ``'url'``,
        ``'document_id'``, ``'document_version_id'``, ``'task_id'`` and
        ``'status': 'success'``; on failure it holds ``'url'``,
        ``'status': 'error'`` and ``'message'``.
    """
    results = []
    for url in urls:
        try:
            file_content, filename, extension = process_url(url, tenant_id)

            # A missing 'name' key is treated like an empty name, so the
            # filename fallback also applies instead of raising KeyError.
            base_name = api_input.get('name')
            url_input = api_input.copy()
            url_input.update({
                'url': url,
                'name': f"{base_name}-{filename}" if base_name else filename
            })

            new_doc, new_doc_vers = create_document_stack(url_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            results.append({
                'url': url,
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id,
                'status': 'success'
            })
        except Exception as e:
            # Deliberate best-effort boundary: one bad URL must not abort
            # the whole batch, so the error is reported per URL.
            current_app.logger.error(f"Error processing URL {url}: {str(e)}")
            results.append({
                'url': url,
                'status': 'error',
                'message': str(e)
            })
    return results
def start_embedding_task(tenant_id, doc_vers_id):
task = current_celery.send_task('create_embeddings',
args=[tenant_id, doc_vers_id,],
@@ -263,11 +242,16 @@ def get_documents_list(page, per_page):
return pagination
def edit_document(document_id, name, valid_from, valid_to):
doc = Document.query.get_or_404(document_id)
doc.name = name
doc.valid_from = valid_from
doc.valid_to = valid_to
def edit_document(tenant_id, document_id, name, valid_from, valid_to):
doc = Document.query.get(document_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, document_id)
if name:
doc.name = name
if valid_from:
doc.valid_from = valid_from
if valid_to:
doc.valid_to = valid_to
update_logging_information(doc, dt.now(tz.utc))
try:
@@ -279,8 +263,10 @@ def edit_document(document_id, name, valid_from, valid_to):
return None, str(e)
def edit_document_version(version_id, user_context, catalog_properties):
doc_vers = DocumentVersion.query.get_or_404(version_id)
def edit_document_version(tenant_id, version_id, user_context, catalog_properties):
doc_vers = DocumentVersion.query.get(version_id)
if not doc_vers:
raise EveAIInvalidDocumentVersion(tenant_id, version_id)
doc_vers.user_context = user_context
doc_vers.catalog_properties = catalog_properties
update_logging_information(doc_vers, dt.now(tz.utc))
@@ -295,15 +281,17 @@ def edit_document_version(version_id, user_context, catalog_properties):
def refresh_document_with_info(doc_id, tenant_id, api_input):
doc = Document.query.get_or_404(doc_id)
doc = Document.query.get(doc_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
if not old_doc_vers.url:
return None, "This document has no URL. Only documents with a URL can be refreshed."
new_doc_vers = create_version_for_document(
doc, tenant_id,
old_doc_vers.url,
old_doc_vers.sub_file_type,
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata),
@@ -319,11 +307,12 @@ def refresh_document_with_info(doc_id, tenant_id, api_input):
db.session.rollback()
return None, str(e)
response = requests.head(old_doc_vers.url, allow_redirects=True)
url = cope_with_local_url(old_doc_vers.url)
response = requests.head(url, allow_redirects=True)
content_type = response.headers.get('Content-Type', '').split(';')[0]
extension = get_extension_from_content_type(content_type)
response = requests.get(old_doc_vers.url)
response = requests.get(url)
response.raise_for_status()
file_content = response.content
@@ -359,3 +348,18 @@ def mark_tenant_storage_dirty(tenant_id):
db.session.commit()
def cope_with_local_url(url):
    """Rewrite a WordPress URL to the configured local WordPress endpoint.

    The URL's network location is compared with the
    ``EXTERNAL_WORDPRESS_BASE_URL`` config value. On a match, the scheme is
    replaced by ``WORDPRESS_PROTOCOL`` and the network location by
    ``WORDPRESS_HOST:WORDPRESS_PORT``; path, query and fragment are kept.
    Any other URL is returned unchanged.

    Args:
        url: The URL to inspect.

    Returns:
        The rewritten URL on a match, otherwise the original ``url``.

    Raises:
        KeyError: If a required config key is missing from
            ``current_app.config``.
    """
    current_app.logger.debug(f'Incoming URL: {url}')
    parsed_url = urlparse(url)

    # Check if this is an internal WordPress URL (TESTING) and rewrite it.
    # NOTE(review): the comparison is against netloc, so the config value
    # presumably holds a bare host[:port] rather than a full URL - confirm.
    if parsed_url.netloc != current_app.config['EXTERNAL_WORDPRESS_BASE_URL']:
        return url

    local_netloc = f"{current_app.config['WORDPRESS_HOST']}:{current_app.config['WORDPRESS_PORT']}"
    rewritten = parsed_url._replace(
        scheme=current_app.config['WORDPRESS_PROTOCOL'],
        netloc=local_netloc,
    )
    url = urlunparse(rewritten)
    current_app.logger.debug(f'Translated Wordpress URL to: {url}')

    return url