- Modernized authentication with the introduction of TenantProject
- Created a base mail template
- Adapted and improved the document API to use catalogs and processors
- Adapted eveai_sync to the new authentication mechanism and to the use of catalogs and processors
This commit is contained in:
@@ -3,27 +3,35 @@ from datetime import datetime as dt, timezone as tz
|
||||
from sqlalchemy import desc
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from werkzeug.utils import secure_filename
|
||||
from common.models.document import Document, DocumentVersion
|
||||
from common.models.document import Document, DocumentVersion, Catalog
|
||||
from common.extensions import db, minio_client
|
||||
from common.utils.celery_utils import current_celery
|
||||
from flask import current_app
|
||||
from flask_security import current_user
|
||||
import requests
|
||||
from urllib.parse import urlparse, unquote
|
||||
from urllib.parse import urlparse, unquote, urlunparse
|
||||
import os
|
||||
from .eveai_exceptions import EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType
|
||||
from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
|
||||
EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion)
|
||||
from ..models.user import Tenant
|
||||
|
||||
|
||||
def create_document_stack(api_input, file, filename, extension, tenant_id):
|
||||
# Create the Document
|
||||
catalog_id = int(api_input.get('catalog_id'))
|
||||
catalog = Catalog.query.get(catalog_id)
|
||||
if not catalog:
|
||||
raise EveAIInvalidCatalog(tenant_id, catalog_id)
|
||||
new_doc = create_document(api_input, filename, catalog_id)
|
||||
db.session.add(new_doc)
|
||||
|
||||
url = api_input.get('url', '')
|
||||
if url != '':
|
||||
url = cope_with_local_url(api_input.get('url', ''))
|
||||
|
||||
# Create the DocumentVersion
|
||||
new_doc_vers = create_version_for_document(new_doc, tenant_id,
|
||||
api_input.get('url', ''),
|
||||
url,
|
||||
api_input.get('sub_file_type', ''),
|
||||
api_input.get('language', 'en'),
|
||||
api_input.get('user_context', ''),
|
||||
@@ -65,7 +73,8 @@ def create_document(form, filename, catalog_id):
|
||||
return new_doc
|
||||
|
||||
|
||||
def create_version_for_document(document, tenant_id, url, sub_file_type, language, user_context, user_metadata, catalog_properties):
|
||||
def create_version_for_document(document, tenant_id, url, sub_file_type, language, user_context, user_metadata,
|
||||
catalog_properties):
|
||||
new_doc_vers = DocumentVersion()
|
||||
if url != '':
|
||||
new_doc_vers.url = url
|
||||
@@ -167,6 +176,8 @@ def get_extension_from_content_type(content_type):
|
||||
|
||||
|
||||
def process_url(url, tenant_id):
|
||||
url = cope_with_local_url(url)
|
||||
|
||||
response = requests.head(url, allow_redirects=True)
|
||||
content_type = response.headers.get('Content-Type', '').split(';')[0]
|
||||
|
||||
@@ -198,38 +209,6 @@ def process_url(url, tenant_id):
|
||||
return file_content, filename, extension
|
||||
|
||||
|
||||
def process_multiple_urls(urls, tenant_id, api_input):
    """Create a document stack and start an embedding task for each URL.

    Every URL is handled independently: when anything fails for one URL the
    error is logged and recorded in the result list, and processing continues
    with the next URL.

    Args:
        urls: Iterable of URLs to process.
        tenant_id: Tenant identifier forwarded to the processing helpers.
        api_input: Mapping with the shared input fields. It is copied for
            every URL and the copy gets its own ``url`` and ``name``. The
            ``name`` entry is optional: when it is present and non-empty it
            is used as a prefix for the file name, otherwise the file name
            alone is used.

    Returns:
        A list with one dict per URL. Successful entries hold ``url``,
        ``document_id``, ``document_version_id``, ``task_id`` and
        ``status == 'success'``; failed entries hold ``url``,
        ``status == 'error'`` and ``message``.
    """
    results = []
    for url in urls:
        try:
            file_content, filename, extension = process_url(url, tenant_id)

            url_input = api_input.copy()
            # A missing 'name' is treated like an empty one instead of
            # raising KeyError and failing every URL in the batch.
            base_name = url_input.get('name')
            url_input.update({
                'url': url,
                'name': f"{base_name}-{filename}" if base_name else filename,
            })

            new_doc, new_doc_vers = create_document_stack(url_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            results.append({
                'url': url,
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id,
                'status': 'success',
            })
        except Exception as e:
            # Deliberate best-effort boundary: one bad URL must not abort the batch.
            current_app.logger.error(f"Error processing URL {url}: {str(e)}")
            results.append({
                'url': url,
                'status': 'error',
                'message': str(e),
            })
    return results
|
||||
|
||||
|
||||
def start_embedding_task(tenant_id, doc_vers_id):
|
||||
task = current_celery.send_task('create_embeddings',
|
||||
args=[tenant_id, doc_vers_id,],
|
||||
@@ -263,11 +242,16 @@ def get_documents_list(page, per_page):
|
||||
return pagination
|
||||
|
||||
|
||||
def edit_document(document_id, name, valid_from, valid_to):
|
||||
doc = Document.query.get_or_404(document_id)
|
||||
doc.name = name
|
||||
doc.valid_from = valid_from
|
||||
doc.valid_to = valid_to
|
||||
def edit_document(tenant_id, document_id, name, valid_from, valid_to):
|
||||
doc = Document.query.get(document_id)
|
||||
if not doc:
|
||||
raise EveAIInvalidDocument(tenant_id, document_id)
|
||||
if name:
|
||||
doc.name = name
|
||||
if valid_from:
|
||||
doc.valid_from = valid_from
|
||||
if valid_to:
|
||||
doc.valid_to = valid_to
|
||||
update_logging_information(doc, dt.now(tz.utc))
|
||||
|
||||
try:
|
||||
@@ -279,8 +263,10 @@ def edit_document(document_id, name, valid_from, valid_to):
|
||||
return None, str(e)
|
||||
|
||||
|
||||
def edit_document_version(version_id, user_context, catalog_properties):
|
||||
doc_vers = DocumentVersion.query.get_or_404(version_id)
|
||||
def edit_document_version(tenant_id, version_id, user_context, catalog_properties):
|
||||
doc_vers = DocumentVersion.query.get(version_id)
|
||||
if not doc_vers:
|
||||
raise EveAIInvalidDocumentVersion(tenant_id, version_id)
|
||||
doc_vers.user_context = user_context
|
||||
doc_vers.catalog_properties = catalog_properties
|
||||
update_logging_information(doc_vers, dt.now(tz.utc))
|
||||
@@ -295,15 +281,17 @@ def edit_document_version(version_id, user_context, catalog_properties):
|
||||
|
||||
|
||||
def refresh_document_with_info(doc_id, tenant_id, api_input):
|
||||
doc = Document.query.get_or_404(doc_id)
|
||||
doc = Document.query.get(doc_id)
|
||||
if not doc:
|
||||
raise EveAIInvalidDocument(tenant_id, doc_id)
|
||||
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
|
||||
|
||||
if not old_doc_vers.url:
|
||||
return None, "This document has no URL. Only documents with a URL can be refreshed."
|
||||
|
||||
new_doc_vers = create_version_for_document(
|
||||
doc, tenant_id,
|
||||
old_doc_vers.url,
|
||||
old_doc_vers.sub_file_type,
|
||||
api_input.get('language', old_doc_vers.language),
|
||||
api_input.get('user_context', old_doc_vers.user_context),
|
||||
api_input.get('user_metadata', old_doc_vers.user_metadata),
|
||||
@@ -319,11 +307,12 @@ def refresh_document_with_info(doc_id, tenant_id, api_input):
|
||||
db.session.rollback()
|
||||
return None, str(e)
|
||||
|
||||
response = requests.head(old_doc_vers.url, allow_redirects=True)
|
||||
url = cope_with_local_url(old_doc_vers.url)
|
||||
response = requests.head(url, allow_redirects=True)
|
||||
content_type = response.headers.get('Content-Type', '').split(';')[0]
|
||||
extension = get_extension_from_content_type(content_type)
|
||||
|
||||
response = requests.get(old_doc_vers.url)
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
file_content = response.content
|
||||
|
||||
@@ -359,3 +348,18 @@ def mark_tenant_storage_dirty(tenant_id):
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def cope_with_local_url(url):
    """Rewrite an external WordPress URL to its internally reachable address.

    The URL is parsed and its network location is compared with the
    ``EXTERNAL_WORDPRESS_BASE_URL`` setting. On a match, the scheme and the
    network location are replaced by ``WORDPRESS_PROTOCOL`` and
    ``WORDPRESS_HOST:WORDPRESS_PORT``; any other URL is returned unchanged.

    Args:
        url: The URL to inspect.

    Returns:
        The rewritten URL when the network location matches, otherwise
        ``url`` exactly as it was passed in.
    """
    logger = current_app.logger
    logger.debug(f'Incomming URL: {url}')

    parts = urlparse(url)
    settings = current_app.config

    # Only the (testing) WordPress host is translated; everything else passes through.
    if parts.netloc != settings['EXTERNAL_WORDPRESS_BASE_URL']:
        return url

    internal_scheme = settings['WORDPRESS_PROTOCOL']
    internal_netloc = f"{settings['WORDPRESS_HOST']}:{settings['WORDPRESS_PORT']}"
    translated = urlunparse(parts._replace(scheme=internal_scheme, netloc=internal_netloc))
    logger.debug(f'Translated Wordpress URL to: {translated}')
    return translated
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user