- Modernized authentication with the introduction of TenantProject

- Created a base mail template
- Adapt and improve the document API to use catalogs and processors
- Adapt eveai_sync to the new authentication mechanism and to the use of catalogs and processors
This commit is contained in:
Josako
2024-11-21 17:24:33 +01:00
parent 4c009949b3
commit 7702a6dfcc
72 changed files with 2338 additions and 503 deletions

View File

@@ -19,7 +19,8 @@ def get_redis_config(app):
'port': int(redis_uri.port or 6379),
'db': 4, # Keep this for later use
'redis_expiration_time': 3600,
'distributed_lock': True
'distributed_lock': True,
'thread_local_lock': False,
}
# Add authentication if provided

View File

@@ -3,27 +3,35 @@ from datetime import datetime as dt, timezone as tz
from sqlalchemy import desc
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.utils import secure_filename
from common.models.document import Document, DocumentVersion
from common.models.document import Document, DocumentVersion, Catalog
from common.extensions import db, minio_client
from common.utils.celery_utils import current_celery
from flask import current_app
from flask_security import current_user
import requests
from urllib.parse import urlparse, unquote
from urllib.parse import urlparse, unquote, urlunparse
import os
from .eveai_exceptions import EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType
from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion)
from ..models.user import Tenant
def create_document_stack(api_input, file, filename, extension, tenant_id):
# Create the Document
catalog_id = int(api_input.get('catalog_id'))
catalog = Catalog.query.get(catalog_id)
if not catalog:
raise EveAIInvalidCatalog(tenant_id, catalog_id)
new_doc = create_document(api_input, filename, catalog_id)
db.session.add(new_doc)
url = api_input.get('url', '')
if url != '':
url = cope_with_local_url(api_input.get('url', ''))
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, tenant_id,
api_input.get('url', ''),
url,
api_input.get('sub_file_type', ''),
api_input.get('language', 'en'),
api_input.get('user_context', ''),
@@ -65,7 +73,8 @@ def create_document(form, filename, catalog_id):
return new_doc
def create_version_for_document(document, tenant_id, url, sub_file_type, language, user_context, user_metadata, catalog_properties):
def create_version_for_document(document, tenant_id, url, sub_file_type, language, user_context, user_metadata,
catalog_properties):
new_doc_vers = DocumentVersion()
if url != '':
new_doc_vers.url = url
@@ -167,6 +176,8 @@ def get_extension_from_content_type(content_type):
def process_url(url, tenant_id):
url = cope_with_local_url(url)
response = requests.head(url, allow_redirects=True)
content_type = response.headers.get('Content-Type', '').split(';')[0]
@@ -198,38 +209,6 @@ def process_url(url, tenant_id):
return file_content, filename, extension
def process_multiple_urls(urls, tenant_id, api_input):
    """Create a document stack and start an embedding task for each URL.

    Every URL is handled independently: a failure on one URL is logged and
    reported in its result entry, and processing continues with the next URL.

    Args:
        urls: Iterable of URLs to fetch and turn into documents.
        tenant_id: ID of the tenant the documents are created for.
        api_input: Mapping with the shared document input; its ``name`` entry
            (when truthy) is used as a prefix for each document name.

    Returns:
        A list with one dict per URL. Successful entries carry ``url``,
        ``document_id``, ``document_version_id``, ``task_id`` and
        ``status='success'``; failed entries carry ``url``,
        ``status='error'`` and ``message``.
    """
    outcomes = []

    for url in urls:
        try:
            file_content, filename, extension = process_url(url, tenant_id)

            # Per-URL copy of the input so the shared mapping is never modified.
            url_input = api_input.copy()
            overrides = {
                'url': url,
                'name': f"{api_input['name']}-{filename}" if api_input['name'] else filename
            }
            url_input.update(overrides)

            new_doc, new_doc_vers = create_document_stack(url_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)
        except Exception as e:
            # Best effort: record the failure and move on to the next URL.
            current_app.logger.error(f"Error processing URL {url}: {str(e)}")
            outcome = {
                'url': url,
                'status': 'error',
                'message': str(e)
            }
        else:
            outcome = {
                'url': url,
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id,
                'status': 'success'
            }
        outcomes.append(outcome)

    return outcomes
def start_embedding_task(tenant_id, doc_vers_id):
task = current_celery.send_task('create_embeddings',
args=[tenant_id, doc_vers_id,],
@@ -263,11 +242,16 @@ def get_documents_list(page, per_page):
return pagination
def edit_document(document_id, name, valid_from, valid_to):
doc = Document.query.get_or_404(document_id)
doc.name = name
doc.valid_from = valid_from
doc.valid_to = valid_to
def edit_document(tenant_id, document_id, name, valid_from, valid_to):
doc = Document.query.get(document_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, document_id)
if name:
doc.name = name
if valid_from:
doc.valid_from = valid_from
if valid_to:
doc.valid_to = valid_to
update_logging_information(doc, dt.now(tz.utc))
try:
@@ -279,8 +263,10 @@ def edit_document(document_id, name, valid_from, valid_to):
return None, str(e)
def edit_document_version(version_id, user_context, catalog_properties):
doc_vers = DocumentVersion.query.get_or_404(version_id)
def edit_document_version(tenant_id, version_id, user_context, catalog_properties):
doc_vers = DocumentVersion.query.get(version_id)
if not doc_vers:
raise EveAIInvalidDocumentVersion(tenant_id, version_id)
doc_vers.user_context = user_context
doc_vers.catalog_properties = catalog_properties
update_logging_information(doc_vers, dt.now(tz.utc))
@@ -295,15 +281,17 @@ def edit_document_version(version_id, user_context, catalog_properties):
def refresh_document_with_info(doc_id, tenant_id, api_input):
doc = Document.query.get_or_404(doc_id)
doc = Document.query.get(doc_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
if not old_doc_vers.url:
return None, "This document has no URL. Only documents with a URL can be refreshed."
new_doc_vers = create_version_for_document(
doc, tenant_id,
old_doc_vers.url,
old_doc_vers.sub_file_type,
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata),
@@ -319,11 +307,12 @@ def refresh_document_with_info(doc_id, tenant_id, api_input):
db.session.rollback()
return None, str(e)
response = requests.head(old_doc_vers.url, allow_redirects=True)
url = cope_with_local_url(old_doc_vers.url)
response = requests.head(url, allow_redirects=True)
content_type = response.headers.get('Content-Type', '').split(';')[0]
extension = get_extension_from_content_type(content_type)
response = requests.get(old_doc_vers.url)
response = requests.get(url)
response.raise_for_status()
file_content = response.content
@@ -359,3 +348,18 @@ def mark_tenant_storage_dirty(tenant_id):
db.session.commit()
def cope_with_local_url(url):
    """Translate a URL on the external WordPress host to the configured internal one.

    When the URL's network location equals the ``EXTERNAL_WORDPRESS_BASE_URL``
    config value, its scheme is replaced by ``WORDPRESS_PROTOCOL`` and its
    network location by ``WORDPRESS_HOST:WORDPRESS_PORT``. Any other URL is
    returned unchanged.

    Args:
        url: The incoming URL string.

    Returns:
        The translated URL, or the original URL when no translation applies.
    """
    logger = current_app.logger
    logger.debug(f'Incomming URL: {url}')

    parts = urlparse(url)
    # Only the internal WordPress URL (TESTING) is rewritten.
    if parts.netloc not in [current_app.config['EXTERNAL_WORDPRESS_BASE_URL']]:
        return url

    config = current_app.config
    internal_scheme = config['WORDPRESS_PROTOCOL']
    internal_netloc = f"{config['WORDPRESS_HOST']}:{config['WORDPRESS_PORT']}"
    translated = urlunparse(parts._replace(scheme=internal_scheme, netloc=internal_netloc))
    logger.debug(f'Translated Wordpress URL to: {translated}')
    return translated

View File

@@ -13,6 +13,9 @@ class EveAIException(Exception):
rv['error'] = self.__class__.__name__
return rv
def __str__(self):
return self.message # Return the message when the exception is converted to a string
class EveAIInvalidLanguageException(EveAIException):
"""Raised when an invalid language is provided"""
@@ -45,6 +48,73 @@ class EveAINoLicenseForTenant(EveAIException):
class EveAITenantNotFound(EveAIException):
"""Raised when a tenant is not found"""
def __init__(self, message="Tenant not found", status_code=400, payload=None):
def __init__(self, tenant_id, status_code=400, payload=None):
self.tenant_id = tenant_id
message = f"Tenant {tenant_id} not found"
super().__init__(message, status_code, payload)
class EveAITenantInvalid(EveAIException):
    """Raised when a tenant is invalid.

    The offending tenant ID is kept on the instance as ``tenant_id``.
    """

    def __init__(self, tenant_id, status_code=400, payload=None):
        self.tenant_id = tenant_id
        # The message is derived from the tenant ID; callers never pass one.
        super().__init__(
            f"Tenant with ID '{tenant_id}' is not valid. Please contact the System Administrator.",
            status_code,
            payload,
        )
class EveAINoActiveLicense(EveAIException):
    """Raised when a tenant has no active licenses.

    The tenant ID is kept on the instance as ``tenant_id``.
    """

    def __init__(self, tenant_id, status_code=400, payload=None):
        self.tenant_id = tenant_id
        # The message is derived from the tenant ID; callers never pass one.
        text = (
            f"Tenant with ID '{tenant_id}' has no active licenses. Please contact the System Administrator."
        )
        super().__init__(text, status_code, payload)
class EveAIInvalidCatalog(EveAIException):
    """Raised when a catalog cannot be found.

    The tenant and catalog IDs are kept on the instance as ``tenant_id`` and
    ``catalog_id``.
    """

    def __init__(self, tenant_id, catalog_id, status_code=400, payload=None):
        self.tenant_id, self.catalog_id = tenant_id, catalog_id
        # The message is derived from both IDs; callers never pass one.
        super().__init__(
            f"Tenant with ID '{tenant_id}' has no valid catalog with ID {catalog_id}. Please contact the System Administrator.",
            status_code,
            payload,
        )
class EveAIInvalidProcessor(EveAIException):
    """Raised when no valid processor can be found for a given Catalog ID.

    The tenant ID, catalog ID and file type are kept on the instance as
    ``tenant_id``, ``catalog_id`` and ``file_type``.
    """

    def __init__(self, tenant_id, catalog_id, file_type, status_code=400, payload=None):
        self.tenant_id = tenant_id
        self.catalog_id = catalog_id
        self.file_type = file_type
        # The message is assembled from the three identifying values.
        problem = f"Tenant with ID '{tenant_id}' has no valid {file_type} processor for catalog with ID {catalog_id}. "
        advice = f"Please contact the System Administrator."
        super().__init__(problem + advice, status_code, payload)
class EveAIInvalidDocument(EveAIException):
    """Raised when a tenant has no document with given ID.

    The tenant and document IDs are kept on the instance as ``tenant_id`` and
    ``document_id``.
    """

    def __init__(self, tenant_id, document_id, status_code=400, payload=None):
        self.tenant_id, self.document_id = tenant_id, document_id
        # The message is derived from both IDs; callers never pass one.
        super().__init__(
            f"Tenant with ID '{tenant_id}' has no document with ID {document_id}.",
            status_code,
            payload,
        )
class EveAIInvalidDocumentVersion(EveAIException):
    """Raised when a tenant has no document version with given ID.

    The tenant and document version IDs are kept on the instance as
    ``tenant_id`` and ``document_version_id``.
    """

    def __init__(self, tenant_id, document_version_id, status_code=400, payload=None):
        self.tenant_id = tenant_id
        self.document_version_id = document_version_id
        # The message is derived from both IDs; callers never pass one.
        text = f"Tenant with ID '{tenant_id}' has no document version with ID {document_version_id}."
        super().__init__(text, status_code, payload)

View File

@@ -82,7 +82,7 @@ class ModelVariables:
tenant = Tenant.query.get(self.tenant_id)
if not tenant:
raise EveAITenantNotFound(f"Tenant {self.tenant_id} not found")
raise EveAITenantNotFound(self.tenant_id)
# Set model providers
variables['embedding_provider'], variables['embedding_model'] = tenant.embedding_model.split('.')

View File

@@ -1,5 +1,11 @@
from flask import session, current_app
from sqlalchemy import and_
from common.models.user import Tenant
from common.models.entitlements import License
from common.utils.database import Database
from common.utils.eveai_exceptions import EveAITenantNotFound, EveAITenantInvalid, EveAINoActiveLicense
from datetime import datetime as dt, timezone as tz
# Definition of Trigger Handlers
@@ -15,4 +21,25 @@ def clear_tenant_session_data(sender, user, **kwargs):
session.pop('tenant', None)
session.pop('default_language', None)
session.pop('default_embedding_model', None)
session.pop('default_llm_model', None)
session.pop('default_llm_model', None)
def is_valid_tenant(tenant_id):
    """Check that a tenant exists, is not inactive and has a license valid today.

    The root tenant (ID 1) is accepted immediately, without any lookup.

    Args:
        tenant_id: ID of the tenant to validate.

    Returns:
        True when the tenant passes every check.

    Raises:
        EveAITenantNotFound: No tenant with this ID exists.
        EveAITenantInvalid: The tenant's type is 'Inactive'.
        EveAINoActiveLicense: No license of the tenant covers the current
            UTC date.
    """
    if tenant_id == 1:  # The 'root' tenant, is always valid
        return True

    tenant = Tenant.query.get(tenant_id)
    if tenant is None:
        # Check existence before touching the tenant; EveAITenantNotFound
        # needs the tenant ID to build its message.
        raise EveAITenantNotFound(tenant_id)

    # NOTE(review): Database receives the Tenant object here rather than
    # tenant_id -- confirm this is what Database expects.
    Database(tenant).switch_schema()

    if tenant.type == 'Inactive':
        raise EveAITenantInvalid(tenant_id)

    current_date = dt.now(tz=tz.utc).date()
    # NOTE(review): one_or_none() raises if several licenses cover today;
    # confirm overlapping licenses cannot exist.
    active_license = (License.query.filter_by(tenant_id=tenant_id)
                      .filter(and_(License.start_date <= current_date,
                                   License.end_date >= current_date))
                      .one_or_none())
    if not active_license:
        raise EveAINoActiveLicense(tenant_id)

    return True

View File

@@ -93,4 +93,3 @@ def test_smtp_connection():
except Exception as e:
current_app.logger.error(f"Failed to connect to SMTP server: {str(e)}")
return False

View File

@@ -4,7 +4,7 @@ from flask import Flask
def generate_api_key(prefix="EveAI-Chat"):
    """Generate an API key of the form ``<prefix>-NNNN-NNNN-...``.

    The key consists of the prefix followed by eight dash-separated groups,
    each a four-digit number between 1000 and 9999.

    Args:
        prefix: Text placed in front of the numeric groups.

    Returns:
        The generated API key string.
    """
    # Local import: API keys are credentials, so draw them from the OS CSPRNG
    # instead of the predictable generator behind the `random` module.
    import secrets

    parts = [str(secrets.randbelow(9000) + 1000) for _ in range(8)]
    return f"{prefix}-{'-'.join(parts)}"