- Zapier Document Refresh action (create) added

This commit is contained in:
Josako
2024-12-17 16:40:21 +01:00
parent 53c625599a
commit f7cd58ed2a
9 changed files with 381 additions and 194 deletions

View File

@@ -11,6 +11,8 @@ from flask_security import current_user
import requests
from urllib.parse import urlparse, unquote, urlunparse
import os
from .config_field_types import normalize_json_field
from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion, EveAIException)
from ..models.user import Tenant
@@ -88,10 +90,10 @@ def create_version_for_document(document, tenant_id, url, sub_file_type, langua
new_doc_vers.user_context = user_context
if user_metadata != '' and user_metadata is not None:
new_doc_vers.user_metadata = user_metadata
new_doc_vers.user_metadata = normalize_json_field(user_metadata, "user_metadata")
if catalog_properties != '' and catalog_properties is not None:
new_doc_vers.catalog_properties = catalog_properties
new_doc_vers.catalog_properties = normalize_json_field(catalog_properties, "catalog_properties")
if sub_file_type != '':
new_doc_vers.sub_file_type = sub_file_type
@@ -262,7 +264,8 @@ def edit_document_version(tenant_id, version_id, user_context, catalog_propertie
if not doc_vers:
raise EveAIInvalidDocumentVersion(tenant_id, version_id)
doc_vers.user_context = user_context
doc_vers.catalog_properties = catalog_properties
doc_vers.catalog_properties = normalize_json_field(catalog_properties, "catalog_properties")
update_logging_information(doc_vers, dt.now(tz.utc))
try:
@@ -319,6 +322,56 @@ def refresh_document_with_info(doc_id, tenant_id, api_input):
return new_doc_vers, task.id
def refresh_document_with_content(doc_id: int, tenant_id: int, file_content: bytes, api_input: dict) -> tuple:
"""
Refresh document with new content
Args:
doc_id: Document ID
tenant_id: Tenant ID
file_content: New file content
api_input: Additional document information
Returns:
Tuple of (new_version, task_id)
"""
doc = Document.query.get(doc_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
# Create new version with same file type as original
extension = old_doc_vers.file_type
new_doc_vers = create_version_for_document(
doc, tenant_id,
'', # No URL for content-based updates
old_doc_vers.sub_file_type,
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata),
api_input.get('catalog_properties', old_doc_vers.catalog_properties),
)
try:
db.session.add(new_doc_vers)
db.session.commit()
except SQLAlchemyError as e:
db.session.rollback()
return None, str(e)
# Upload new content
upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
# Start embedding task
task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id], queue='embeddings')
current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
f'with task id: {task.id}.')
return new_doc_vers, task.id
# Update the existing refresh_document function to use the new refresh_document_with_info
def refresh_document(doc_id, tenant_id):
current_app.logger.info(f'Refreshing document {doc_id}')
@@ -388,6 +441,10 @@ def lookup_document(tenant_id: int, lookup_criteria: dict, metadata_type: str) -
for key, value in lookup_criteria.items():
query = query.filter(metadata_field[key].astext == str(value))
# Log the final SQL query
current_app.logger.debug(
f"Final SQL query: {query.statement.compile(compile_kwargs={'literal_binds': True})}")
# Get first result
result = query.first()
@@ -411,55 +468,3 @@ def lookup_document(tenant_id: int, lookup_criteria: dict, metadata_type: str) -
"Error during document lookup",
status_code=500
)
# Add to common/utils/document_utils.py
def refresh_document_with_content(doc_id: int, tenant_id: int, file_content: bytes, api_input: dict) -> tuple:
"""
Refresh document with new content
Args:
doc_id: Document ID
tenant_id: Tenant ID
file_content: New file content
api_input: Additional document information
Returns:
Tuple of (new_version, task_id)
"""
doc = Document.query.get(doc_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
# Create new version with same file type as original
extension = old_doc_vers.file_type
new_doc_vers = create_version_for_document(
doc, tenant_id,
'', # No URL for content-based updates
old_doc_vers.sub_file_type,
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata),
api_input.get('catalog_properties', old_doc_vers.catalog_properties),
)
try:
db.session.add(new_doc_vers)
db.session.commit()
except SQLAlchemyError as e:
db.session.rollback()
return None, str(e)
# Upload new content
upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
# Start embedding task
task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id], queue='embeddings')
current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
f'with task id: {task.id}.')
return new_doc_vers, task.id