- Zapier Document Refresh action (create) added

2024-12-17 16:40:21 +01:00
parent 53c625599a
commit f7cd58ed2a
9 changed files with 381 additions and 194 deletions
--- a/common/utils/document_utils.py
+++ b/common/utils/document_utils.py
@@ -11,6 +11,8 @@ from flask_security import current_user
 import requests
 from urllib.parse import urlparse, unquote, urlunparse
 import os
+
+from .config_field_types import normalize_json_field
 from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
                               EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion, EveAIException)
 from ..models.user import Tenant
@@ -88,10 +90,10 @@ def create_version_for_document(document, tenant_id, url, sub_file_type,  langua
        new_doc_vers.user_context = user_context

    if user_metadata != '' and user_metadata is not None:
-        new_doc_vers.user_metadata = user_metadata
+        new_doc_vers.user_metadata = normalize_json_field(user_metadata, "user_metadata")

    if catalog_properties != '' and catalog_properties is not None:
-        new_doc_vers.catalog_properties = catalog_properties
+        new_doc_vers.catalog_properties = normalize_json_field(catalog_properties, "catalog_properties")

    if sub_file_type != '':
        new_doc_vers.sub_file_type = sub_file_type
@@ -262,7 +264,8 @@ def edit_document_version(tenant_id, version_id, user_context, catalog_propertie
    if not doc_vers:
        raise EveAIInvalidDocumentVersion(tenant_id, version_id)
    doc_vers.user_context = user_context
-    doc_vers.catalog_properties = catalog_properties
+    doc_vers.catalog_properties = normalize_json_field(catalog_properties, "catalog_properties")
+
    update_logging_information(doc_vers, dt.now(tz.utc))

    try:
@@ -319,6 +322,56 @@ def refresh_document_with_info(doc_id, tenant_id, api_input):
    return new_doc_vers, task.id


+def refresh_document_with_content(doc_id: int, tenant_id: int, file_content: bytes, api_input: dict) -> tuple:
+    """
+    Create a new version of a document from supplied content and queue its embedding task.
+
+    Args:
+        doc_id: ID of the document; EveAIInvalidDocument is raised if no such document is found.
+        tenant_id: Tenant ID, passed on to version creation, file upload and the embedding task.
+        file_content: New file content, uploaded using the latest existing version's file type.
+        api_input: May hold 'language', 'user_context', 'user_metadata', 'catalog_properties'.
+
+    Returns:
+        Tuple (new_version, task_id); (None, error message) if the database commit fails.
+    """
+    doc = Document.query.get(doc_id)
+    if not doc:
+        raise EveAIInvalidDocument(tenant_id, doc_id)
+
+    old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
+
+    # old_doc_vers is the version with the highest id; its file type is reused for the upload below.
+    extension = old_doc_vers.file_type  # NOTE(review): fails if no version exists (first() -> None) - confirm
+
+    new_doc_vers = create_version_for_document(
+        doc, tenant_id,
+        '',  # No URL for content-based updates
+        old_doc_vers.sub_file_type,
+        api_input.get('language', old_doc_vers.language),  # keys missing from api_input fall back to the old version
+        api_input.get('user_context', old_doc_vers.user_context),
+        api_input.get('user_metadata', old_doc_vers.user_metadata),
+        api_input.get('catalog_properties', old_doc_vers.catalog_properties),
+    )
+
+    try:
+        db.session.add(new_doc_vers)
+        db.session.commit()
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        return None, str(e)  # the error text takes the place of the task id
+
+    # Upload the new content for the committed version, using the old version's file type
+    upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
+
+    # Send the 'create_embeddings' task for the new version to the 'embeddings' queue
+    task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id], queue='embeddings')
+    current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
+                            f'with task id: {task.id}.')
+
+    return new_doc_vers, task.id
+
+
 # Update the existing refresh_document function to use the new refresh_document_with_info
 def refresh_document(doc_id, tenant_id):
    current_app.logger.info(f'Refreshing document {doc_id}')
@@ -388,6 +441,10 @@ def lookup_document(tenant_id: int, lookup_criteria: dict, metadata_type: str) -
        for key, value in lookup_criteria.items():
            query = query.filter(metadata_field[key].astext == str(value))

+        # Log the final SQL query
+        current_app.logger.debug(
+            f"Final SQL query: {query.statement.compile(compile_kwargs={'literal_binds': True})}")
+
        # Get first result
        result = query.first()

@@ -411,55 +468,3 @@ def lookup_document(tenant_id: int, lookup_criteria: dict, metadata_type: str) -
            "Error during document lookup",
            status_code=500
        )
-
-
-# Add to common/utils/document_utils.py
-
-def refresh_document_with_content(doc_id: int, tenant_id: int, file_content: bytes, api_input: dict) -> tuple:
-    """
-    Refresh document with new content
-
-    Args:
-        doc_id: Document ID
-        tenant_id: Tenant ID
-        file_content: New file content
-        api_input: Additional document information
-
-    Returns:
-        Tuple of (new_version, task_id)
-    """
-    doc = Document.query.get(doc_id)
-    if not doc:
-        raise EveAIInvalidDocument(tenant_id, doc_id)
-
-    old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
-
-    # Create new version with same file type as original
-    extension = old_doc_vers.file_type
-
-    new_doc_vers = create_version_for_document(
-        doc, tenant_id,
-        '',  # No URL for content-based updates
-        old_doc_vers.sub_file_type,
-        api_input.get('language', old_doc_vers.language),
-        api_input.get('user_context', old_doc_vers.user_context),
-        api_input.get('user_metadata', old_doc_vers.user_metadata),
-        api_input.get('catalog_properties', old_doc_vers.catalog_properties),
-    )
-
-    try:
-        db.session.add(new_doc_vers)
-        db.session.commit()
-    except SQLAlchemyError as e:
-        db.session.rollback()
-        return None, str(e)
-
-    # Upload new content
-    upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
-
-    # Start embedding task
-    task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id], queue='embeddings')
-    current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
-                            f'with task id: {task.id}.')
-
-    return new_doc_vers, task.id