- Improvements to EntitlementsDomain & Services - Prechecks in Document domain - Add audit information to LicenseUsage
613 lines
27 KiB
Python
613 lines
27 KiB
Python
import io
|
|
import json
|
|
from datetime import datetime
|
|
from typing import Tuple, Any
|
|
|
|
import pytz
|
|
import requests
|
|
from flask import current_app, request
|
|
from flask_restx import Namespace, Resource, fields, reqparse
|
|
from flask_jwt_extended import jwt_required, get_jwt_identity
|
|
from sqlalchemy import desc
|
|
from werkzeug.datastructures import FileStorage
|
|
from werkzeug.utils import secure_filename
|
|
|
|
from common.models.document import DocumentVersion
|
|
from common.utils.document_utils import (
|
|
create_document_stack, process_url, start_embedding_task,
|
|
EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
|
|
get_documents_list, edit_document, refresh_document, edit_document_version,
|
|
refresh_document_with_info, lookup_document, refresh_document_with_content, clean_url
|
|
)
|
|
from common.utils.eveai_exceptions import EveAIException
|
|
from eveai_api.api.auth import requires_service
|
|
|
|
|
|
def validate_date(date_str):
    """
    Parse an ISO-8601 date/time string into a timezone-aware UTC datetime.

    Naive input (no UTC offset) is interpreted as UTC. Input that carries an explicit
    offset is converted to UTC instead of having its offset overwritten, so
    '2024-01-01T12:00:00+02:00' yields 10:00 UTC. A trailing 'Z' is accepted as a
    synonym for '+00:00'.

    The returned datetime uses the standard-library ``timezone.utc`` as tzinfo.

    Raises:
        ValueError: if date_str is not a string or is not a valid ISO date/time.
    """
    # Function-scope import: the module header only imports the `datetime` class.
    from datetime import timezone

    try:
        text = date_str.strip()
        # datetime.fromisoformat() only understands a trailing 'Z' from Python 3.11 on.
        if text[-1:] == 'Z':
            text = text[:-1] + '+00:00'
        parsed = datetime.fromisoformat(text)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError cover non-string input (e.g. None).
        raise ValueError("Invalid date format. Use ISO format (YYYY-MM-DDTHH:MM:SS).") from None

    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
|
|
|
|
|
|
def validate_json(json_str):
    """
    Decode a JSON document given as text and return the resulting Python value.

    Used as a ``type=`` converter for request parser arguments; malformed JSON is
    reported as a ValueError.
    """
    try:
        decoded = json.loads(json_str)
    except json.JSONDecodeError:
        raise ValueError("Invalid JSON format for user_metadata.")
    else:
        return decoded
|
|
|
|
|
|
document_ns = Namespace('documents', description='Document related operations')

# Request parser for the multipart/form-data upload endpoint: one 'files' entry
# plus form fields carrying the document metadata.
upload_parser = reqparse.RequestParser()
upload_parser.add_argument(
    'catalog_id',
    type=int,
    required=True,
    location='form',
    help='The catalog to add the file to',
)
upload_parser.add_argument(
    'file',
    type=FileStorage,
    required=True,
    location='files',
    help='The file to upload',
)
upload_parser.add_argument(
    'name',
    type=str,
    required=False,
    location='form',
    help='Name of the document',
)
upload_parser.add_argument(
    'language',
    type=str,
    required=True,
    location='form',
    help='Language of the document',
)
upload_parser.add_argument(
    'user_context',
    type=str,
    required=False,
    location='form',
    help='User context for the document',
)
# valid_from is converted by validate_date, the two JSON fields by validate_json.
upload_parser.add_argument(
    'valid_from',
    type=validate_date,
    required=False,
    location='form',
    help='Valid from date for the document (ISO format)',
)
upload_parser.add_argument(
    'user_metadata',
    type=validate_json,
    required=False,
    location='form',
    help='User metadata for the document (JSON format)',
)
upload_parser.add_argument(
    'catalog_properties',
    type=validate_json,
    required=False,
    location='form',
    help='The catalog configuration to be passed along (JSON format). Validity is against catalog requirements '
         'is not checked, and is the responsibility of the calling client.',
)

# Response body returned by /add_document on success.
add_document_response = document_ns.model(
    'AddDocumentResponse',
    {
        'message': fields.String(description='Status message'),
        'document_id': fields.Integer(description='ID of the created document'),
        'document_version_id': fields.Integer(description='ID of the created document version'),
        'task_id': fields.String(description='ID of the embedding task'),
    },
)
|
|
|
|
|
|
# Endpoint for uploading a document file directly (multipart/form-data).
@document_ns.route('/add_document')
class AddDocument(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(upload_parser)
    @document_ns.response(201, 'Document added successfully', add_document_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(422, 'File could not be processed')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Upload a new document to EveAI by directly providing the file content.

        This endpoint accepts multipart/form-data with the file content and metadata. It processes
        the file, creates a new document in the specified catalog, and initiates the embedding
        process.
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document for tenant {tenant_id}')

        try:
            args = upload_parser.parse_args()
        except Exception as e:
            # Only log here; the exception is re-raised unchanged.
            current_app.logger.error(f"Error parsing arguments: {str(e)}")
            current_app.logger.error(f"Exception type: {type(e)}")
            raise

        try:
            file = args['file']
            # An upload may arrive without a client-side file name.
            filename = secure_filename(file.filename or '')
            # A name without a dot has no extension. Use '' (as AddDocumentThroughURL does)
            # instead of raising IndexError, which used to surface as a 500.
            extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''

            # NOTE: the file type is not validated here (validate_file_type call is disabled).

            api_input = {
                'catalog_id': args.get('catalog_id'),
                'name': args.get('name') or filename,
                'language': args.get('language'),
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
                'catalog_properties': args.get('catalog_properties'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except EveAIException as e:
            # Domain errors are reported to the client with their own message.
            current_app.logger.error(f'Error adding document: {str(e)}')
            document_ns.abort(422, str(e))
        except Exception as e:
            # Anything else is logged in detail but answered with a generic message.
            current_app.logger.error(f'Error adding document: {str(e)}')
            document_ns.abort(500, 'Error adding document')
|
|
|
|
|
|
# Request and response models for AddDocumentThroughURL.
# The request carries a temporary download URL plus the document metadata.
add_document_through_url = document_ns.model(
    'AddDocumentThroughURL',
    {
        'catalog_id': fields.Integer(
            description='ID of the catalog the URL needs to be added to',
            required=True,
        ),
        'temp_url': fields.String(
            description='Temporary URL of the document to add',
            required=True,
        ),
        'name': fields.String(description='Name of the document', required=True),
        'language': fields.String(description='Language of the document', required=True),
        'user_context': fields.String(description='User context for the document', required=False),
        'valid_from': fields.String(description='Valid from date for the document', required=False),
        'user_metadata': fields.String(description='User metadata for the document', required=False),
        'system_metadata': fields.String(description='System metadata for the document', required=False),
        'catalog_properties': fields.String(
            description='The catalog configuration to be passed along (JSON '
                        'format). Validity is against catalog requirements '
                        'is not checked, and is the responsibility of the '
                        'calling client.',
            required=False,
        ),
    },
)

add_document_through_url_response = document_ns.model(
    'AddDocumentThroughURLResponse',
    {
        'message': fields.String(description='Status message'),
        'document_id': fields.Integer(description='ID of the created document'),
        'document_version_id': fields.Integer(description='ID of the created document version'),
        'task_id': fields.String(description='ID of the embedding task'),
    },
)
|
|
|
|
|
|
# Endpoint that downloads a document from a temporary URL and adds it to a catalog.
@document_ns.route('/add_document_through_url')
class AddDocumentThroughURL(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(add_document_through_url)
    @document_ns.response(201, 'Document added successfully', add_document_through_url_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(422, 'File could not be processed')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Add a new document to EveAI using a temporary URL.

        This endpoint is primarily used for integration with services that provide temporary URLs
        (like Zapier). The URL content is downloaded and processed as a new document.
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document through url for tenant {tenant_id}')

        try:
            args = document_ns.payload or {}
        except Exception as e:
            # Only log here; the exception is re-raised unchanged.
            current_app.logger.error(f"Error parsing arguments: {str(e)}")
            current_app.logger.error(f"Exception type: {type(e)}")
            raise

        # temp_url and name are used directly below; report their absence as a client
        # error instead of letting a KeyError/TypeError escape as a 500.
        missing = [key for key in ('temp_url', 'name') if args.get(key) is None]
        if missing:
            return {'message': f"Missing required field(s): {', '.join(missing)}"}, 400

        # user_metadata is declared as a JSON string, but an already-decoded JSON value
        # (or null) is tolerated as well.
        raw_metadata = args.get('user_metadata')
        try:
            if isinstance(raw_metadata, (str, bytes, bytearray)):
                user_metadata = json.loads(raw_metadata)
            else:
                user_metadata = raw_metadata or {}
        except json.JSONDecodeError as e:
            current_app.logger.error(f"Invalid user_metadata: {str(e)}")
            return {'message': 'Invalid JSON format for user_metadata.'}, 400

        try:
            actual_file_content, actual_file_content_type = download_file_content(args['temp_url'], user_metadata)

            # The file name (and with it the extension) is derived from the provided document name.
            filename = secure_filename(args.get('name'))
            extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''

            # Wrap the downloaded bytes in a FileStorage, as expected by create_document_stack.
            file_content = io.BytesIO(actual_file_content)
            file = FileStorage(
                stream=file_content,
                filename=filename,
                content_type=actual_file_content_type
            )

            current_app.logger.info(f"Successfully downloaded file: {filename}")
        except requests.RequestException as e:
            current_app.logger.error(f"Error downloading file: {str(e)}")
            return {'message': f'Error downloading file: {str(e)}'}, 422

        try:
            # Prepare API input
            api_input = {
                'catalog_id': args.get('catalog_id'),
                'name': args.get('name') or filename,
                'language': args.get('language'),
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
                'catalog_properties': args.get('catalog_properties'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201
        except EveAIException as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            return {'message': str(e)}, 422
        except Exception as e:
            # Logged in detail, answered with a generic message.
            current_app.logger.error(f'Error adding document: {str(e)}')
            return {'message': 'Error adding document'}, 500
|
|
|
|
|
|
# Models for AddURL
# Request body: a permanent URL plus the document metadata.
add_url_model = document_ns.model('AddURL', {
    # `required` is a boolean flag (it used to be the string 'True').
    'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
    'url': fields.String(required=True, description='URL of the document to add'),
    'name': fields.String(required=False, description='Name of the document'),
    'language': fields.String(required=True, description='Language of the document'),
    'user_context': fields.String(required=False, description='User context for the document'),
    'valid_from': fields.String(required=False, description='Valid from date for the document'),
    'user_metadata': fields.String(required=False, description='User metadata for the document'),
    'system_metadata': fields.String(required=False, description='System metadata for the document'),
    'catalog_properties': fields.String(required=False,
                                        description='The catalog configuration to be passed along (JSON '
                                                    'format). Validity is against catalog requirements '
                                                    'is not checked, and is the responsibility of the '
                                                    'calling client.'),
})

# Response body returned by /add_url on success.
add_url_response = document_ns.model('AddURLResponse', {
    'message': fields.String(description='Status message'),
    'document_id': fields.Integer(description='ID of the created document'),
    'document_version_id': fields.Integer(description='ID of the created document version'),
    'task_id': fields.String(description='ID of the embedding task')
})
|
|
|
|
|
|
# Endpoint that adds a document from a permanent URL.
@document_ns.route('/add_url')
class AddURL(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(add_url_model)
    @document_ns.response(201, 'Document added successfully', add_url_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(422, 'URL could not be processed')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Add a new document to EveAI from a permanent URL.

        This endpoint is used for URLs that will remain accessible. The URL is stored and can
        be used to refresh the document's content later.
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')

        # Read and check the payload outside the try block below: abort() raises an
        # HTTP exception, which the generic `except Exception` would turn into a 500.
        args = document_ns.payload or {}
        missing = [key for key in ('catalog_id', 'url', 'language') if key not in args]
        if missing:
            document_ns.abort(400, f"Missing required field(s): {', '.join(missing)}")

        try:
            cleaned_url = clean_url(args['url'])
            file_content, filename, extension = process_url(cleaned_url, tenant_id)

            api_input = {
                'catalog_id': args['catalog_id'],
                'url': cleaned_url,
                'name': args.get('name') or filename,
                'language': args['language'],
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
                'catalog_properties': args.get('catalog_properties'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except EveAIException as e:
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            # abort() raises, so nothing after it in this handler would run.
            document_ns.abort(422, str(e))
        except Exception as e:
            # Logged in detail, answered with a generic message.
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            document_ns.abort(500, 'Error adding document from URL')
|
|
|
|
|
|
# Shape of one entry in the document listing.
document_list_model = document_ns.model(
    'DocumentList',
    {
        'id': fields.Integer(description='Document ID'),
        'name': fields.String(description='Document name'),
        'valid_from': fields.DateTime(description='Valid from date'),
        'valid_to': fields.DateTime(description='Valid to date'),
    },
)
|
|
|
|
|
|
# Paginated listing of documents, returned under the 'documents' envelope.
@document_ns.route('/list')
class DocumentList(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.doc('list_documents')
    @document_ns.marshal_list_with(document_list_model, envelope='documents')
    def get(self):
        """List all documents"""
        # Paging is taken from the query string: page defaults to 1, per_page to 10.
        query = request.args
        page_number = query.get('page', 1, type=int)
        page_size = query.get('per_page', 10, type=int)
        documents_page = get_documents_list(page_number, page_size)
        return documents_page.items, 200
|
|
|
|
|
|
# Request body for editing a document's name and validity window.
edit_document_model = document_ns.model(
    'EditDocument',
    {
        'name': fields.String(description='New name for the document', required=True),
        'valid_from': fields.DateTime(description='New valid from date', required=False),
        'valid_to': fields.DateTime(description='New valid to date', required=False),
    },
)
|
|
|
|
|
|
# Operations on a single document: PUT edits its attributes, POST refreshes its content.
@document_ns.route('/<int:document_id>')
class DocumentResource(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.doc('edit_document')
    @document_ns.expect(edit_document_model)
    @document_ns.response(200, 'Document updated successfully')
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'Document not found')
    @document_ns.response(500, 'Internal Server Error')
    def put(self, document_id):
        """Edit a document. The content of the document will not be refreshed!"""
        try:
            payload = request.json
            tenant_id = get_jwt_identity()
            updated_doc, error = edit_document(
                tenant_id,
                document_id,
                payload.get('name', None),
                payload.get('valid_from', None),
                payload.get('valid_to', None),
            )
            # A falsy document signals failure; `error` then carries the reason.
            if not updated_doc:
                return {'message': f'Error updating document: {error}'}, 400
            return {'message': f'Document {updated_doc.id} updated successfully'}, 200
        except EveAIException as e:
            return e.to_dict(), e.status_code

    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.doc('refresh_document')
    @document_ns.response(200, 'Document refreshed successfully')
    def post(self, document_id):
        """Refresh a document. In this case, the content of the document will be refreshed! This requires the document
        version to have a permanent and accessible URL!"""
        tenant_id = get_jwt_identity()
        new_version, result = refresh_document(document_id, tenant_id)
        # `result` is reported as the task ID on success and as the error text on failure.
        if not new_version:
            return {'message': f'Error refreshing document: {result}'}, 400
        return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
|
|
|
|
|
|
# Request body for editing a document version.
edit_document_version_model = document_ns.model(
    'EditDocumentVersion',
    {
        'user_context': fields.String(
            description='New user context for the document version',
            required=True,
        ),
        'catalog_properties': fields.String(
            description='New catalog properties for the document version',
            required=True,
        ),
    },
)
|
|
|
|
|
|
# Edit operations on a single document version.
@document_ns.route('/version/<int:version_id>')
class DocumentVersionResource(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.doc('edit_document_version')
    @document_ns.expect(edit_document_version_model)
    @document_ns.response(200, 'Document version updated successfully')
    @document_ns.response(400, 'Validation Error')
    def put(self, version_id):
        """Edit a document version"""
        data = request.json or {}
        # user_context is read with a subscript below; answer a missing field with a
        # 400 (same wording as the lookup endpoint) instead of an unhandled KeyError.
        if 'user_context' not in data:
            return {'message': "Missing required field: 'user_context'"}, 400

        tenant_id = get_jwt_identity()
        updated_version, error = edit_document_version(tenant_id, version_id, data['user_context'],
                                                       data.get('catalog_properties'))
        # A falsy version signals failure; `error` then carries the reason.
        if updated_version:
            return {'message': f'Document Version {updated_version.id} updated successfully'}, 200
        else:
            return {'message': f'Error updating document version: {error}'}, 400
|
|
|
|
|
|
# Request body model used by the refresh_with_info endpoint; every field is optional.
refresh_document_model = document_ns.model(
    'RefreshDocument',
    {
        'name': fields.String(description='New name for the document', required=False),
        'language': fields.String(description='Language of the document', required=False),
        'user_context': fields.String(description='User context for the document', required=False),
        'user_metadata': fields.Raw(description='User metadata for the document', required=False),
        'catalog_properties': fields.Raw(description='Catalog properties for the document', required=False),
    },
)
|
|
|
|
|
|
# Refresh of a document's content without any additional version information.
@document_ns.route('/<int:document_id>/refresh')
class RefreshDocument(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(404, 'Document not found')
    def post(self, document_id):
        """
        Refresh a document without additional information. In this case, the content of the document will be refreshed!
        This requires the document version to have a permanent and accessible URL!
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}')

        try:
            new_version, result = refresh_document(document_id, tenant_id)

            # No new version: `result` is reported as the error text.
            if not new_version:
                return {'message': f'Error refreshing document: {result}'}, 400

            # Success: `result` is reported as the task ID.
            body = {
                'message': f'Document refreshed successfully. New version: {new_version.id}. Task ID: {result}',
                'document_id': document_id,
                'document_version_id': new_version.id,
                'task_id': result
            }
            return body, 200

        except Exception as e:
            # Details go to the log; the client only gets a generic message.
            current_app.logger.error(f'Error refreshing document: {str(e)}')
            return {'message': 'Internal server error'}, 500
|
|
|
|
|
|
# Refresh of a document's content together with new version information.
@document_ns.route('/<int:document_id>/refresh_with_info')
class RefreshDocumentWithInfo(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(refresh_document_model)
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'Document not found')
    def post(self, document_id):
        """
        Refresh a document with new version information.
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} with info for tenant {tenant_id}')

        try:
            # The JSON request body is handed to refresh_document_with_info as-is.
            api_input = request.json
            new_version, result = refresh_document_with_info(document_id, tenant_id, api_input)

            # No new version: `result` is reported as the error text.
            if not new_version:
                return {'message': f'Error refreshing document with info: {result}'}, 400

            # Success: `result` is reported as the task ID.
            body = {
                'message': f'Document refreshed successfully with new info. New version: {new_version.id}. Task ID: {result}',
                'document_id': document_id,
                'document_version_id': new_version.id,
                'task_id': result
            }
            return body, 200

        except Exception as e:
            # Details go to the log; the client only gets a generic message.
            current_app.logger.error(f'Error refreshing document with info: {str(e)}')
            return {'message': 'Internal server error'}, 500
|
|
|
|
|
|
# Request and response models for metadata-based document lookup.
lookup_model = document_ns.model(
    'DocumentLookup',
    {
        'lookup_criteria': fields.Raw(
            description='JSON object containing key-value pairs to match in metadata. '
                        'Example: {"external_id": "123", "source": "zapier", "source_type": "google_docs"}',
            required=True,
        ),
        'metadata_type': fields.String(
            description='Which metadata field to search in',
            enum=['user_metadata', 'system_metadata'],
            required=True,
        ),
    },
)

lookup_response = document_ns.model(
    'DocumentLookupResponse',
    {
        'document_id': fields.Integer(description='ID of the found document'),
        'document_version_id': fields.Integer(description='ID of the latest document version'),
        'name': fields.String(description='Document name'),
        'metadata': fields.Raw(description='Full metadata of the found document'),
    },
)
|
|
|
|
|
|
# Lookup of a document by matching key/value pairs in its version metadata.
@document_ns.route('/lookup')
class DocumentLookup(Resource):
    # The only version attributes a client may search in and read back (mirrors the
    # enum of lookup_model); metadata_type is used as an attribute name below.
    _METADATA_TYPES = ('user_metadata', 'system_metadata')

    # No marshal_with decorator here: it would also marshal the error bodies returned
    # below with lookup_response, replacing their message by an all-null lookup result.
    # The success body is marshalled explicitly instead.
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(lookup_model)
    @document_ns.response(200, 'Document found', lookup_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'No document found matching criteria')
    def post(self):
        """
        Look up a document using metadata criteria
        """
        tenant_id = get_jwt_identity()
        try:
            data = request.json or {}
            lookup_criteria = data['lookup_criteria']
            metadata_type = data['metadata_type']

            # Never pass an arbitrary client-supplied name to getattr().
            if metadata_type not in self._METADATA_TYPES:
                return {'message': f"Invalid metadata_type. Use one of: {', '.join(self._METADATA_TYPES)}"}, 400

            document, version = lookup_document(
                tenant_id,
                lookup_criteria,
                metadata_type
            )

            # NOTE(review): lookup_document may already raise when nothing matches; this
            # guard only covers the case where it returns empty values instead.
            if document is None or version is None:
                return {'message': 'No document found matching criteria'}, 404

            found = {
                'document_id': document.id,
                'document_version_id': version.id,
                'name': document.name,
                'metadata': getattr(version, metadata_type)
            }
            return document_ns.marshal(found, lookup_response), 200

        except EveAIException as e:
            return e.to_dict(), e.status_code

        except KeyError as e:
            return {'message': f'Missing required field: {str(e)}'}, 400
|
|
|
|
|
|
# Request body for refreshing a document from a temporary URL.
refresh_url_model = document_ns.model(
    'RefreshDocumentThroughURL',
    {
        'temp_url': fields.String(
            description='Temporary URL of the updated document content',
            required=True,
        ),
        'language': fields.String(description='Language of the document', required=False),
        'user_context': fields.String(description='User context for the document', required=False),
        'user_metadata': fields.Raw(description='Custom metadata fields', required=False),
        'catalog_properties': fields.Raw(description='Catalog-specific properties', required=False),
    },
)
|
|
|
|
|
|
# Refresh of a document with content downloaded from a temporary URL.
@document_ns.route('/<int:document_id>/refresh_through_url')
class RefreshDocumentThroughURL(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(refresh_url_model)
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'Document not found')
    @document_ns.response(422, 'File could not be processed')
    def post(self, document_id):
        """Refresh a document using content from a URL"""
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} through URL for tenant {tenant_id}')

        try:
            # Make sure there is an existing version before downloading anything;
            # previously a missing version surfaced as an AttributeError.
            old_doc_vers = (DocumentVersion.query.filter_by(doc_id=document_id)
                            .order_by(desc(DocumentVersion.id)).first())
            if old_doc_vers is None:
                return {'message': f'Document {document_id} has no version to refresh'}, 404

            args = request.json or {}
            if args.get('temp_url') is None:
                return {'message': "Missing required field: 'temp_url'"}, 400

            # refresh_url_model declares user_metadata as a raw JSON value, so it normally
            # arrives already decoded; a JSON-encoded string is still accepted.
            raw_metadata = args.get('user_metadata')
            try:
                if isinstance(raw_metadata, (str, bytes, bytearray)):
                    user_metadata = json.loads(raw_metadata)
                else:
                    user_metadata = raw_metadata or {}
            except json.JSONDecodeError:
                return {'message': 'Invalid JSON format for user_metadata.'}, 400

            try:
                # Only the raw bytes are passed on; the content type is not needed here.
                actual_file_content, _content_type = download_file_content(args['temp_url'], user_metadata)

                new_version, task_id = refresh_document_with_content(
                    document_id,
                    tenant_id,
                    actual_file_content,
                    args
                )

                # NOTE(review): assumes the same (None, error) failure convention as
                # refresh_document - confirm against refresh_document_with_content.
                if not new_version:
                    return {'message': f'Error refreshing document: {task_id}'}, 400

                return {
                    'message': f'Document refreshed successfully. New version: {new_version.id}. Task ID: {task_id}',
                    'document_id': document_id,
                    'document_version_id': new_version.id,
                    'task_id': task_id
                }, 200

            except requests.RequestException as e:
                current_app.logger.error(f"Error downloading file: {str(e)}")
                return {'message': f'Error downloading file: {str(e)}'}, 422

        except EveAIException as e:
            return e.to_dict(), e.status_code
|
|
|
|
|
|
def download_file_content(url: str, user_metadata: dict, timeout: float = 30) -> tuple[bytes, str]:
    """
    Download the content behind ``url`` and return it together with its content type.

    When ``user_metadata['service']`` contains 'Zapier', ``url`` is treated as a Zapier
    stashed URL: the body of its response is the hydration URL from which the actual
    file is then downloaded. In every other case ``url`` is downloaded directly.

    Args:
        url: URL to download from (stashed URL in the Zapier case).
        user_metadata: Decoded user metadata; only its 'service' entry is inspected.
            Values that are not a dict are treated as "no metadata".
        timeout: Timeout in seconds passed to each ``requests.get`` call, so that an
            unresponsive server cannot block the request indefinitely.

    Returns:
        A ``(content, content_type)`` tuple; ``content_type`` falls back to
        'application/octet-stream' when the response has no content-type header.

    Raises:
        requests.RequestException: on connection errors, timeouts or error status codes.
    """
    # NOTE(review): `url` (and the hydration URL) is caller-supplied and fetched from the
    # server side. Consider restricting allowed schemes/hosts to prevent SSRF.
    is_zapier = (
        isinstance(user_metadata, dict)
        and 'service' in user_metadata
        and 'Zapier' in (user_metadata['service'] or '')
    )

    download_url = url
    if is_zapier:
        # Zapier uses a system of Stashed URLs
        # Step 1: the stashed URL returns the hydration URL as its response body.
        current_app.logger.info(f"Downloading stashed file from URL: {url}")
        with requests.get(url, timeout=timeout) as stash_response:
            stash_response.raise_for_status()
            download_url = stash_response.text.strip()
        current_app.logger.info(f"Downloading actual file from URL: {download_url}")

    # Step 2 (or the only step for regular URLs): download the actual file.
    # The context manager releases the connection even if raise_for_status() fails.
    with requests.get(download_url, timeout=timeout) as file_response:
        file_response.raise_for_status()
        actual_file_content = file_response.content
        actual_file_content_type = file_response.headers.get('content-type', 'application/octet-stream')

    return actual_file_content, actual_file_content_type
|