- Addition of general chunking parameters chunking_heading_level and chunking patterns
- Addition of Processor types docx and markdown
This commit is contained in:
@@ -11,9 +11,10 @@ from common.utils.document_utils import (
|
||||
create_document_stack, process_url, start_embedding_task,
|
||||
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
|
||||
get_documents_list, edit_document, refresh_document, edit_document_version,
|
||||
refresh_document_with_info
|
||||
refresh_document_with_info, lookup_document
|
||||
)
|
||||
from common.utils.eveai_exceptions import EveAIException
|
||||
from eveai_api.api.auth import requires_service
|
||||
|
||||
|
||||
def validate_date(date_str):
|
||||
@@ -59,6 +60,7 @@ add_document_response = document_ns.model('AddDocumentResponse', {
|
||||
@document_ns.route('/add_document')
|
||||
class AddDocument(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.expect(upload_parser)
|
||||
@document_ns.response(201, 'Document added successfully', add_document_response)
|
||||
@document_ns.response(400, 'Validation Error')
|
||||
@@ -134,6 +136,7 @@ add_url_response = document_ns.model('AddURLResponse', {
|
||||
@document_ns.route('/add_url')
|
||||
class AddURL(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.expect(add_url_model)
|
||||
@document_ns.response(201, 'Document added successfully', add_url_response)
|
||||
@document_ns.response(400, 'Validation Error')
|
||||
@@ -190,6 +193,7 @@ document_list_model = document_ns.model('DocumentList', {
|
||||
@document_ns.route('/list')
|
||||
class DocumentList(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.doc('list_documents')
|
||||
@document_ns.marshal_list_with(document_list_model, envelope='documents')
|
||||
def get(self):
|
||||
@@ -210,6 +214,7 @@ edit_document_model = document_ns.model('EditDocument', {
|
||||
@document_ns.route('/<int:document_id>')
|
||||
class DocumentResource(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.doc('edit_document')
|
||||
@document_ns.expect(edit_document_model)
|
||||
@document_ns.response(200, 'Document updated successfully')
|
||||
@@ -232,6 +237,7 @@ class DocumentResource(Resource):
|
||||
return e.to_dict(), e.status_code
|
||||
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.doc('refresh_document')
|
||||
@document_ns.response(200, 'Document refreshed successfully')
|
||||
def post(self, document_id):
|
||||
@@ -253,6 +259,7 @@ edit_document_version_model = document_ns.model('EditDocumentVersion', {
|
||||
@document_ns.route('/version/<int:version_id>')
|
||||
class DocumentVersionResource(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.doc('edit_document_version')
|
||||
@document_ns.expect(edit_document_version_model)
|
||||
@document_ns.response(200, 'Document version updated successfully')
|
||||
@@ -280,6 +287,7 @@ refresh_document_model = document_ns.model('RefreshDocument', {
|
||||
@document_ns.route('/<int:document_id>/refresh')
|
||||
class RefreshDocument(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.response(200, 'Document refreshed successfully')
|
||||
@document_ns.response(404, 'Document not found')
|
||||
def post(self, document_id):
|
||||
@@ -310,6 +318,7 @@ class RefreshDocument(Resource):
|
||||
@document_ns.route('/<int:document_id>/refresh_with_info')
|
||||
class RefreshDocumentWithInfo(Resource):
|
||||
@jwt_required()
|
||||
@requires_service('DOCAPI')
|
||||
@document_ns.expect(refresh_document_model)
|
||||
@document_ns.response(200, 'Document refreshed successfully')
|
||||
@document_ns.response(400, 'Validation Error')
|
||||
@@ -338,3 +347,112 @@ class RefreshDocumentWithInfo(Resource):
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error refreshing document with info: {str(e)}')
|
||||
return {'message': 'Internal server error'}, 500
|
||||
|
||||
|
||||
# Request/response schemas for the metadata-based document lookup endpoint.

# Request body: the key/value pairs to match and the metadata field to match them in.
lookup_model = document_ns.model(
    'DocumentLookup',
    {
        'lookup_criteria': fields.Raw(
            description='JSON object containing key-value pairs to match in metadata. '
                        'Example: {"external_id": "123", "source": "zapier", "source_type": "google_docs"}',
            required=True,
        ),
        'metadata_type': fields.String(
            description='Which metadata field to search in',
            enum=['user_metadata', 'system_metadata'],
            required=True,
        ),
    },
)

# Response body: identifiers of the matched document plus the metadata that was searched.
lookup_response = document_ns.model(
    'DocumentLookupResponse',
    {
        'document_id': fields.Integer(description='ID of the found document'),
        'document_version_id': fields.Integer(description='ID of the latest document version'),
        'name': fields.String(description='Document name'),
        'metadata': fields.Raw(description='Full metadata of the found document'),
    },
)
|
||||
|
||||
|
||||
@document_ns.route('/lookup')
class DocumentLookup(Resource):
    """Find a document by matching key/value pairs in one of its metadata fields."""

    # Metadata attributes a caller is allowed to search in and read back.
    # Mirrors the enum declared on the 'metadata_type' field of lookup_model.
    _METADATA_TYPES = ('user_metadata', 'system_metadata')

    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(lookup_model)
    @document_ns.response(200, 'Document found', lookup_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'No document found matching criteria')
    def post(self):
        """
        Look up a document using metadata criteria

        Expects a JSON object with 'lookup_criteria' and 'metadata_type'.
        On success returns the document id, the version id, the document name
        and the content of the requested metadata field of the version.
        Returns 400 when a required field is missing or invalid; errors raised
        as EveAIException are returned with their own status code.
        """
        tenant_id = get_jwt_identity()
        try:
            data = request.json
            if not isinstance(data, dict):
                return {'message': 'Request body must be a JSON object'}, 400

            lookup_criteria = data['lookup_criteria']
            metadata_type = data['metadata_type']

            # metadata_type is used as an attribute name below, so restrict it
            # to the documented values instead of trusting client input.
            if metadata_type not in self._METADATA_TYPES:
                allowed = ', '.join(self._METADATA_TYPES)
                return {'message': f'Invalid metadata_type. Must be one of: {allowed}'}, 400

            document, version = lookup_document(tenant_id, lookup_criteria, metadata_type)

            payload = {
                'document_id': document.id,
                'document_version_id': version.id,
                'name': document.name,
                'metadata': getattr(version, metadata_type)
            }
            # Marshal only the success payload. Decorating the whole method with
            # marshal_with would also marshal the error tuples below and replace
            # their 'message' with an all-null lookup response.
            return document_ns.marshal(payload, lookup_response), 200

        except EveAIException as e:
            return e.to_dict(), e.status_code

        except KeyError as e:
            return {'message': f'Missing required field: {str(e)}'}, 400
|
||||
|
||||
|
||||
# Request body for refreshing a document from content supplied in the request.
# Only 'file_content' is mandatory; every other field is optional.
refresh_content_model = document_ns.model(
    'RefreshDocumentContent',
    {
        'file_content': fields.Raw(description='The new file content', required=True),
        'language': fields.String(description='Language of the document', required=False),
        'user_context': fields.String(description='User context for the document', required=False),
        'user_metadata': fields.Raw(description='Custom metadata fields', required=False),
        'catalog_properties': fields.Raw(description='Catalog-specific properties', required=False),
        'trigger_service': fields.String(description='Service that triggered the update', required=False),
    },
)
|
||||
|
||||
|
||||
@document_ns.route('/<int:document_id>/refresh_content')
class RefreshDocumentContent(Resource):
    """Refresh an existing document from content supplied in the request body."""

    @jwt_required()
    @requires_service('DOCAPI')
    @document_ns.expect(refresh_content_model)
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(400, 'Validation Error')
    def post(self, document_id):
        """Refresh a document with new content

        Expects a JSON object containing at least 'file_content'. The request's
        'user_metadata' (if any) is extended with 'source' and 'trigger_service'
        before the whole request object is handed to refresh_document_with_content.
        Returns the new version id and the task id on success, 400 when the
        body is malformed; errors raised as EveAIException are returned with
        their own status code.
        """
        tenant_id = get_jwt_identity()
        try:
            data = request.json
            if not isinstance(data, dict):
                return {'message': 'Request body must be a JSON object'}, 400

            file_content = data['file_content']

            # Build user_metadata from:
            #   1. metadata sent in the request (an absent or null value counts as empty)
            #   2. Zapier-specific fields, which override request values on key clashes
            user_metadata = data.get('user_metadata') or {}
            if not isinstance(user_metadata, dict):
                return {'message': 'user_metadata must be a JSON object'}, 400

            data['user_metadata'] = {
                **user_metadata,
                'source': 'zapier',
                'trigger_service': data.get('trigger_service'),
            }

            # 'catalog_properties', when present, is passed through untouched in
            # `data`. We could add validation here against catalog configuration.

            # NOTE(review): refresh_document_with_content does not appear among the
            # names imported from common.utils.document_utils in the visible import
            # block -- confirm it is imported, otherwise this call raises NameError.
            new_version, task_id = refresh_document_with_content(
                document_id,
                tenant_id,
                file_content,
                data
            )

            return {
                'message': f'Document refreshed successfully. New version: {new_version.id}. Task ID: {task_id}',
                'document_id': document_id,
                'document_version_id': new_version.id,
                'task_id': task_id
            }, 200

        except EveAIException as e:
            return e.to_dict(), e.status_code

        except KeyError as e:
            # Same response shape as DocumentLookup uses for missing fields.
            return {'message': f'Missing required field: {str(e)}'}, 400
|
||||
|
||||
Reference in New Issue
Block a user