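"""Flask-RESTX namespace for document operations.

Exposes endpoints to upload a file, add a document from a URL, list
documents, edit documents and document versions, and refresh documents.
"""
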
import json
from datetime import datetime

import pytz
from flask import current_app, request
from flask_jwt_extended import jwt_required, get_jwt_identity
from flask_restx import Namespace, Resource, fields, reqparse
from werkzeug.datastructures import FileStorage
from werkzeug.exceptions import HTTPException
from werkzeug.utils import secure_filename

from common.utils.document_utils import (
    create_document_stack, process_url, start_embedding_task,
    validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
    process_multiple_urls, get_documents_list, edit_document, refresh_document, edit_document_version,
    refresh_document_with_info
)


def validate_date(date_str):
    """Parse an ISO-format date string into a UTC-aware ``datetime``.

    Used as the ``type=`` callable for the ``valid_from`` upload argument.

    Args:
        date_str: Date/time text in ISO format (e.g. ``2024-01-31T12:00:00``).
            A value without a UTC offset is interpreted as UTC; a value with
            an offset is converted to UTC.

    Returns:
        A ``datetime`` whose ``tzinfo`` is ``pytz.UTC``.

    Raises:
        ValueError: If ``date_str`` is not a valid ISO-format date.
    """
    try:
        parsed = datetime.fromisoformat(date_str)
    except ValueError as err:
        raise ValueError("Invalid date format. Use ISO format (YYYY-MM-DDTHH:MM:SS).") from err

    if parsed.tzinfo is None:
        # Naive input: keep the wall-clock value and label it as UTC.
        return parsed.replace(tzinfo=pytz.UTC)

    # Offset-aware input: convert to UTC. Overwriting tzinfo here would keep
    # the wall-clock digits and silently move the instant the caller meant.
    return parsed.astimezone(pytz.UTC)


def validate_json(json_str):
    """Decode a JSON string supplied for the ``user_metadata`` argument.

    Used as the ``type=`` callable for the ``user_metadata`` upload argument.

    Args:
        json_str: JSON-encoded text.

    Returns:
        The decoded Python value as produced by ``json.loads``.

    Raises:
        ValueError: If ``json_str`` is not valid JSON. The original
            ``json.JSONDecodeError`` is attached as the cause.
    """
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as err:
        # Chain the decoder error so the position of the syntax problem is
        # still available to whoever inspects the traceback.
        raise ValueError("Invalid JSON format for user_metadata.") from err


document_ns = Namespace('documents', description='Document related operations')

# Define models for request parsing and response serialization

# Arguments of the upload request, as (name, options) pairs. They are
# registered on the parser in exactly this order.
_UPLOAD_ARGUMENTS = (
    ('catalog_id', {'location': 'form', 'type': int, 'required': True,
                    'help': 'The catalog to add the file to'}),
    ('file', {'location': 'files', 'type': FileStorage, 'required': True,
              'help': 'The file to upload'}),
    ('name', {'location': 'form', 'type': str, 'required': False,
              'help': 'Name of the document'}),
    ('language', {'location': 'form', 'type': str, 'required': True,
                  'help': 'Language of the document'}),
    ('user_context', {'location': 'form', 'type': str, 'required': False,
                      'help': 'User context for the document'}),
    # valid_from and user_metadata are converted by the validators defined above.
    ('valid_from', {'location': 'form', 'type': validate_date, 'required': False,
                    'help': 'Valid from date for the document (ISO format)'}),
    ('user_metadata', {'location': 'form', 'type': validate_json, 'required': False,
                       'help': 'User metadata for the document (JSON format)'}),
)

upload_parser = reqparse.RequestParser()
for _arg_name, _arg_options in _UPLOAD_ARGUMENTS:
    upload_parser.add_argument(_arg_name, **_arg_options)

# Schema of the body returned by AddDocument.post on success (HTTP 201).
add_document_response = document_ns.model(
    name='AddDocumentResponse',
    model={
        'message': fields.String(description='Status message'),
        'document_id': fields.Integer(description='ID of the created document'),
        'document_version_id': fields.Integer(description='ID of the created document version'),
        'task_id': fields.String(description='ID of the embedding task'),
    },
)


# Endpoint that accepts an uploaded file, creates the document and its first
# version, and starts the embedding task for that version.
@document_ns.route('/add_document')
class AddDocument(Resource):
    @jwt_required()
    @document_ns.expect(upload_parser)
    @document_ns.response(201, 'Document added successfully', add_document_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Add a new document
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document for tenant {tenant_id}')

        # Parse outside the try block: parse_args() reports invalid input by
        # aborting with a 400, and the generic handler below would otherwise
        # catch that abort and turn it into a 500.
        args = upload_parser.parse_args()

        file = args['file']
        filename = secure_filename(file.filename or '')

        # rpartition never raises; an empty separator means there is no
        # extension at all, which is a client error rather than a server one.
        _, dot, extension = filename.rpartition('.')
        if not dot:
            document_ns.abort(400, 'The uploaded file must have a file extension.')
        extension = extension.lower()

        try:
            validate_file_type(extension)

            api_input = {
                'catalog_id': args.get('catalog_id'),
                'name': args.get('name') or filename,
                'language': args.get('language'),
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            document_ns.abort(400, str(e))
        except HTTPException:
            # Keep the status code of any HTTP error raised by the helpers.
            raise
        except Exception as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            document_ns.abort(500, 'Error adding document')


# Models for AddURL

# Request body expected by AddURL.post.
# NOTE(review): 'system_metadata' is declared here, but AddURL.post in this
# module does not read it -- confirm whether it should be forwarded or removed.
add_url_model = document_ns.model('AddURL', {
    # `required` must be a real boolean; the previous value was the string 'True'.
    'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
    'url': fields.String(required=True, description='URL of the document to add'),
    'name': fields.String(required=False, description='Name of the document'),
    'language': fields.String(required=True, description='Language of the document'),
    'user_context': fields.String(required=False, description='User context for the document'),
    'valid_from': fields.String(required=False, description='Valid from date for the document'),
    'user_metadata': fields.String(required=False, description='User metadata for the document'),
    'system_metadata': fields.String(required=False, description='System metadata for the document')
})

# Schema of the body returned by AddURL.post on success (HTTP 201).
add_url_response = document_ns.model('AddURLResponse', {
    'message': fields.String(description='Status message'),
    'document_id': fields.Integer(description='ID of the created document'),
    'document_version_id': fields.Integer(description='ID of the created document version'),
    'task_id': fields.String(description='ID of the embedding task')
})


# Endpoint that fetches a document from a URL, creates the document and its
# first version, and starts the embedding task for that version.
@document_ns.route('/add_url')
class AddURL(Resource):
    @jwt_required()
    @document_ns.expect(add_url_model)
    @document_ns.response(201, 'Document added successfully', add_url_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Add a new document from URL
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')

        # Validate the body before the try block so that a malformed request
        # is answered with a 400 instead of surfacing as KeyError/TypeError
        # and being reported as a 500 by the generic handler.
        args = document_ns.payload
        if not isinstance(args, dict):
            document_ns.abort(400, 'Request body must be a JSON object.')

        # Same fields that add_url_model marks as required.
        missing = [key for key in ('catalog_id', 'url', 'language') if key not in args]
        if missing:
            missing_list = ', '.join(missing)
            document_ns.abort(400, f'Missing required field(s): {missing_list}')

        try:
            file_content, filename, extension = process_url(args['url'], tenant_id)

            api_input = {
                'catalog_id': args['catalog_id'],
                'url': args['url'],
                'name': args.get('name') or filename,
                'language': args['language'],
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except EveAIDoubleURLException:
            document_ns.abort(400, f'A document with URL {args["url"]} already exists.')
        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            document_ns.abort(400, str(e))
        except HTTPException:
            # Keep the status code of any HTTP error raised by the helpers.
            raise
        except Exception as e:
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            document_ns.abort(500, 'Error adding document from URL')


# Schema used by DocumentList.get to marshal each listed document.
document_list_model = document_ns.model(
    name='DocumentList',
    model={
        'id': fields.Integer(description='Document ID'),
        'name': fields.String(description='Document name'),
        'valid_from': fields.DateTime(description='Valid from date'),
        'valid_to': fields.DateTime(description='Valid to date'),
    },
)


# Paginated listing of documents; the page is selected through the `page`
# and `per_page` query parameters (defaults: 1 and 10).
@document_ns.route('/list')
class DocumentList(Resource):
    @jwt_required()
    @document_ns.doc('list_documents')
    @document_ns.marshal_list_with(document_list_model, envelope='documents')
    def get(self):
        """List all documents"""
        query = request.args
        page_number = query.get('page', default=1, type=int)
        page_size = query.get('per_page', default=10, type=int)

        # Only the items of the requested page are returned; they are
        # marshalled by the decorator above.
        result_page = get_documents_list(page_number, page_size)
        return result_page.items, 200


# Request body expected by DocumentResource.put.
edit_document_model = document_ns.model(
    name='EditDocument',
    model={
        'name': fields.String(required=True, description='New name for the document'),
        'valid_from': fields.DateTime(required=False, description='New valid from date'),
        'valid_to': fields.DateTime(required=False, description='New valid to date'),
    },
)


# Operations on a single document addressed by its id: PUT edits it,
# POST refreshes it.
@document_ns.route('/<int:document_id>')
class DocumentResource(Resource):
    @jwt_required()
    @document_ns.doc('edit_document')
    @document_ns.expect(edit_document_model)
    @document_ns.response(200, 'Document updated successfully')
    def put(self, document_id):
        """Edit a document"""
        data = request.json

        # 'name' is required by edit_document_model. Reject a missing field or
        # a non-object body here; indexing it blindly would raise
        # KeyError/TypeError and reach the client as an unhandled 500.
        if not isinstance(data, dict) or 'name' not in data:
            return {'message': "Error updating document: 'name' is required"}, 400

        updated_doc, error = edit_document(document_id, data['name'], data.get('valid_from'), data.get('valid_to'))
        if updated_doc:
            return {'message': f'Document {updated_doc.id} updated successfully'}, 200
        else:
            return {'message': f'Error updating document: {error}'}, 400

    @jwt_required()
    @document_ns.doc('refresh_document')
    @document_ns.response(200, 'Document refreshed successfully')
    def post(self, document_id):
        """Refresh a document"""
        tenant_id = get_jwt_identity()
        # NOTE(review): RefreshDocument.post in this module calls
        # refresh_document(document_id) without tenant_id -- confirm the
        # helper's signature and align the two call sites.
        new_version, result = refresh_document(document_id, tenant_id)
        if new_version:
            return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
        else:
            return {'message': f'Error refreshing document: {result}'}, 400


# Request body expected by DocumentVersionResource.put.
edit_document_version_model = document_ns.model(
    name='EditDocumentVersion',
    model={
        'user_context': fields.String(required=True, description='New user context for the document version'),
    },
)


# Operations on a single document version addressed by its id.
@document_ns.route('/version/<int:version_id>')
class DocumentVersionResource(Resource):
    @jwt_required()
    @document_ns.doc('edit_document_version')
    @document_ns.expect(edit_document_version_model)
    @document_ns.response(200, 'Document version updated successfully')
    def put(self, version_id):
        """Edit a document version"""
        data = request.json

        # 'user_context' is required by edit_document_version_model. Reject a
        # missing field or a non-object body here; indexing it blindly would
        # raise KeyError/TypeError and reach the client as an unhandled 500.
        if not isinstance(data, dict) or 'user_context' not in data:
            return {'message': "Error updating document version: 'user_context' is required"}, 400

        updated_version, error = edit_document_version(version_id, data['user_context'])
        if updated_version:
            return {'message': f'Document Version {updated_version.id} updated successfully'}, 200
        else:
            return {'message': f'Error updating document version: {error}'}, 400


# Define the model for the request body of refresh_with_info
# (every field is optional).
refresh_document_model = document_ns.model(
    name='RefreshDocument',
    model={
        'name': fields.String(required=False, description='New name for the document'),
        'language': fields.String(required=False, description='Language of the document'),
        'user_context': fields.String(required=False, description='User context for the document'),
        'user_metadata': fields.Raw(required=False, description='User metadata for the document'),
    },
)


# Endpoint that refreshes a document without changing any of its attributes.
@document_ns.route('/<int:document_id>/refresh')
class RefreshDocument(Resource):
    @jwt_required()
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(404, 'Document not found')
    def post(self, document_id):
        """
        Refresh a document without additional information
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}')

        try:
            # NOTE(review): DocumentResource.post in this module calls
            # refresh_document(document_id, tenant_id) -- confirm the helper's
            # signature and align the two call sites.
            new_version, result = refresh_document(document_id)

            if new_version:
                return {
                    'message': f'Document refreshed successfully. New version: {new_version.id}. Task ID: {result}',
                    'document_id': document_id,
                    'document_version_id': new_version.id,
                    'task_id': result
                }, 200
            else:
                return {'message': f'Error refreshing document: {result}'}, 400

        except HTTPException:
            # Let HTTP errors through unchanged. Without this, the documented
            # 404 could never reach the client: this method returns no 404
            # itself, and the generic handler below would turn one raised by
            # the helper into a 500.
            raise
        except Exception as e:
            current_app.logger.error(f'Error refreshing document: {str(e)}')
            return {'message': 'Internal server error'}, 500


# Endpoint that refreshes a document and applies the attributes sent in the
# request body.
@document_ns.route('/<int:document_id>/refresh_with_info')
class RefreshDocumentWithInfo(Resource):
    @jwt_required()
    @document_ns.expect(refresh_document_model)
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'Document not found')
    def post(self, document_id):
        """
        Refresh a document with new information
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} with info for tenant {tenant_id}')

        try:
            api_input = request.json
            new_version, result = refresh_document_with_info(document_id, api_input)

            if new_version:
                return {
                    'message': f'Document refreshed successfully with new info. New version: {new_version.id}. Task ID: {result}',
                    'document_id': document_id,
                    'document_version_id': new_version.id,
                    'task_id': result
                }, 200
            else:
                return {'message': f'Error refreshing document with info: {result}'}, 400

        except HTTPException:
            # Let HTTP errors through unchanged: both the error raised by
            # request.json for an unreadable body and any raised by the
            # helper. The generic handler below would otherwise report them
            # all as a 500, making the documented 404 unreachable.
            raise
        except Exception as e:
            current_app.logger.error(f'Error refreshing document with info: {str(e)}')
            return {'message': 'Internal server error'}, 500