import json
from datetime import datetime

import pytz
from flask import current_app, request
from flask_restx import Namespace, Resource, fields, reqparse
from flask_jwt_extended import jwt_required, get_jwt_identity
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename

from common.utils.document_utils import (
    create_document_stack, process_url, start_embedding_task, validate_file_type,
    EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
    process_multiple_urls, get_documents_list, edit_document, refresh_document, edit_document_version,
    refresh_document_with_info
)


def validate_date(date_str):
    """Parse an ISO-format date string into a timezone-aware UTC datetime.

    Naive inputs are interpreted as UTC. Inputs that already carry a UTC
    offset are converted to UTC, so the instant they denote is preserved
    (simply overwriting ``tzinfo`` would silently shift the time).

    Raises:
        ValueError: if ``date_str`` is not a valid ISO-format string.
    """
    try:
        parsed = datetime.fromisoformat(date_str)
    except ValueError as exc:
        raise ValueError("Invalid date format. Use ISO format (YYYY-MM-DDTHH:MM:SS).") from exc

    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=pytz.UTC)
    return parsed.astimezone(pytz.UTC)


def validate_json(json_str):
    """Decode a JSON string and return the resulting Python object.

    Raises:
        ValueError: if ``json_str`` is not valid JSON.
    """
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as exc:
        raise ValueError("Invalid JSON format for user_metadata.") from exc


document_ns = Namespace('documents', description='Document related operations')

# Define models for request parsing and response serialization
upload_parser = reqparse.RequestParser()
upload_parser.add_argument('catalog_id', location='form', type=int, required=True,
                           help='The catalog to add the file to')
upload_parser.add_argument('file', location='files', type=FileStorage, required=True,
                           help='The file to upload')
upload_parser.add_argument('name', location='form', type=str, required=False,
                           help='Name of the document')
upload_parser.add_argument('language', location='form', type=str, required=True,
                           help='Language of the document')
upload_parser.add_argument('user_context', location='form', type=str, required=False,
                           help='User context for the document')
upload_parser.add_argument('valid_from', location='form', type=validate_date, required=False,
                           help='Valid from date for the document (ISO format)')
upload_parser.add_argument('user_metadata', location='form', type=validate_json, required=False,
                           help='User metadata for the document (JSON format)')

add_document_response = document_ns.model('AddDocumentResponse', {
    'message': fields.String(description='Status message'),
    'document_id': fields.Integer(description='ID of the created document'),
    'document_version_id': fields.Integer(description='ID of the created document version'),
    'task_id': fields.String(description='ID of the embedding task')
})


# Multipart upload endpoint: creates a document plus version and starts its embedding task.
@document_ns.route('/add_document')
class AddDocument(Resource):
    @jwt_required()
    @document_ns.expect(upload_parser)
    @document_ns.response(201, 'Document added successfully', add_document_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Add a new document
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document for tenant {tenant_id}')

        # Parse outside the try block: reqparse reports invalid input by aborting
        # with a 400, and the generic `except Exception` below would otherwise
        # swallow that abort and turn it into a 500.
        args = upload_parser.parse_args()
        file = args['file']
        filename = secure_filename(file.filename or '')

        # A name without an extension used to raise IndexError (-> 500); it is a client error.
        _, dot, extension = filename.rpartition('.')
        if not dot or not extension:
            current_app.logger.error(f'Error adding document: file name {filename!r} has no extension')
            document_ns.abort(400, 'The uploaded file must have a name with a file extension.')
        extension = extension.lower()

        try:
            validate_file_type(extension)

            api_input = {
                'catalog_id': args.get('catalog_id'),
                'name': args.get('name') or filename,
                'language': args.get('language'),
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            document_ns.abort(400, str(e))
        except Exception as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            document_ns.abort(500, 'Error adding document')


# Models for AddURL
add_url_model = document_ns.model('AddURL', {
    'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
    'url': fields.String(required=True, description='URL of the document to add'),
    'name': fields.String(required=False, description='Name of the document'),
    'language': fields.String(required=True, description='Language of the document'),
    'user_context': fields.String(required=False, description='User context for the document'),
    'valid_from': fields.String(required=False, description='Valid from date for the document'),
    'user_metadata': fields.String(required=False, description='User metadata for the document'),
    'system_metadata': fields.String(required=False, description='System metadata for the document')
})

add_url_response = document_ns.model('AddURLResponse', {
    'message': fields.String(description='Status message'),
    'document_id': fields.Integer(description='ID of the created document'),
    'document_version_id': fields.Integer(description='ID of the created document version'),
    'task_id': fields.String(description='ID of the embedding task')
})


# JSON endpoint: fetches a URL via process_url, then creates the document stack
# and starts its embedding task.
@document_ns.route('/add_url')
class AddURL(Resource):
    @jwt_required()
    @document_ns.expect(add_url_model)
    @document_ns.response(201, 'Document added successfully', add_url_response)
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(500, 'Internal Server Error')
    def post(self):
        """
        Add a new document from URL
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')

        # Read and check the payload outside the try block so that client errors
        # (malformed body, missing required keys) surface as 400 instead of being
        # converted to 500 by the generic handler below.
        args = document_ns.payload
        if not isinstance(args, dict):
            document_ns.abort(400, 'Request body must be a JSON object.')
        missing = [key for key in ('catalog_id', 'url', 'language') if key not in args]
        if missing:
            document_ns.abort(400, f'Missing required field(s): {", ".join(missing)}')

        try:
            file_content, filename, extension = process_url(args['url'], tenant_id)
            api_input = {
                'catalog_id': args['catalog_id'],
                'url': args['url'],
                'name': args.get('name') or filename,
                'language': args['language'],
                'user_context': args.get('user_context'),
                'valid_from': args.get('valid_from'),
                'user_metadata': args.get('user_metadata'),
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except EveAIDoubleURLException:
            document_ns.abort(400, f'A document with URL {args["url"]} already exists.')
        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            document_ns.abort(400, str(e))
        except Exception as e:
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            document_ns.abort(500, 'Error adding document from URL')


document_list_model = document_ns.model('DocumentList', {
    'id': fields.Integer(description='Document ID'),
    'name': fields.String(description='Document name'),
    'valid_from': fields.DateTime(description='Valid from date'),
    'valid_to': fields.DateTime(description='Valid to date'),
})


# Paginated listing; `page` and `per_page` come from the query string (defaults 1 and 10).
@document_ns.route('/list')
class DocumentList(Resource):
    @jwt_required()
    @document_ns.doc('list_documents')
    @document_ns.marshal_list_with(document_list_model, envelope='documents')
    def get(self):
        """List all documents"""
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 10, type=int)
        # get_documents_list returns an object exposing the current page as `.items`.
        pagination = get_documents_list(page, per_page)
        return pagination.items, 200


edit_document_model = document_ns.model('EditDocument', {
    'name': fields.String(required=True, description='New name for the document'),
    'valid_from': fields.DateTime(required=False, description='New valid from date'),
    'valid_to': fields.DateTime(required=False, description='New valid to date'),
})


# NOTE(review): the handlers below take `document_id`, so the route must declare it
# as a URL variable; the integer converter matches the Integer IDs used by the
# models in this module — confirm against the deployed URL scheme.
@document_ns.route('/<int:document_id>')
class DocumentResource(Resource):
    @jwt_required()
    @document_ns.doc('edit_document')
    @document_ns.expect(edit_document_model)
    @document_ns.response(200, 'Document updated successfully')
    def put(self, document_id):
        """Edit a document"""
        data = request.json
        # edit_document returns (document, error); the document is falsy on failure.
        updated_doc, error = edit_document(document_id, data['name'], data.get('valid_from'), data.get('valid_to'))
        if updated_doc:
            return {'message': f'Document {updated_doc.id} updated successfully'}, 200
        else:
            return {'message': f'Error updating document: {error}'}, 400

    @jwt_required()
    @document_ns.doc('refresh_document')
    @document_ns.response(200, 'Document refreshed successfully')
    def post(self, document_id):
        """Refresh a document"""
        # `result` is the task ID on success and the error description on failure.
        new_version, result = refresh_document(document_id)
        if new_version:
            return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
        else:
            return {'message': f'Error refreshing document: {result}'}, 400


edit_document_version_model = document_ns.model('EditDocumentVersion', {
    'user_context': fields.String(required=True, description='New user context for the document version'),
})


# NOTE(review): `version_id` is required by the handler, so it is declared as a
# URL variable here — confirm the converter against the deployed URL scheme.
@document_ns.route('/version/<int:version_id>')
class DocumentVersionResource(Resource):
    @jwt_required()
    @document_ns.doc('edit_document_version')
    @document_ns.expect(edit_document_version_model)
    @document_ns.response(200, 'Document version updated successfully')
    def put(self, version_id):
        """Edit a document version"""
        data = request.json
        # edit_document_version returns (version, error); the version is falsy on failure.
        updated_version, error = edit_document_version(version_id, data['user_context'])
        if updated_version:
            return {'message': f'Document Version {updated_version.id} updated successfully'}, 200
        else:
            return {'message': f'Error updating document version: {error}'}, 400


# Define the model for the request body of refresh_with_info
refresh_document_model = document_ns.model('RefreshDocument', {
    'name': fields.String(required=False, description='New name for the document'),
    'language': fields.String(required=False, description='Language of the document'),
    'user_context': fields.String(required=False, description='User context for the document'),
    'user_metadata': fields.Raw(required=False, description='User metadata for the document')
})


# Refresh endpoint returning the new version and task identifiers in the body.
@document_ns.route('/<int:document_id>/refresh')
class RefreshDocument(Resource):
    @jwt_required()
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(404, 'Document not found')
    def post(self, document_id):
        """
        Refresh a document without additional information
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}')

        try:
            # `result` is the task ID on success and the error description on failure.
            new_version, result = refresh_document(document_id)

            if new_version:
                return {
                    'message': f'Document refreshed successfully. New version: {new_version.id}. Task ID: {result}',
                    'document_id': document_id,
                    'document_version_id': new_version.id,
                    'task_id': result
                }, 200
            else:
                return {'message': f'Error refreshing document: {result}'}, 400

        except Exception as e:
            current_app.logger.error(f'Error refreshing document: {str(e)}')
            return {'message': 'Internal server error'}, 500


# Refresh endpoint that forwards the JSON request body to refresh_document_with_info.
@document_ns.route('/<int:document_id>/refresh_with_info')
class RefreshDocumentWithInfo(Resource):
    @jwt_required()
    @document_ns.expect(refresh_document_model)
    @document_ns.response(200, 'Document refreshed successfully')
    @document_ns.response(400, 'Validation Error')
    @document_ns.response(404, 'Document not found')
    def post(self, document_id):
        """
        Refresh a document with new information
        """
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Refreshing document {document_id} with info for tenant {tenant_id}')

        try:
            api_input = request.json
            # `result` is the task ID on success and the error description on failure.
            new_version, result = refresh_document_with_info(document_id, api_input)

            if new_version:
                return {
                    'message': f'Document refreshed successfully with new info. New version: {new_version.id}. Task ID: {result}',
                    'document_id': document_id,
                    'document_version_id': new_version.id,
                    'task_id': result
                }, 200
            else:
                return {'message': f'Error refreshing document with info: {result}'}, 400

        except Exception as e:
            current_app.logger.error(f'Error refreshing document with info: {str(e)}')
            return {'message': 'Internal server error'}, 500