- Full API application, streamlined, de-duplication of document handling code into document_utils.py

- Added meta-data fields to DocumentVersion
- Docker container to support API
This commit is contained in:
Josako
2024-09-09 16:11:42 +02:00
parent 341ba47d1c
commit 76cb825660
39 changed files with 598 additions and 6177 deletions

View File

@@ -1,13 +1,13 @@
from flask import Flask, jsonify
from flask_jwt_extended import get_jwt_identity
from common.extensions import db, api, jwt, minio_client
from flask import Flask, jsonify, request
from flask_jwt_extended import get_jwt_identity, verify_jwt_in_request
from common.extensions import db, api_rest, jwt, minio_client, simple_encryption
import os
import logging.config
from common.utils.database import Database
from config.logging_config import LOGGING
from .api.document_api import AddDocumentResource
from .api.auth import TokenResource
from .api.document_api import document_ns
from .api.auth import auth_ns
from config.config import get_config
from common.utils.celery_utils import make_celery, init_celery
from common.utils.eveai_exceptions import EveAIException
@@ -39,34 +39,51 @@ def create_app(config_file=None):
# Register Necessary Extensions
register_extensions(app)
# register Blueprints
register_namespaces(api_rest)
# Error handler for the API
@app.errorhandler(EveAIException)
def handle_eveai_exception(error):
response = jsonify(error.to_dict())
response.status_code = error.status_code
return response
return {'message': str(error)}, error.status_code
@api.before_request
@app.before_request
def before_request():
# Extract tenant_id from the JWT token
tenant_id = get_jwt_identity()
app.logger.debug(f'Before request: {request.method} {request.path}')
# Switch to the correct schema
Database(tenant_id).switch_schema()
# Check if this is a request to the token endpoint
if request.path == '/api/v1/token' and request.method == 'POST':
app.logger.debug('Token request detected, skipping JWT verification')
return
# Register resources
register_api_resources()
try:
verify_jwt_in_request(optional=True)
tenant_id = get_jwt_identity()
app.logger.debug(f'Tenant ID from JWT: {tenant_id}')
if tenant_id:
Database(tenant_id).switch_schema()
app.logger.debug(f'Switched to schema for tenant {tenant_id}')
else:
app.logger.debug('No tenant ID found in JWT')
except Exception as e:
app.logger.error(f'Error in before_request: {str(e)}')
# Don't raise the exception here, let the request continue
# The appropriate error handling will be done in the specific endpoints
return app
return app
def register_extensions(app):
db.init_app(app)
api.init_app(app)
api_rest.init_app(app, title='EveAI API', version='1.0', description='EveAI API')
jwt.init_app(app)
minio_client.init_app(app)
simple_encryption.init_app(app)
def register_api_resources():
api.add_resource(AddDocumentResource, '/api/v1/documents/add_document')
api.add_resource(TokenResource, '/api/v1/token')
def register_namespaces(app):
api_rest.add_namespace(document_ns, path='/api/v1/documents')
api_rest.add_namespace(auth_ns, path='/api/v1')

View File

@@ -1,24 +1,66 @@
from flask_restful import Resource, reqparse
from flask_restx import Namespace, Resource, fields
from flask_jwt_extended import create_access_token
from common.models.user import Tenant
from common.extensions import simple_encryption
from flask import current_app
from flask import current_app, request
auth_ns = Namespace('auth', description='Authentication related operations')
token_model = auth_ns.model('Token', {
'tenant_id': fields.Integer(required=True, description='Tenant ID'),
'api_key': fields.String(required=True, description='API Key')
})
token_response = auth_ns.model('TokenResponse', {
'access_token': fields.String(description='JWT access token')
})
class TokenResource(Resource):
@auth_ns.route('/token')
class Token(Resource):
@auth_ns.expect(token_model)
@auth_ns.response(200, 'Success', token_response)
@auth_ns.response(400, 'Validation Error')
@auth_ns.response(401, 'Unauthorized')
@auth_ns.response(404, 'Tenant Not Found')
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('tenant_id', type=int, required=True)
parser.add_argument('api_key', type=str, required=True)
args = parser.parse_args()
"""
Get JWT token
"""
current_app.logger.debug(f"Token endpoint called with data: {request.json}")
try:
tenant_id = auth_ns.payload['tenant_id']
api_key = auth_ns.payload['api_key']
except KeyError as e:
current_app.logger.error(f"Missing required field: {e}")
return {'message': f"Missing required field: {e}"}, 400
current_app.logger.debug(f"Querying database for tenant: {tenant_id}")
tenant = Tenant.query.get(tenant_id)
tenant = Tenant.query.get(args['tenant_id'])
if not tenant:
return {'message': 'Tenant not found'}, 404
current_app.logger.error(f"Tenant not found: {tenant_id}")
return {'message': "Tenant not found"}, 404
decrypted_api_key = simple_encryption.decrypt_api_key(tenant.encrypted_api_key)
if args['api_key'] != decrypted_api_key:
return {'message': 'Invalid API key'}, 401
current_app.logger.debug(f"Tenant found: {tenant.id}")
access_token = create_access_token(identity={'tenant_id': tenant.id})
return {'access_token': access_token}, 200
try:
current_app.logger.debug("Attempting to decrypt API key")
decrypted_api_key = simple_encryption.decrypt_api_key(tenant.encrypted_api_key)
except Exception as e:
current_app.logger.error(f"Error decrypting API key: {e}")
return {'message': "Internal server error"}, 500
if api_key != decrypted_api_key:
current_app.logger.error(f"Invalid API key for tenant: {tenant_id}")
return {'message': "Invalid API key"}, 401
try:
current_app.logger.debug(f"Creating access token for tenant: {tenant_id}")
access_token = create_access_token(identity=tenant_id)
current_app.logger.debug("Access token created successfully")
return {'access_token': access_token}, 200
except Exception as e:
current_app.logger.error(f"Error creating access token: {e}")
return {'message': "Internal server error"}, 500

View File

@@ -1,31 +1,72 @@
from flask_restful import Resource, reqparse
from flask import current_app
import json
from datetime import datetime
import pytz
from flask import current_app, request
from flask_restx import Namespace, Resource, fields, reqparse
from flask_jwt_extended import jwt_required, get_jwt_identity
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
from common.utils.document_utils import (
create_document_stack, process_url, start_embedding_task,
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
process_multiple_urls, prepare_youtube_document
process_multiple_urls, get_documents_list, edit_document, refresh_document, edit_document_version
)
from common.utils.eveai_exceptions import EveAIYoutubeError
class AddDocumentResource(Resource):
def validate_date(date_str):
try:
return datetime.fromisoformat(date_str).replace(tzinfo=pytz.UTC)
except ValueError:
raise ValueError("Invalid date format. Use ISO format (YYYY-MM-DDTHH:MM:SS).")
def validate_json(json_str):
try:
return json.loads(json_str)
except json.JSONDecodeError:
raise ValueError("Invalid JSON format for user_metadata.")
document_ns = Namespace('documents', description='Document related operations')
# Define models for request parsing and response serialization
upload_parser = reqparse.RequestParser()
upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload')
upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document')
upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document')
upload_parser.add_argument('user_context', location='form', type=str, required=False,
help='User context for the document')
upload_parser.add_argument('valid_from', location='form', type=validate_date, required=False,
help='Valid from date for the document (ISO format)')
upload_parser.add_argument('user_metadata', location='form', type=validate_json, required=False,
help='User metadata for the document (JSON format)')
add_document_response = document_ns.model('AddDocumentResponse', {
'message': fields.String(description='Status message'),
'document_id': fields.Integer(description='ID of the created document'),
'document_version_id': fields.Integer(description='ID of the created document version'),
'task_id': fields.String(description='ID of the embedding task')
})
@document_ns.route('/add_document')
class AddDocument(Resource):
@jwt_required()
@document_ns.expect(upload_parser)
@document_ns.response(201, 'Document added successfully', add_document_response)
@document_ns.response(400, 'Validation Error')
@document_ns.response(500, 'Internal Server Error')
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('file', type=FileStorage, location='files', required=True)
parser.add_argument('name', type=str, required=False)
parser.add_argument('language', type=str, required=True)
parser.add_argument('user_context', type=str, required=False)
parser.add_argument('valid_from', type=str, required=False)
args = parser.parse_args()
"""
Add a new document
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding document for tenant {tenant_id}')
try:
args = upload_parser.parse_args()
file = args['file']
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
@@ -33,10 +74,11 @@ class AddDocumentResource(Resource):
validate_file_type(extension)
api_input = {
'name': args['name'] or filename,
'language': args['language'],
'user_context': args['user_context'],
'valid_from': args['valid_from']
'name': args.get('name') or filename,
'language': args.get('language'),
'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'),
}
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
@@ -50,35 +92,57 @@ class AddDocumentResource(Resource):
}, 201
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
return {'message': str(e)}, 400
current_app.logger.error(f'Error adding document: {str(e)}')
document_ns.abort(400, str(e))
except Exception as e:
current_app.logger.error(f'Error adding document: {str(e)}')
return {'message': 'Error adding document'}, 500
document_ns.abort(500, 'Error adding document')
class AddURLResource(Resource):
# Models for AddURL
add_url_model = document_ns.model('AddURL', {
'url': fields.String(required=True, description='URL of the document to add'),
'name': fields.String(required=False, description='Name of the document'),
'language': fields.String(required=True, description='Language of the document'),
'user_context': fields.String(required=False, description='User context for the document'),
'valid_from': fields.String(required=False, description='Valid from date for the document'),
'user_metadata': fields.String(required=False, description='User metadata for the document'),
'system_metadata': fields.String(required=False, description='System metadata for the document')
})
add_url_response = document_ns.model('AddURLResponse', {
'message': fields.String(description='Status message'),
'document_id': fields.Integer(description='ID of the created document'),
'document_version_id': fields.Integer(description='ID of the created document version'),
'task_id': fields.String(description='ID of the embedding task')
})
@document_ns.route('/add_url')
class AddURL(Resource):
@jwt_required()
@document_ns.expect(add_url_model)
@document_ns.response(201, 'Document added successfully', add_url_response)
@document_ns.response(400, 'Validation Error')
@document_ns.response(500, 'Internal Server Error')
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('url', type=str, required=True)
parser.add_argument('name', type=str, required=False)
parser.add_argument('language', type=str, required=True)
parser.add_argument('user_context', type=str, required=False)
parser.add_argument('valid_from', type=str, required=False)
args = parser.parse_args()
"""
Add a new document from URL
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')
try:
args = document_ns.payload
file_content, filename, extension = process_url(args['url'], tenant_id)
api_input = {
'url': args['url'],
'name': args['name'] or filename,
'name': args.get('name') or filename,
'language': args['language'],
'user_context': args['user_context'],
'valid_from': args['valid_from']
'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'),
}
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
@@ -92,87 +156,85 @@ class AddURLResource(Resource):
}, 201
except EveAIDoubleURLException:
return {'message': f'A document with URL {args["url"]} already exists.'}, 400
document_ns.abort(400, f'A document with URL {args["url"]} already exists.')
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
return {'message': str(e)}, 400
document_ns.abort(400, str(e))
except Exception as e:
current_app.logger.error(f'Error adding document from URL: {str(e)}')
return {'message': 'Error adding document from URL'}, 500
document_ns.abort(500, 'Error adding document from URL')
class AddMultipleURLsResource(Resource):
document_list_model = document_ns.model('DocumentList', {
'id': fields.Integer(description='Document ID'),
'name': fields.String(description='Document name'),
'valid_from': fields.DateTime(description='Valid from date'),
'valid_to': fields.DateTime(description='Valid to date'),
})
@document_ns.route('/list')
class DocumentList(Resource):
@jwt_required()
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('urls', type=str, action='append', required=True)
parser.add_argument('name', type=str, required=False)
parser.add_argument('language', type=str, required=True)
parser.add_argument('user_context', type=str, required=False)
parser.add_argument('valid_from', type=str, required=False)
args = parser.parse_args()
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding multiple documents from URLs for tenant {tenant_id}')
try:
api_input = {
'name': args['name'],
'language': args['language'],
'user_context': args['user_context'],
'valid_from': args['valid_from']
}
results = process_multiple_urls(args['urls'], tenant_id, api_input)
return {
'message': 'Processing of multiple URLs completed',
'results': results
}, 201
except Exception as e:
current_app.logger.error(f'Error adding documents from URLs: {str(e)}')
return {'message': 'Error adding documents from URLs'}, 500
@document_ns.doc('list_documents')
@document_ns.marshal_list_with(document_list_model, envelope='documents')
def get(self):
"""List all documents"""
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 10, type=int)
pagination = get_documents_list(page, per_page)
return pagination.items, 200
class AddYoutubeResource(Resource):
edit_document_model = document_ns.model('EditDocument', {
'name': fields.String(required=True, description='New name for the document'),
'valid_from': fields.DateTime(required=False, description='New valid from date'),
'valid_to': fields.DateTime(required=False, description='New valid to date'),
})
@document_ns.route('/<int:document_id>')
class DocumentResource(Resource):
@jwt_required()
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('url', type=str, required=True)
parser.add_argument('name', type=str, required=False)
parser.add_argument('language', type=str, required=True)
parser.add_argument('user_context', type=str, required=False)
parser.add_argument('valid_from', type=str, required=False)
args = parser.parse_args()
@document_ns.doc('edit_document')
@document_ns.expect(edit_document_model)
@document_ns.response(200, 'Document updated successfully')
def put(self, document_id):
"""Edit a document"""
data = request.json
updated_doc, error = edit_document(document_id, data['name'], data.get('valid_from'), data.get('valid_to'))
if updated_doc:
return {'message': f'Document {updated_doc.id} updated successfully'}, 200
else:
return {'message': f'Error updating document: {error}'}, 400
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding YouTube document for tenant {tenant_id}')
try:
api_input = {
'name': args['name'],
'language': args['language'],
'user_context': args['user_context'],
'valid_from': args['valid_from']
}
new_doc, new_doc_vers = prepare_youtube_document(args['url'], tenant_id, api_input)
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
return {
'message': f'Processing on YouTube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
'document_id': new_doc.id,
'document_version_id': new_doc_vers.id,
'task_id': task_id
}, 201
except EveAIYoutubeError as e:
return {'message': str(e)}, 400
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
return {'message': str(e)}, 400
except Exception as e:
current_app.logger.error(f'Error adding YouTube document: {str(e)}')
return {'message': 'Error adding YouTube document'}, 500
@jwt_required()
@document_ns.doc('refresh_document')
@document_ns.response(200, 'Document refreshed successfully')
def post(self, document_id):
"""Refresh a document"""
new_version, result = refresh_document(document_id)
if new_version:
return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
else:
return {'message': f'Error refreshing document: {result}'}, 400
# You can add more API resources here as needed
edit_document_version_model = document_ns.model('EditDocumentVersion', {
'user_context': fields.String(required=True, description='New user context for the document version'),
})
@document_ns.route('/version/<int:version_id>')
class DocumentVersionResource(Resource):
@jwt_required()
@document_ns.doc('edit_document_version')
@document_ns.expect(edit_document_version_model)
@document_ns.response(200, 'Document version updated successfully')
def put(self, version_id):
"""Edit a document version"""
data = request.json
updated_version, error = edit_document_version(version_id, data['user_context'])
if updated_version:
return {'message': f'Document Version {updated_version.id} updated successfully'}, 200
else:
return {'message': f'Error updating document version: {error}'}, 400