- Improvements to document uploads (accept files other than HTML files when entering a URL)

- Introduction of API-functionality (to be continued). Deduplication of document and url uploads between views and api.
- Improvements on document processing - introduction of processor classes to streamline document inputs
- Removed pure YouTube functionality, as YouTube retrieval of documents continuously changes, but added upload of srt, mp3, ogg and mp4 files
This commit is contained in:
Josako
2024-09-02 12:37:44 +02:00
parent a158655247
commit 914c265afe
21 changed files with 1425 additions and 852 deletions

View File

@@ -1,4 +1,72 @@
# from flask import Blueprint, request
#
# public_api_bp = Blueprint("public", __name__, url_prefix="/api/v1")
# tenant_api_bp = Blueprint("tenant", __name__, url_prefix="/api/v1/tenant")
from flask import Flask, jsonify
from flask_jwt_extended import get_jwt_identity
from common.extensions import db, api, jwt, minio_client
import os
import logging.config
from common.utils.database import Database
from config.logging_config import LOGGING
from .api.document_api import AddDocumentResource
from .api.auth import TokenResource
from config.config import get_config
from common.utils.celery_utils import make_celery, init_celery
from common.utils.eveai_exceptions import EveAIException
def create_app(config_file=None):
    """Build and configure the eveai_api Flask application.

    The configuration object is selected from the ``FLASK_ENV`` environment
    variable: ``'production'`` loads the ``'prod'`` configuration, every
    other value (including the default ``'development'``) loads ``'dev'``.

    Args:
        config_file: Currently unused; kept so existing callers keep working.

    Returns:
        The configured Flask application, with Celery attached as
        ``app.celery``.
    """
    # Function-scope import so this block does not depend on a change to the
    # module-level import list.
    from flask_jwt_extended import verify_jwt_in_request

    app = Flask(__name__)

    environment = os.getenv('FLASK_ENV', 'development')
    config_name = 'prod' if environment == 'production' else 'dev'
    app.config.from_object(get_config(config_name))

    app.config['SESSION_KEY_PREFIX'] = 'eveai_api_'

    app.celery = make_celery(app.name, app.config)
    init_celery(app.celery, app)

    logging.config.dictConfig(LOGGING)
    logger = logging.getLogger(__name__)

    logger.info("eveai_api starting up")

    # Register resources BEFORE the extensions are initialised.
    # NOTE(review): assumes `api` is a Flask-RESTful Api created without an
    # app; such an Api only binds resources that were added before
    # init_app() runs, so the original order left the routes unregistered.
    register_api_resources()

    # Register Necessary Extensions
    register_extensions(app)

    # Error handler for the API
    @app.errorhandler(EveAIException)
    def handle_eveai_exception(error):
        response = jsonify(error.to_dict())
        response.status_code = error.status_code
        return response

    # The hook is attached to the Flask app: the Api object offers no
    # before_request decorator.
    @app.before_request
    def before_request():
        # A JWT must be verified before its identity can be read. The check
        # is optional so that unauthenticated routes (e.g. the token
        # endpoint) are still reachable.
        verify_jwt_in_request(optional=True)
        identity = get_jwt_identity()
        if identity is None:
            # No token on this request: there is no tenant schema to select.
            return None

        # Tokens are issued with identity {'tenant_id': <id>}; also accept a
        # bare id so either identity shape works.
        tenant_id = identity.get('tenant_id') if isinstance(identity, dict) else identity

        # Switch to the correct schema
        Database(tenant_id).switch_schema()
        return None

    return app
def register_extensions(app):
    """Initialise each shared extension object against ``app``.

    The extensions are initialised in this order: database, API, JWT
    manager, MinIO client.
    """
    for extension in (db, api, jwt, minio_client):
        extension.init_app(app)
def register_api_resources():
    """Attach the API resource classes to their URL routes."""
    routes = (
        (AddDocumentResource, '/api/v1/documents/add_document'),
        (TokenResource, '/api/v1/token'),
    )
    for resource, route in routes:
        api.add_resource(resource, route)

24
eveai_api/api/auth.py Normal file
View File

@@ -0,0 +1,24 @@
from flask_restful import Resource, reqparse
from flask_jwt_extended import create_access_token
from common.models.user import Tenant
from common.extensions import simple_encryption
from flask import current_app
class TokenResource(Resource):
    """Exchange a tenant id and its API key for a JWT access token."""

    def post(self):
        """Validate the supplied credentials and issue an access token.

        Request arguments:
            tenant_id (int, required): id of the tenant requesting a token.
            api_key (str, required): the tenant's API key.

        Returns:
            ``({'access_token': ...}, 200)`` on success,
            ``({'message': 'Tenant not found'}, 404)`` for an unknown tenant,
            ``({'message': 'Invalid API key'}, 401)`` for a wrong key.
        """
        # Function-scope import so this block does not depend on a change to
        # the module-level import list.
        import hmac

        parser = reqparse.RequestParser()
        parser.add_argument('tenant_id', type=int, required=True)
        parser.add_argument('api_key', type=str, required=True)
        args = parser.parse_args()

        tenant = Tenant.query.get(args['tenant_id'])
        if not tenant:
            return {'message': 'Tenant not found'}, 404

        decrypted_api_key = simple_encryption.decrypt_api_key(tenant.encrypted_api_key)

        # Compare in constant time so response timing does not leak how much
        # of the key matched. Both sides are encoded to bytes because
        # compare_digest() only accepts ASCII-only str; 'surrogatepass' keeps
        # the encoding total for any str the request parser can produce.
        # A non-str decrypted value never matched the str argument before,
        # and still does not.
        key_matches = isinstance(decrypted_api_key, str) and hmac.compare_digest(
            args['api_key'].encode('utf-8', 'surrogatepass'),
            decrypted_api_key.encode('utf-8', 'surrogatepass'),
        )
        if not key_matches:
            return {'message': 'Invalid API key'}, 401

        access_token = create_access_token(identity={'tenant_id': tenant.id})
        return {'access_token': access_token}, 200

View File

@@ -0,0 +1,178 @@
from flask_restful import Resource, reqparse
from flask import current_app
from flask_jwt_extended import jwt_required, get_jwt_identity
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
from common.utils.document_utils import (
create_document_stack, process_url, start_embedding_task,
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
process_multiple_urls, prepare_youtube_document
)
from common.utils.eveai_exceptions import EveAIYoutubeError
class AddDocumentResource(Resource):
    """Create a document from an uploaded file and start its embedding task."""

    @jwt_required()
    def post(self):
        """Handle a file upload.

        Request arguments:
            file (file upload, required): the document to add.
            name (str, optional): document name; defaults to the file name.
            language (str, required)
            user_context (str, optional)
            valid_from (str, optional)

        Returns:
            ``(payload, 201)`` with the document id, document version id and
            task id once processing has been started;
            ``({'message': ...}, 400)`` for a missing file extension, an
            invalid language or an unsupported file type;
            ``({'message': 'Error adding document'}, 500)`` for any other
            failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('file', type=FileStorage, location='files', required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        # Tokens are issued with identity {'tenant_id': <id>}; also accept a
        # bare id so either identity shape works.
        identity = get_jwt_identity()
        tenant_id = identity.get('tenant_id') if isinstance(identity, dict) else identity
        current_app.logger.info(f'Adding document for tenant {tenant_id}')

        try:
            file = args['file']
            # The upload may carry no filename at all.
            filename = secure_filename(file.filename or '')

            # A name without a dot is a client error, not a server failure:
            # answer 400 instead of letting an IndexError become a 500.
            _, dot, extension = filename.rpartition('.')
            if not dot:
                return {'message': 'Uploaded file name must include a file extension'}, 400
            extension = extension.lower()

            validate_file_type(extension)

            api_input = {
                'name': args['name'] or filename,
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            return {'message': str(e)}, 400
        except Exception as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            return {'message': 'Error adding document'}, 500
class AddURLResource(Resource):
    """Create a document from the content behind a URL and start its embedding task."""

    @jwt_required()
    def post(self):
        """Handle a single-URL submission.

        Request arguments:
            url (str, required): location of the document to add.
            name (str, optional): document name; defaults to the file name
                returned by ``process_url``.
            language (str, required)
            user_context (str, optional)
            valid_from (str, optional)

        Returns:
            ``(payload, 201)`` with the document id, document version id and
            task id once processing has been started;
            ``({'message': ...}, 400)`` for an already-known URL, an invalid
            language or an unsupported file type;
            ``({'message': 'Error adding document from URL'}, 500)`` for any
            other failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('url', type=str, required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        # Tokens are issued with identity {'tenant_id': <id>}; also accept a
        # bare id so either identity shape works.
        identity = get_jwt_identity()
        tenant_id = identity.get('tenant_id') if isinstance(identity, dict) else identity
        current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')

        try:
            file_content, filename, extension = process_url(args['url'], tenant_id)

            api_input = {
                'url': args['url'],
                'name': args['name'] or filename,
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except EveAIDoubleURLException:
            return {'message': f'A document with URL {args["url"]} already exists.'}, 400
        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            return {'message': str(e)}, 400
        except Exception as e:
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            return {'message': 'Error adding document from URL'}, 500
class AddMultipleURLsResource(Resource):
    """Submit several URLs in one request via ``process_multiple_urls``."""

    @jwt_required()
    def post(self):
        """Handle a multi-URL submission.

        Request arguments:
            urls (str, repeatable, required): the URLs to add.
            name (str, optional)
            language (str, required)
            user_context (str, optional)
            valid_from (str, optional)

        Returns:
            ``({'message': ..., 'results': ...}, 201)`` where ``results`` is
            whatever ``process_multiple_urls`` returned;
            ``({'message': 'Error adding documents from URLs'}, 500)`` on
            any failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('urls', type=str, action='append', required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        # Tokens are issued with identity {'tenant_id': <id>}; also accept a
        # bare id so either identity shape works.
        identity = get_jwt_identity()
        tenant_id = identity.get('tenant_id') if isinstance(identity, dict) else identity
        current_app.logger.info(f'Adding multiple documents from URLs for tenant {tenant_id}')

        try:
            api_input = {
                'name': args['name'],
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            results = process_multiple_urls(args['urls'], tenant_id, api_input)

            return {
                'message': 'Processing of multiple URLs completed',
                'results': results
            }, 201

        except Exception as e:
            current_app.logger.error(f'Error adding documents from URLs: {str(e)}')
            return {'message': 'Error adding documents from URLs'}, 500
class AddYoutubeResource(Resource):
    """Create a document from a YouTube URL and start its embedding task."""

    @jwt_required()
    def post(self):
        """Handle a YouTube submission.

        Request arguments:
            url (str, required): the YouTube URL.
            name (str, optional)
            language (str, required)
            user_context (str, optional)
            valid_from (str, optional)

        Returns:
            ``(payload, 201)`` with the document id, document version id and
            task id once processing has been started;
            ``({'message': ...}, 400)`` for a YouTube error, an invalid
            language or an unsupported file type;
            ``({'message': 'Error adding YouTube document'}, 500)`` for any
            other failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('url', type=str, required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        # Tokens are issued with identity {'tenant_id': <id>}; also accept a
        # bare id so either identity shape works.
        identity = get_jwt_identity()
        tenant_id = identity.get('tenant_id') if isinstance(identity, dict) else identity
        current_app.logger.info(f'Adding YouTube document for tenant {tenant_id}')

        try:
            api_input = {
                'name': args['name'],
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = prepare_youtube_document(args['url'], tenant_id, api_input)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on YouTube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        # All three are client-side problems and are reported the same way.
        except (EveAIYoutubeError, EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            return {'message': str(e)}, 400
        except Exception as e:
            current_app.logger.error(f'Error adding YouTube document: {str(e)}')
            return {'message': 'Error adding YouTube document'}, 500
# You can add more API resources here as needed

View File

@@ -1,7 +0,0 @@
from flask import request
from flask.views import MethodView
class RegisterAPI(MethodView):
def post(self):
username = request.json['username']