- Introduction of API functionality (to be continued). Deduplication of document and URL uploads between the views and the API. - Improvements to document processing: introduction of processor classes to streamline document inputs. - Removed pure YouTube functionality, as YouTube retrieval of documents continuously changes, but added upload of srt, mp3, ogg and mp4 files.
179 lines
7.2 KiB
Python
179 lines
7.2 KiB
Python
from flask_restful import Resource, reqparse
|
|
from flask import current_app
|
|
from flask_jwt_extended import jwt_required, get_jwt_identity
|
|
from werkzeug.datastructures import FileStorage
|
|
from werkzeug.utils import secure_filename
|
|
from common.utils.document_utils import (
|
|
create_document_stack, process_url, start_embedding_task,
|
|
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
|
|
process_multiple_urls, prepare_youtube_document
|
|
)
|
|
from common.utils.eveai_exceptions import EveAIYoutubeError
|
|
|
|
|
|
class AddDocumentResource(Resource):
    """API resource that creates a document from an uploaded file."""

    @jwt_required()
    def post(self):
        """Store an uploaded file as a new document and start its embedding task.

        Request arguments:
            file: The uploaded file (required, read from the request files).
            name: Optional document name; the sanitised filename is used when
                it is missing or empty.
            language: Required.
            user_context: Optional.
            valid_from: Optional; passed through as a string.

        Returns:
            A ``(body, status)`` tuple:

            * 201 with the document id, document version id and task id once
              the embedding task has been started;
            * 400 when the filename has no extension, or when a helper raises
              ``EveAIInvalidLanguageException`` or ``EveAIUnsupportedFileType``;
            * 500 for any other error (the error is logged).
        """
        parser = reqparse.RequestParser()
        parser.add_argument('file', type=FileStorage, location='files', required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        # The JWT identity is used as the tenant identifier.
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document for tenant {tenant_id}')

        try:
            upload = args['file']
            # An upload may carry no filename at all; treat that as an empty name
            # so it is rejected below instead of failing inside secure_filename.
            filename = secure_filename(upload.filename or '')

            # A name without a dot (or ending in one) has no usable extension.
            # Indexing the result of rsplit() would raise IndexError here and
            # surface as a 500, although this is a client error.
            _, dot, extension = filename.rpartition('.')
            if not dot or not extension:
                return {
                    'message': f"File '{filename}' has no extension; "
                               f"unable to determine the file type."
                }, 400
            extension = extension.lower()

            validate_file_type(extension)

            api_input = {
                'name': args['name'] or filename,
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = create_document_stack(api_input, upload, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            # Validation problems are the caller's fault: report them verbatim.
            return {'message': str(e)}, 400
        except Exception as e:
            # Boundary handler: log the cause, but do not leak details to the client.
            current_app.logger.error(f'Error adding document: {str(e)}')
            return {'message': 'Error adding document'}, 500
|
|
|
|
|
|
class AddURLResource(Resource):
    """API resource that creates a document from the content behind a URL."""

    @jwt_required()
    def post(self):
        """Fetch a URL, store it as a new document and start its embedding task.

        Request arguments: ``url`` and ``language`` are required; ``name``,
        ``user_context`` and ``valid_from`` are optional. When ``name`` is
        missing or empty, the filename returned by ``process_url`` is used.

        Returns:
            A ``(body, status)`` tuple: 201 with document id, document version
            id and task id on success; 400 when the URL already exists or a
            language / file-type exception is raised; 500 for any other error.
        """
        arg_parser = reqparse.RequestParser()
        # (argument name, required?) in the order they are registered.
        for field, required in (
            ('url', True),
            ('name', False),
            ('language', True),
            ('user_context', False),
            ('valid_from', False),
        ):
            arg_parser.add_argument(field, type=str, required=required)
        args = arg_parser.parse_args()

        # The JWT identity is used as the tenant identifier.
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')

        try:
            content, filename, extension = process_url(args['url'], tenant_id)

            doc_input = dict(
                url=args['url'],
                name=args['name'] or filename,
                language=args['language'],
                user_context=args['user_context'],
                valid_from=args['valid_from'],
            )

            new_doc, new_doc_vers = create_document_stack(doc_input, content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            response_body = {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }
            return response_body, 201

        except EveAIDoubleURLException:
            return {'message': f'A document with URL {args["url"]} already exists.'}, 400
        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            return {'message': str(e)}, 400
        except Exception as e:
            # Boundary handler: log the cause, return a generic message.
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            return {'message': 'Error adding document from URL'}, 500
|
|
|
|
|
|
class AddMultipleURLsResource(Resource):
    """API resource that submits several URLs for processing in one request."""

    @jwt_required()
    def post(self):
        """Pass a list of URLs and shared metadata to ``process_multiple_urls``.

        Request arguments: ``urls`` (required, may be given multiple times) and
        ``language`` (required); ``name``, ``user_context`` and ``valid_from``
        are optional and are applied as shared input for all URLs.

        Returns:
            A ``(body, status)`` tuple: 201 with the value returned by
            ``process_multiple_urls`` under ``results``; 500 when an exception
            is raised (the error is logged).
        """
        arg_parser = reqparse.RequestParser()
        arg_parser.add_argument('urls', type=str, action='append', required=True)
        # (argument name, required?) for the shared metadata fields.
        shared_fields = (
            ('name', False),
            ('language', True),
            ('user_context', False),
            ('valid_from', False),
        )
        for field, required in shared_fields:
            arg_parser.add_argument(field, type=str, required=required)
        args = arg_parser.parse_args()

        # The JWT identity is used as the tenant identifier.
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding multiple documents from URLs for tenant {tenant_id}')

        try:
            shared_input = {field: args[field] for field, _ in shared_fields}
            outcome = process_multiple_urls(args['urls'], tenant_id, shared_input)

            response_body = {
                'message': 'Processing of multiple URLs completed',
                'results': outcome
            }
            return response_body, 201

        except Exception as e:
            # Boundary handler: log the cause, return a generic message.
            current_app.logger.error(f'Error adding documents from URLs: {str(e)}')
            return {'message': 'Error adding documents from URLs'}, 500
|
|
|
|
|
|
class AddYoutubeResource(Resource):
    """API resource that creates a document from a YouTube URL."""

    @jwt_required()
    def post(self):
        """Prepare a YouTube document and start its embedding task.

        Request arguments: ``url`` and ``language`` are required; ``name``,
        ``user_context`` and ``valid_from`` are optional.

        Returns:
            A ``(body, status)`` tuple: 201 with document id, document version
            id and task id on success; 400 when ``EveAIYoutubeError``,
            ``EveAIInvalidLanguageException`` or ``EveAIUnsupportedFileType``
            is raised; 500 for any other error (the error is logged).
        """
        arg_parser = reqparse.RequestParser()
        # (argument name, required?) in the order they are registered.
        for field, required in (
            ('url', True),
            ('name', False),
            ('language', True),
            ('user_context', False),
            ('valid_from', False),
        ):
            arg_parser.add_argument(field, type=str, required=required)
        args = arg_parser.parse_args()

        # The JWT identity is used as the tenant identifier.
        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding YouTube document for tenant {tenant_id}')

        try:
            doc_input = {
                field: args[field]
                for field in ('name', 'language', 'user_context', 'valid_from')
            }

            new_doc, new_doc_vers = prepare_youtube_document(args['url'], tenant_id, doc_input)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            response_body = {
                'message': f'Processing on YouTube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }
            return response_body, 201

        except (EveAIYoutubeError, EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            # All three are reported to the client verbatim as a bad request.
            return {'message': str(e)}, 400
        except Exception as e:
            # Boundary handler: log the cause, return a generic message.
            current_app.logger.error(f'Error adding YouTube document: {str(e)}')
            return {'message': 'Error adding YouTube document'}, 500
|
|
|
|
|
|
# You can add more API resources here as needed
|