Youtube added - further checking required

This commit is contained in:
Josako
2024-07-04 08:11:31 +02:00
parent 19e57f5adf
commit 8e1dac0233
17 changed files with 386 additions and 11 deletions

View File

@@ -17,7 +17,7 @@ import io
from common.models.document import Document, DocumentVersion
from common.extensions import db
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm
from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for
@@ -88,7 +88,7 @@ def add_url():
# If the form is submitted
if form.validate_on_submit():
current_app.logger.info(f'Adding document for tenant {session["tenant"]["id"]}')
current_app.logger.info(f'Adding url for tenant {session["tenant"]["id"]}')
url = form.url.data
doc_vers = DocumentVersion.query.filter_by(url=url).all()
@@ -129,6 +129,50 @@ def add_url():
return render_template('document/add_url.html', form=form)
@document_bp.route('/add_youtube', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_youtube():
form = AddYoutubeForm()
if form.validate_on_submit():
current_app.logger.info(f'Adding Youtube document for tenant {session["tenant"]["id"]}')
url = form.url.data
current_app.logger.debug(f'Value of language field: {form.language.data}')
doc_vers = DocumentVersion.query.filter_by(url=url).all()
if doc_vers:
current_app.logger.info(f'A document with url {url} already exists. No new document created.')
flash(f'A document with url {url} already exists. No new document created.', 'info')
return redirect(prefixed_url_for('document_bp.documents'))
# As downloading a Youtube document can take quite some time, we offload this downloading to the worker
# We just pass a simple file to get things conform
file = "Youtube placeholder file"
filename = 'placeholder.youtube'
extension = 'youtube'
form_dict = form_to_dict(form)
current_app.logger.debug(f'Form data: {form_dict}')
new_doc, new_doc_vers = create_document_stack(form_dict, file, filename, extension)
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
session['tenant']['id'],
new_doc_vers.id,
])
current_app.logger.info(f'Processing and Embedding on Youtube document started for tenant '
f'{session["tenant"]["id"]}, '
f'Document Version {new_doc_vers.id}. '
f'Processing and Embedding Youtube task: {task.id}')
flash(f'Processing on Youtube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
'success')
return redirect(prefixed_url_for('document_bp.documents'))
else:
form_validation_failed(request, form)
return render_template('document/add_youtube.html', form=form)
@document_bp.route('/documents', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def documents():
@@ -381,7 +425,11 @@ def create_document_stack(form, file, filename, extension):
new_doc = create_document(form, filename)
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, form.get('url', ''), form['language'], form['user_context'])
new_doc_vers = create_version_for_document(new_doc,
form.get('url', ''),
form.get('language', 'en'),
form.get('user_context', '')
)
try:
db.session.add(new_doc)
@@ -462,6 +510,10 @@ def upload_file_for_version(doc_vers, file, extension):
# Example: write content to a file manually
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
f.write(file.getvalue())
elif isinstance(file, str):
# It's a string, handle accordingly
with open(os.path.join(upload_path, doc_vers.file_name), 'w') as f:
f.write(file)
else:
raise TypeError('Unsupported file type.')