Youtube added - further checking required

This commit is contained in:
Josako
2024-07-04 08:11:31 +02:00
parent 19e57f5adf
commit 8e1dac0233
17 changed files with 386 additions and 11 deletions

View File

@@ -7,7 +7,7 @@
{% block content_description %}Add a url and the corresponding document to EveAI. In some cases, url's cannot be loaded directly. Download the html and add it as a document in that case.{% endblock %}
{% block content %}
<form method="post" enctype="multipart/form-data">
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}

View File

@@ -0,0 +1,24 @@
{% extends 'base.html' %}
{# Page template for adding a Youtube document. Expects a `form` object in the template context. #}
{% from "macros.html" import render_field %}
{% block title %}Add Youtube Document{% endblock %}
{% block content_title %}Add Youtube Document{% endblock %}
{% block content_description %}Add a youtube url and the corresponding document to EveAI. In some cases, url's cannot be loaded directly. Download the html and add it as a document in that case.{% endblock %}
{% block content %}
<form method="post">
{# Output of the form's hidden_tag() helper (hidden inputs of the form). #}
{{ form.hidden_tag() }}
{# Both lists are empty here, so render_field is called with nothing to disable or exclude. #}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{# Render every field of the form through the shared render_field macro from macros.html. #}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Add Youtube Document</button>
</form>
{% endblock %}
{# The footer block is overridden with empty content. #}
{% block content_footer %}
{% endblock %}

View File

@@ -83,6 +83,7 @@
{{ dropdown('Document Mgmt', 'contacts', [
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add Youtube Document' , 'url': '/document/add_youtube', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Documents', 'url': '/document/documents', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Library Operations', 'url': '/document/library_operations', 'roles': ['Super User', 'Tenant Admin']},
]) }}

View File

@@ -20,7 +20,6 @@ class AddDocumentForm(FlaskForm):
super().__init__()
self.language.choices = [(language, language) for language in
session.get('tenant').get('allowed_languages')]
self.language.data = session.get('default_language')
class AddURLForm(FlaskForm):
@@ -36,7 +35,21 @@ class AddURLForm(FlaskForm):
super().__init__()
self.language.choices = [(language, language) for language in
session.get('tenant').get('allowed_languages')]
self.language.data = session.get('default_language')
class AddYoutubeForm(FlaskForm):
    """Form used to register a Youtube URL as a new document.

    The language choices are not static: they are filled in at construction
    time from the ``allowed_languages`` entry of the tenant stored in the
    session.
    """

    # Field declaration order is kept as-is; it drives the order in which
    # the fields are iterated when the form is rendered.
    url = URLField('Youtube URL', validators=[DataRequired(), URL()])
    name = StringField('Name', validators=[Length(max=100)])
    language = SelectField('Language', choices=[], validators=[Optional()])
    user_context = TextAreaField('User Context', validators=[Optional()])
    valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])

    submit = SubmitField('Submit')

    def __init__(self):
        """Build the form and populate the language selector for the current tenant."""
        super().__init__()
        allowed_languages = session.get('tenant').get('allowed_languages')
        # Each choice uses the language value both as the submitted value and as the label.
        self.language.choices = [(lang, lang) for lang in allowed_languages]
class EditDocumentForm(FlaskForm):

View File

@@ -17,7 +17,7 @@ import io
from common.models.document import Document, DocumentVersion
from common.extensions import db
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm
from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for
@@ -88,7 +88,7 @@ def add_url():
# If the form is submitted
if form.validate_on_submit():
current_app.logger.info(f'Adding document for tenant {session["tenant"]["id"]}')
current_app.logger.info(f'Adding url for tenant {session["tenant"]["id"]}')
url = form.url.data
doc_vers = DocumentVersion.query.filter_by(url=url).all()
@@ -129,6 +129,50 @@ def add_url():
return render_template('document/add_url.html', form=form)
@document_bp.route('/add_youtube', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_youtube():
    """Show the "Add Youtube Document" form and process its submission.

    When the form does not validate (which includes a plain GET), the
    validation-failure helper is called and the form page is rendered.

    On a valid submission:
      * if a DocumentVersion with the submitted url already exists, nothing is
        created and the user is redirected to the documents overview;
      * otherwise a document stack is created around a placeholder file and a
        'create_embeddings' task is sent to the 'embeddings' queue, after
        which the user is redirected to the documents overview.
    """
    form = AddYoutubeForm()

    # Guard clause: anything that is not a valid submission ends up on the form page.
    if not form.validate_on_submit():
        form_validation_failed(request, form)
        return render_template('document/add_youtube.html', form=form)

    tenant_id = session["tenant"]["id"]
    current_app.logger.info(f'Adding Youtube document for tenant {tenant_id}')
    url = form.url.data
    current_app.logger.debug(f'Value of language field: {form.language.data}')

    # A url that is already attached to a document version is never added twice.
    existing_versions = DocumentVersion.query.filter_by(url=url).all()
    if existing_versions:
        duplicate_msg = f'A document with url {url} already exists. No new document created.'
        current_app.logger.info(duplicate_msg)
        flash(duplicate_msg, 'info')
        return redirect(prefixed_url_for('document_bp.documents'))

    # As downloading a Youtube document can take quite some time, the download is
    # offloaded to the worker. A simple placeholder file is passed along so the
    # regular document-creation flow can be reused.
    placeholder_content = "Youtube placeholder file"
    placeholder_name = 'placeholder.youtube'
    placeholder_extension = 'youtube'

    form_dict = form_to_dict(form)
    current_app.logger.debug(f'Form data: {form_dict}')

    new_doc, new_doc_vers = create_document_stack(
        form_dict,
        placeholder_content,
        placeholder_name,
        placeholder_extension,
    )

    task_args = [tenant_id, new_doc_vers.id]
    task = current_celery.send_task('create_embeddings', queue='embeddings', args=task_args)

    started_msg = (
        'Processing and Embedding on Youtube document started for tenant '
        f'{tenant_id}, '
        f'Document Version {new_doc_vers.id}. '
        f'Processing and Embedding Youtube task: {task.id}'
    )
    current_app.logger.info(started_msg)
    flash(
        f'Processing on Youtube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
        'success',
    )
    return redirect(prefixed_url_for('document_bp.documents'))
@document_bp.route('/documents', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def documents():
@@ -381,7 +425,11 @@ def create_document_stack(form, file, filename, extension):
new_doc = create_document(form, filename)
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, form.get('url', ''), form['language'], form['user_context'])
new_doc_vers = create_version_for_document(new_doc,
form.get('url', ''),
form.get('language', 'en'),
form.get('user_context', '')
)
try:
db.session.add(new_doc)
@@ -462,6 +510,10 @@ def upload_file_for_version(doc_vers, file, extension):
# Example: write content to a file manually
with open(os.path.join(upload_path, doc_vers.file_name), 'wb') as f:
f.write(file.getvalue())
elif isinstance(file, str):
# It's a string, handle accordingly
with open(os.path.join(upload_path, doc_vers.file_name), 'w') as f:
f.write(file)
else:
raise TypeError('Unsupported file type.')