Allow for a list of URLs to be entered into the system.

This commit is contained in:
Josako
2024-07-08 15:17:10 +02:00
parent c55fc6b7ce
commit 8f08d6e1ae
3 changed files with 98 additions and 1 deletions

View File

@@ -0,0 +1,24 @@
{% extends 'base.html' %}
{# Page for submitting several URLs at once. Every field of the form passed in
   as `form` is rendered through the shared `render_field` macro. #}
{% from "macros.html" import render_field %}

{% block title %}Add URLs{% endblock %}

{% block content_title %}Add URLs{% endblock %}
{% block content_description %}Add a list of URLs and the corresponding documents to EveAI. In some cases, URLs cannot be loaded directly. Download the HTML and add it as a document in that case.{% endblock %}

{% block content %}
    <form method="post">
        {{ form.hidden_tag() }}
        {# No field is disabled or excluded on this page. #}
        {% set disabled_fields = [] %}
        {% set exclude_fields = [] %}
        {% for field in form %}
            {{ render_field(field, disabled_fields, exclude_fields) }}
        {% endfor %}
        <button type="submit" class="btn btn-primary">Add URLs</button>
    </form>
{% endblock %}

{% block content_footer %}
{% endblock %}

View File

@@ -37,6 +37,21 @@ class AddURLForm(FlaskForm):
session.get('tenant').get('allowed_languages')]
class AddURLsForm(FlaskForm):
    """Form for submitting several URLs in one go, one URL per line.

    Only ``urls`` is required. The remaining fields are optional and are
    read once per submission by the ``add_urls`` view, which applies them to
    every URL in the list.
    """

    # Raw textarea content; the view splits it into individual URLs.
    urls = TextAreaField('URL(s) (one per line)', validators=[DataRequired()])
    # Optional prefix the view prepends to each generated document name.
    name = StringField('Name Prefix', validators=[Length(max=100)])
    # Choices are filled per request in __init__ from the tenant in the session.
    language = SelectField('Language', choices=[], validators=[Optional()])
    user_context = TextAreaField('User Context', validators=[Optional()])
    # NOTE(review): the id value looks like CSS class names rather than an
    # element id - confirm this is what the date-picker script expects.
    valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])

    submit = SubmitField('Submit')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the language choices.

        Positional and keyword arguments are forwarded unchanged to
        ``FlaskForm`` so callers may pass ``formdata``, ``obj``, ``data``,
        ``meta`` or ``prefix``; calling with no arguments behaves as before.

        Raises:
            AttributeError: if the session holds no ``tenant`` entry.
        """
        super().__init__(*args, **kwargs)
        self.language.choices = [(language, language) for language in
                                 session.get('tenant').get('allowed_languages')]
class AddYoutubeForm(FlaskForm):
url = URLField('Youtube URL', validators=[DataRequired(), URL()])
name = StringField('Name', validators=[Length(max=100)])

View File

@@ -17,7 +17,8 @@ import io
from common.models.document import Document, DocumentVersion
from common.extensions import db
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
AddURLsForm
from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for
@@ -129,6 +130,63 @@ def add_url():
return render_template('document/add_url.html', form=form)
def _html_filename_from_url(url):
    """Derive an ``.html`` filename from the last path segment of *url*.

    Falls back to ``index`` when the path is empty or ends with a slash, and
    appends ``.html`` unless the segment already ends with it.
    """
    last_segment = urlparse(url).path.split('/')[-1]
    filename = last_segment or 'index'
    if not filename.endswith('.html'):
        filename += '.html'
    return filename


@document_bp.route('/add_urls', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_urls():
    """Create one document per submitted URL and queue its embedding task.

    On a valid POST every non-blank line of the ``urls`` field is handled
    independently: URLs that already have a document version are skipped,
    the others are fetched, stored through ``create_document_stack`` and
    handed to the ``create_embeddings`` Celery task. The outcome of each URL
    is reported with a flash message, after which the user is redirected to
    the document list. Otherwise the form page is rendered.
    """
    form = AddURLsForm()

    if form.validate_on_submit():
        # splitlines() copes with \n, \r\n and \r; blank lines are dropped.
        urls = [line.strip() for line in form.urls.data.splitlines() if line.strip()]

        for url in urls:
            # One failing URL must not abort the rest of the batch, hence the
            # deliberately broad handler around each iteration.
            try:
                # Existence check only - no need to load every matching row.
                if DocumentVersion.query.filter_by(url=url).first():
                    current_app.logger.info(f'A document with url {url} already exists. No new document created.')
                    flash(f'A document with url {url} already exists. No new document created.', 'info')
                    continue

                # NOTE(review): the server fetches whatever URL was entered;
                # confirm fetch_html restricts schemes/hosts if that matters.
                html = fetch_html(url)
                # presumably fetch_html returns bytes - BytesIO requires it.
                file = io.BytesIO(html)

                filename = _html_filename_from_url(url)

                # Use the name prefix if provided, otherwise use the filename
                doc_name = f"{form.name.data}-{filename}" if form.name.data else filename

                new_doc, new_doc_vers = create_document_stack({
                    'name': doc_name,
                    'url': url,
                    'language': form.language.data,
                    'user_context': form.user_context.data,
                    'valid_from': form.valid_from.data
                }, file, filename, 'html')

                task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
                    session['tenant']['id'],
                    new_doc_vers.id,
                ])
                current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
                                        f'Document Version {new_doc_vers.id}. '
                                        f'Embedding creation task: {task.id}')
                flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
                      'success')
            except Exception as e:
                # NOTE(review): if the failure came from the database, confirm
                # the session is rolled back before the next URL is handled.
                # exception() keeps the traceback in the log.
                current_app.logger.exception(f"Error processing URL {url}: {str(e)}")
                flash(f'Error processing URL {url}: {str(e)}', 'danger')

        return redirect(prefixed_url_for('document_bp.documents'))
    else:
        form_validation_failed(request, form)

    return render_template('document/add_urls.html', form=form)
@document_bp.route('/add_youtube', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_youtube():