diff --git a/eveai_app/templates/document/add_urls.html b/eveai_app/templates/document/add_urls.html new file mode 100644 index 0000000..da51a5c --- /dev/null +++ b/eveai_app/templates/document/add_urls.html @@ -0,0 +1,24 @@ +{% extends 'base.html' %} +{% from "macros.html" import render_field %} + +{% block title %}Add URLs{% endblock %} + +{% block content_title %}Add URLs{% endblock %} +{% block content_description %}Add a list of URLs and the corresponding documents to EveAI. In some cases, URLs cannot be loaded directly. In that case, download the HTML and add it as a document.{% endblock %} + +{% block content %} +
+ {{ form.hidden_tag() }} + {% set disabled_fields = [] %} + {% set exclude_fields = [] %} + {% for field in form %} + {{ render_field(field, disabled_fields, exclude_fields) }} + {% endfor %} + +
class AddURLsForm(FlaskForm):
    """Form for adding several URLs in one submission (one URL per line).

    Besides the URL list, the form carries an optional name prefix, a
    language, a free-text user context and an optional validity start date.
    The language choices are filled in per request from the
    ``allowed_languages`` entry of the ``tenant`` object in the session.
    """
    urls = TextAreaField('URL(s) (one per line)', validators=[DataRequired()])
    name = StringField('Name Prefix', validators=[Length(max=100)])
    language = SelectField('Language', choices=[], validators=[Optional()])
    user_context = TextAreaField('User Context', validators=[Optional()])
    # NOTE(review): this id contains a space and looks like CSS class names;
    # presumably templates/JS rely on it as-is -- confirm before changing.
    valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])

    submit = SubmitField('Submit')

    def __init__(self, *args, **kwargs):
        """Build the form and populate the tenant-specific language choices.

        Positional and keyword arguments are forwarded unchanged to
        ``FlaskForm`` so callers can still pass the standard form
        constructor options; calling with no arguments works as before.
        """
        super().__init__(*args, **kwargs)
        self.language.choices = [(language, language) for language in
                                 session.get('tenant').get('allowed_languages')]
def _html_filename_from_url(url):
    """Derive an ``.html`` filename from the last path segment of *url*.

    Falls back to ``index`` when the path is empty or ends with a slash, and
    appends ``.html`` when the segment does not already end with it.
    """
    last_segment = urlparse(url).path.split('/')[-1]
    filename = last_segment or 'index'
    if not filename.endswith('.html'):
        filename += '.html'
    return filename


@document_bp.route('/add_urls', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_urls():
    """Create one document per submitted URL and queue embedding creation.

    On a valid POST every non-empty line of the ``urls`` field is treated as
    a URL. URLs that already have a ``DocumentVersion`` are skipped with an
    informational flash message. For the others the HTML is fetched, a
    document stack is created and a ``create_embeddings`` task is sent to the
    ``embeddings`` queue. A failure for one URL is logged and flashed, and
    processing continues with the next URL. Afterwards the user is redirected
    to the documents overview.

    On GET, or when validation fails, the ``document/add_urls.html`` template
    is rendered with the form.
    """
    form = AddURLsForm()

    if form.validate_on_submit():
        # One URL per line. Blank lines are dropped and repeated URLs are
        # collapsed (order preserved) so the same page is never fetched twice
        # within a single submission.
        urls = list(dict.fromkeys(
            line.strip() for line in form.urls.data.splitlines() if line.strip()
        ))

        for url in urls:
            try:
                # Only existence matters here, so fetch at most one row.
                if DocumentVersion.query.filter_by(url=url).first() is not None:
                    current_app.logger.info(f'A document with url {url} already exists. No new document created.')
                    flash(f'A document with url {url} already exists. No new document created.', 'info')
                    continue

                html = fetch_html(url)
                file = io.BytesIO(html)

                filename = _html_filename_from_url(url)

                # Use the name prefix if provided, otherwise use the filename
                doc_name = f"{form.name.data}-{filename}" if form.name.data else filename

                new_doc, new_doc_vers = create_document_stack({
                    'name': doc_name,
                    'url': url,
                    'language': form.language.data,
                    'user_context': form.user_context.data,
                    'valid_from': form.valid_from.data
                }, file, filename, 'html')

                tenant_id = session['tenant']['id']
                task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
                    tenant_id,
                    new_doc_vers.id,
                ])
                current_app.logger.info(f'Embedding creation started for tenant {tenant_id}, '
                                        f'Document Version {new_doc_vers.id}. '
                                        f'Embedding creation task: {task.id}')
                flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. '
                      f'Task ID: {task.id}.',
                      'success')

            except Exception as e:
                # Per-URL boundary: one bad URL must not abort the whole batch.
                # Roll back so a failed database operation does not leave the
                # shared session unusable for the remaining URLs.
                # NOTE(review): create_document_stack's own transaction
                # handling is not visible here -- confirm the rollback is not
                # redundant with it.
                db.session.rollback()
                # logger.exception keeps the traceback alongside the message.
                current_app.logger.exception(f"Error processing URL {url}: {str(e)}")
                flash(f'Error processing URL {url}: {str(e)}', 'danger')

        return redirect(prefixed_url_for('document_bp.documents'))
    else:
        form_validation_failed(request, form)

    return render_template('document/add_urls.html', form=form)