- Full API application, streamlined, de-duplication of document handling code into document_utils.py
- Added meta-data fields to DocumentVersion - Docker container to support API
This commit is contained in:
@@ -1,24 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from "macros.html" import render_field %}
|
||||
|
||||
{% block title %}Add Youtube Document{% endblock %}
|
||||
|
||||
{% block content_title %}Add Youtube Document{% endblock %}
|
||||
{% block content_description %}Add a youtube url and the corresponding document to EveAI. In some cases, url's cannot be loaded directly. Download the html and add it as a document in that case.{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<form method="post">
|
||||
{{ form.hidden_tag() }}
|
||||
{% set disabled_fields = [] %}
|
||||
{% set exclude_fields = [] %}
|
||||
{% for field in form %}
|
||||
{{ render_field(field, disabled_fields, exclude_fields) }}
|
||||
{% endfor %}
|
||||
<button type="submit" class="btn btn-primary">Add Youtube Document</button>
|
||||
</form>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block content_footer %}
|
||||
|
||||
{% endblock %}
|
||||
@@ -8,7 +8,7 @@
|
||||
{% block content %}
|
||||
<form method="post">
|
||||
{{ form.hidden_tag() }}
|
||||
{% set disabled_fields = ['language', 'system_context'] %}
|
||||
{% set disabled_fields = ['language', 'system_context', 'system_metadata'] %}
|
||||
{% set exclude_fields = [] %}
|
||||
{% for field in form %}
|
||||
{{ render_field(field, disabled_fields, exclude_fields) }}
|
||||
|
||||
@@ -4,6 +4,7 @@ from wtforms import (StringField, BooleanField, SubmitField, DateField,
|
||||
SelectField, FieldList, FormField, TextAreaField, URLField)
|
||||
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError
|
||||
from flask_wtf.file import FileField, FileAllowed, FileRequired
|
||||
import json
|
||||
|
||||
|
||||
def allowed_file(form, field):
|
||||
@@ -14,12 +15,22 @@ def allowed_file(form, field):
|
||||
raise ValidationError('Unsupported file type.')
|
||||
|
||||
|
||||
def validate_json(form, field):
|
||||
if field.data:
|
||||
try:
|
||||
json.loads(field.data)
|
||||
except json.JSONDecodeError:
|
||||
raise ValidationError('Invalid JSON format')
|
||||
|
||||
|
||||
class AddDocumentForm(FlaskForm):
|
||||
file = FileField('File', validators=[FileRequired(), allowed_file])
|
||||
name = StringField('Name', validators=[Length(max=100)])
|
||||
language = SelectField('Language', choices=[], validators=[Optional()])
|
||||
user_context = TextAreaField('User Context', validators=[Optional()])
|
||||
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
|
||||
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
|
||||
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
|
||||
|
||||
submit = SubmitField('Submit')
|
||||
|
||||
@@ -36,6 +47,8 @@ class AddURLForm(FlaskForm):
|
||||
language = SelectField('Language', choices=[], validators=[Optional()])
|
||||
user_context = TextAreaField('User Context', validators=[Optional()])
|
||||
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
|
||||
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
|
||||
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
|
||||
|
||||
submit = SubmitField('Submit')
|
||||
|
||||
@@ -62,21 +75,6 @@ class AddURLsForm(FlaskForm):
|
||||
self.language.data = session.get('tenant').get('default_language')
|
||||
|
||||
|
||||
class AddYoutubeForm(FlaskForm):
|
||||
url = URLField('Youtube URL', validators=[DataRequired(), URL()])
|
||||
name = StringField('Name', validators=[Length(max=100)])
|
||||
language = SelectField('Language', choices=[], validators=[Optional()])
|
||||
user_context = TextAreaField('User Context', validators=[Optional()])
|
||||
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
|
||||
|
||||
submit = SubmitField('Submit')
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.language.choices = [(language, language) for language in
|
||||
session.get('tenant').get('allowed_languages')]
|
||||
|
||||
|
||||
class EditDocumentForm(FlaskForm):
|
||||
name = StringField('Name', validators=[Length(max=100)])
|
||||
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
|
||||
@@ -89,8 +87,7 @@ class EditDocumentVersionForm(FlaskForm):
|
||||
language = StringField('Language')
|
||||
user_context = TextAreaField('User Context', validators=[Optional()])
|
||||
system_context = TextAreaField('System Context', validators=[Optional()])
|
||||
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
|
||||
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
|
||||
|
||||
submit = SubmitField('Submit')
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,27 +1,24 @@
|
||||
import ast
|
||||
import os
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
|
||||
import chardet
|
||||
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
|
||||
from flask_security import roles_accepted, current_user
|
||||
from sqlalchemy import desc
|
||||
from sqlalchemy.orm import joinedload
|
||||
from werkzeug.datastructures import FileStorage
|
||||
from werkzeug.utils import secure_filename
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
import requests
|
||||
from requests.exceptions import SSLError
|
||||
from urllib.parse import urlparse, unquote
|
||||
import io
|
||||
from minio.error import S3Error
|
||||
import json
|
||||
|
||||
from common.models.document import Document, DocumentVersion
|
||||
from common.extensions import db, minio_client
|
||||
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
|
||||
process_multiple_urls, prepare_youtube_document, create_version_for_document, upload_file_for_version
|
||||
process_multiple_urls, get_documents_list, edit_document, \
|
||||
edit_document_version, refresh_document
|
||||
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
|
||||
EveAIDoubleURLException, EveAIYoutubeError
|
||||
EveAIDoubleURLException
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
|
||||
from common.utils.middleware import mw_before_request
|
||||
from common.utils.celery_utils import current_celery
|
||||
@@ -75,7 +72,10 @@ def add_document():
|
||||
'name': form.name.data,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
'valid_from': form.valid_from.data
|
||||
'valid_from': form.valid_from.data,
|
||||
'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
|
||||
'system_metadata': json.loads(form.system_metadata.data) if form.system_metadata.data else None
|
||||
|
||||
}
|
||||
|
||||
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
|
||||
@@ -111,7 +111,9 @@ def add_url():
|
||||
'url': url,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
'valid_from': form.valid_from.data
|
||||
'valid_from': form.valid_from.data,
|
||||
'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
|
||||
'system_metadata': json.loads(form.system_metadata.data) if form.system_metadata.data else None
|
||||
}
|
||||
|
||||
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
|
||||
@@ -175,9 +177,7 @@ def documents():
|
||||
page = request.args.get('page', 1, type=int)
|
||||
per_page = request.args.get('per_page', 10, type=int)
|
||||
|
||||
query = Document.query.order_by(desc(Document.created_at))
|
||||
|
||||
pagination = query.paginate(page=page, per_page=per_page, error_out=False)
|
||||
pagination = get_documents_list(page, per_page)
|
||||
docs = pagination.items
|
||||
|
||||
rows = prepare_table_for_macro(docs, [('id', ''), ('name', ''), ('valid_from', ''), ('valid_to', '')])
|
||||
@@ -195,11 +195,11 @@ def handle_document_selection():
|
||||
|
||||
match action:
|
||||
case 'edit_document':
|
||||
return redirect(prefixed_url_for('document_bp.edit_document', document_id=doc_id))
|
||||
return redirect(prefixed_url_for('document_bp.edit_document_view', document_id=doc_id))
|
||||
case 'document_versions':
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
|
||||
case 'refresh_document':
|
||||
refresh_document(doc_id)
|
||||
refresh_document_view(doc_id)
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=doc_id))
|
||||
case 're_embed_latest_versions':
|
||||
re_embed_latest_versions()
|
||||
@@ -210,25 +210,22 @@ def handle_document_selection():
|
||||
|
||||
@document_bp.route('/edit_document/<int:document_id>', methods=['GET', 'POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def edit_document(document_id):
|
||||
def edit_document_view(document_id):
|
||||
doc = Document.query.get_or_404(document_id)
|
||||
form = EditDocumentForm(obj=doc)
|
||||
|
||||
if form.validate_on_submit():
|
||||
doc.name = form.name.data
|
||||
doc.valid_from = form.valid_from.data
|
||||
doc.valid_to = form.valid_to.data
|
||||
|
||||
update_logging_information(doc, dt.now(tz.utc))
|
||||
|
||||
try:
|
||||
db.session.add(doc)
|
||||
db.session.commit()
|
||||
flash(f'Document {doc.id} updated successfully', 'success')
|
||||
except SQLAlchemyError as e:
|
||||
db.session.rollback()
|
||||
flash(f'Error updating document: {e}', 'danger')
|
||||
current_app.logger.error(f'Error updating document: {e}')
|
||||
updated_doc, error = edit_document(
|
||||
document_id,
|
||||
form.name.data,
|
||||
form.valid_from.data,
|
||||
form.valid_to.data
|
||||
)
|
||||
if updated_doc:
|
||||
flash(f'Document {updated_doc.id} updated successfully', 'success')
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
else:
|
||||
flash(f'Error updating document: {error}', 'danger')
|
||||
else:
|
||||
form_validation_failed(request, form)
|
||||
|
||||
@@ -237,24 +234,20 @@ def edit_document(document_id):
|
||||
|
||||
@document_bp.route('/edit_document_version/<int:document_version_id>', methods=['GET', 'POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def edit_document_version(document_version_id):
|
||||
def edit_document_version_view(document_version_id):
|
||||
doc_vers = DocumentVersion.query.get_or_404(document_version_id)
|
||||
form = EditDocumentVersionForm(obj=doc_vers)
|
||||
|
||||
if form.validate_on_submit():
|
||||
doc_vers.user_context = form.user_context.data
|
||||
|
||||
update_logging_information(doc_vers, dt.now(tz.utc))
|
||||
|
||||
try:
|
||||
db.session.add(doc_vers)
|
||||
db.session.commit()
|
||||
flash(f'Document Version {doc_vers.id} updated successfully', 'success')
|
||||
except SQLAlchemyError as e:
|
||||
db.session.rollback()
|
||||
flash(f'Error updating document version: {e}', 'danger')
|
||||
current_app.logger.error(f'Error updating document version {doc_vers.id} '
|
||||
f'for tenant {session['tenant']['id']}: {e}')
|
||||
updated_version, error = edit_document_version(
|
||||
document_version_id,
|
||||
form.user_context.data
|
||||
)
|
||||
if updated_version:
|
||||
flash(f'Document Version {updated_version.id} updated successfully', 'success')
|
||||
return redirect(prefixed_url_for('document_bp.document_versions', document_id=updated_version.doc_id))
|
||||
else:
|
||||
flash(f'Error updating document version: {error}', 'danger')
|
||||
else:
|
||||
form_validation_failed(request, form)
|
||||
|
||||
@@ -298,7 +291,7 @@ def handle_document_version_selection():
|
||||
|
||||
match action:
|
||||
case 'edit_document_version':
|
||||
return redirect(prefixed_url_for('document_bp.edit_document_version', document_version_id=doc_vers_id))
|
||||
return redirect(prefixed_url_for('document_bp.edit_document_version_view', document_version_id=doc_vers_id))
|
||||
case 'process_document_version':
|
||||
process_version(doc_vers_id)
|
||||
# Add more conditions for other actions
|
||||
@@ -341,55 +334,13 @@ def refresh_all_documents():
|
||||
refresh_document(doc.id)
|
||||
|
||||
|
||||
def refresh_document(doc_id):
|
||||
doc = Document.query.get_or_404(doc_id)
|
||||
doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
|
||||
if not doc_vers.url:
|
||||
current_app.logger.info(f'Document {doc_id} has no URL, skipping refresh')
|
||||
flash(f'This document has no URL. I can only refresh documents with a URL. skipping refresh', 'alert')
|
||||
return
|
||||
|
||||
new_doc_vers = create_version_for_document(doc, doc_vers.url, doc_vers.language, doc_vers.user_context)
|
||||
|
||||
try:
|
||||
db.session.add(new_doc_vers)
|
||||
db.session.commit()
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'Error refreshing document {doc_id} for tenant {session["tenant"]["id"]}: {e}')
|
||||
flash('Error refreshing document.', 'alert')
|
||||
db.session.rollback()
|
||||
error = e.args
|
||||
raise
|
||||
except Exception as e:
|
||||
current_app.logger.error('Unknown error')
|
||||
raise
|
||||
|
||||
html = fetch_html(new_doc_vers.url)
|
||||
file = io.BytesIO(html)
|
||||
|
||||
parsed_url = urlparse(new_doc_vers.url)
|
||||
path_parts = parsed_url.path.split('/')
|
||||
filename = path_parts[-1]
|
||||
if filename == '':
|
||||
filename = 'index'
|
||||
if not filename.endswith('.html'):
|
||||
filename += '.html'
|
||||
extension = 'html'
|
||||
|
||||
current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}')
|
||||
|
||||
upload_file_for_version(new_doc_vers, file, extension, session["tenant"]["id"])
|
||||
|
||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
||||
session['tenant']['id'],
|
||||
new_doc_vers.id,
|
||||
])
|
||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}. '
|
||||
f'Embedding creation task: {task.id}')
|
||||
flash(f'Processing on document {doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
||||
'success')
|
||||
def refresh_document_view(document_id):
|
||||
new_version, result = refresh_document(document_id)
|
||||
if new_version:
|
||||
flash(f'Document refreshed. New version: {new_version.id}. Task ID: {result}', 'success')
|
||||
else:
|
||||
flash(f'Error refreshing document: {result}', 'danger')
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
|
||||
|
||||
def re_embed_latest_versions():
|
||||
|
||||
Reference in New Issue
Block a user