Implement HTML processing and improve both HTML and PDF processing, given the new tenant information.

This commit is contained in:
Josako
2024-05-13 17:18:38 +02:00
parent adee283d7a
commit 6c2e99f467
4 changed files with 253 additions and 111 deletions

View File

@@ -38,7 +38,15 @@ def add_document():
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
create_document_stack(form, file, filename, extension)
new_doc, new_doc_lang, new_doc_vers = create_document_stack(form, file, filename, extension)
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
session['tenant']['id'],
new_doc_vers.id,
])
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
f'Document Version {new_doc_vers.id}. '
f'Embedding creation task: {task.id}')
return redirect(url_for('document_bp.documents'))
@@ -67,7 +75,16 @@ def add_url():
filename += '.html'
extension = 'html'
create_document_stack(form, file, filename, extension)
new_doc, new_doc_lang, new_doc_vers = create_document_stack(form, file, filename, extension)
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
session['tenant']['id'],
new_doc_vers.id,
])
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
f'Document Version {new_doc_vers.id}. '
f'Embedding creation task: {task.id}')
return redirect(url_for('document_bp.documents'))
return render_template('document/add_url.html', form=form)
@@ -123,9 +140,7 @@ def create_document_stack(form, file, filename, extension):
db.session.add(new_doc)
db.session.add(new_doc_lang)
db.session.add(new_doc_vers)
log_session_state(db.session, "Before first commit")
db.session.commit()
log_session_state(db.session, "After first commit")
except SQLAlchemyError as e:
current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {e}')
flash('Error adding document.', 'error')
@@ -140,9 +155,7 @@ def create_document_stack(form, file, filename, extension):
new_doc_lang = db.session.merge(new_doc_lang)
new_doc_vers = db.session.merge(new_doc_vers)
new_doc_lang.latest_version_id = new_doc_vers.id
log_session_state(db.session, "Before second commit")
db.session.commit()
log_session_state(db.session, "After second commit")
except SQLAlchemyError as e:
current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {e}')
flash('Error adding document.', 'error')
@@ -160,15 +173,8 @@ def create_document_stack(form, file, filename, extension):
f'Document Version {new_doc.id}')
upload_file_for_version(new_doc_vers, file, extension)
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
session['tenant']['id'],
new_doc_vers.id,
session['default_embedding_model'],
])
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
f'Document Version {new_doc_vers.id}. '
f'Embedding creation task: {task.id}')
return new_doc, new_doc_lang, new_doc_vers
def log_session_state(session, msg=""):