diff --git a/common/models/document.py b/common/models/document.py index 1b3dfdc..9063e61 100644 --- a/common/models/document.py +++ b/common/models/document.py @@ -27,6 +27,8 @@ class DocumentLanguage(db.Model): id = db.Column(db.Integer, primary_key=True) document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False) language = db.Column(db.String(2), nullable=False) + user_context = db.Column(db.Text, nullable=True) + system_context = db.Column(db.Text, nullable=True) latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True) # Versioning Information @@ -89,6 +91,7 @@ class EmbeddingMistral(db.Model): id = db.Column(db.Integer, primary_key=True) doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False) active = db.Column(db.Boolean, nullable=False, default=True) + chunk = db.Column(db.Text, nullable=False) # 1024 is the MISTRAL Embedding dimension. # If another embedding model is chosen, this dimension may need to be changed. @@ -99,6 +102,7 @@ class EmbeddingSmallOpenAI(db.Model): id = db.Column(db.Integer, primary_key=True) doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False) active = db.Column(db.Boolean, nullable=False, default=True) + chunk = db.Column(db.Text, nullable=False) # 1536 is the OpenAI Small Embedding dimension. # If another embedding model is chosen, this dimension may need to be changed. 
diff --git a/eveai_app/views/document_forms.py b/eveai_app/views/document_forms.py index 3102d5f..3b0a670 100644 --- a/eveai_app/views/document_forms.py +++ b/eveai_app/views/document_forms.py @@ -1,7 +1,7 @@ from flask import session from flask_wtf import FlaskForm from wtforms import (StringField, BooleanField, SubmitField, DateField, - SelectField, FieldList, FormField) + SelectField, FieldList, FormField, TextAreaField) from wtforms.validators import DataRequired, Length, Optional from flask_wtf.file import FileField, FileAllowed, FileRequired @@ -11,6 +11,7 @@ class AddDocumentForm(FlaskForm): FileRequired()]) name = StringField('Name', validators=[Length(max=100)]) language = SelectField('Language', choices=[], validators=[Optional()]) + user_context = TextAreaField('User Context', validators=[Optional()]) valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()]) doc_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()]) diff --git a/eveai_app/views/document_views.py b/eveai_app/views/document_views.py index a047d8a..7c68a4a 100644 --- a/eveai_app/views/document_views.py +++ b/eveai_app/views/document_views.py @@ -48,7 +48,7 @@ def add_document(): set_logging_information(new_doc, dt.now(tz.utc)) # Create the DocumentLanguage - new_doc_lang = create_language_for_document(new_doc, form.language.data) + new_doc_lang = create_language_for_document(new_doc, form.language.data, form.user_context.data) # Create the DocumentVersion new_doc_vers = DocumentVersion() @@ -122,13 +122,16 @@ def set_logging_information(obj, timestamp): obj.updated_by = current_user.id -def create_language_for_document(document, language): +def create_language_for_document(document, language, user_context): new_doc_lang = DocumentLanguage() if language == '': new_doc_lang.language = session['default_language'] else: new_doc_lang.language = language + if user_context != '': + new_doc_lang.user_context = user_context 
# revision identifiers, used by Alembic.
revision = '324ed734ed9b'
down_revision = '3bc7bbed1ec6'
branch_labels = None
depends_on = None


def upgrade():
    """Add the context columns and the per-embedding ``chunk`` column.

    * ``document_language`` gains the nullable text columns ``user_context``
      and ``system_context``.
    * ``embedding_mistral`` and ``embedding_small_open_ai`` gain a NOT NULL
      text column ``chunk``.

    A NOT NULL column cannot be added to a table that already contains rows
    unless the database is told what to store in those rows.  The column is
    therefore created with a temporary server default of ``''`` and the
    default is removed again straight away, so the final schema is the same
    as a plain ``nullable=False`` column without a default.
    """
    for column_name in ('user_context', 'system_context'):
        op.add_column('document_language', sa.Column(column_name, sa.Text(), nullable=True))

    for table_name in ('embedding_mistral', 'embedding_small_open_ai'):
        # Temporary default so existing rows satisfy NOT NULL.
        # NOTE(review): pre-existing embeddings end up with an empty chunk;
        # re-embed or backfill them if the chunk text is needed -- confirm.
        op.add_column(
            table_name,
            sa.Column('chunk', sa.Text(), nullable=False, server_default=''),
        )
        # Drop the default again: new rows must supply their chunk explicitly.
        op.alter_column(table_name, 'chunk', server_default=None)
# revision identifiers, used by Alembic.
revision = '3bc7bbed1ec6'
down_revision = '5c67abd3b371'
branch_labels = None
depends_on = None


def _set_latest_version_nullable(allow_null):
    """Alter the NULL-ability of ``document_language.latest_version_id``."""
    op.alter_column(
        'document_language',
        'latest_version_id',
        existing_type=sa.INTEGER(),
        nullable=allow_null,
    )


def upgrade():
    """Allow ``document_language.latest_version_id`` to hold NULL."""
    _set_latest_version_nullable(True)


def downgrade():
    """Make ``document_language.latest_version_id`` NOT NULL again."""
    _set_latest_version_nullable(False)
# revision identifiers, used by Alembic.
revision = '5c67abd3b371'
down_revision = '45620002ae0d'
branch_labels = None
depends_on = None

# Explicit name for the foreign key so that downgrade() can drop it.
# A constraint created with name ``None`` cannot be dropped by name ``None``.
# The value equals the name PostgreSQL assigns on its own to an unnamed
# single-column foreign key (<table>_<column>_fkey), so databases that were
# already upgraded with the unnamed constraint can still be downgraded.
# NOTE(review): assumes PostgreSQL and no SQLAlchemy naming convention on the
# metadata -- confirm.
LATEST_VERSION_FK = 'document_language_latest_version_id_fkey'


def upgrade():
    """Add ``document_language.latest_version_id`` referencing ``document_version.id``."""
    # NOTE(review): a NOT NULL column without a default cannot be added to a
    # table that already has rows; confirm document_language is empty when
    # this revision runs.
    op.add_column(
        'document_language',
        sa.Column('latest_version_id', sa.Integer(), nullable=False),
    )
    op.create_foreign_key(
        LATEST_VERSION_FK,
        'document_language',
        'document_version',
        ['latest_version_id'],
        ['id'],
    )


def downgrade():
    """Drop the foreign key and the ``latest_version_id`` column again."""
    # The constraint has to go first; the column it covers is dropped after.
    op.drop_constraint(LATEST_VERSION_FK, 'document_language', type_='foreignkey')
    op.drop_column('document_language', 'latest_version_id')
#!/usr/bin/env bash
#
# Apply pending Alembic migrations for one migrations directory.
#
# Usage: ./db_upgrade.sh -d migrations/public
#
# Environment:
#   EVEAI_ROOT  Project root. Defaults to the parent of the directory that
#               contains this script, so the script no longer depends on one
#               developer's absolute path.

usage() {
  echo "Usage: $0 -d migrations/public"
}

# Locate the project root relative to this script (scripts/ lives inside it).
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
project_root="${EVEAI_ROOT:-$(dirname "$script_dir")}"

cd "$project_root" || exit 1

# Activate the project's virtual environment when it exists; otherwise carry
# on with whatever environment is already active.
venv_activate="$project_root/.venv/bin/activate"
if [ -f "$venv_activate" ]; then
  # shellcheck disable=SC1090
  source "$venv_activate"
else
  echo "Warning: virtual environment not found at $venv_activate" >&2
fi

# The leading ':' puts getopts in silent mode, which is what makes $OPTARG
# hold the offending option letter in the error branches below.
while getopts ":d:" flag
do
  case "${flag}" in
    d) directory=${OPTARG};;
    :) # Option given without its argument
      echo "Option -$OPTARG requires an argument." >&2
      usage
      exit 1
      ;;
    *) # Catch-all for unexpected arguments
      echo "Invalid option: -$OPTARG" >&2
      usage
      exit 1
      ;;
  esac
done

# Check if the directory is provided
if [ -z "$directory" ]; then
  echo "Directory parameter is required."
  usage
  exit 1
fi

# Set FLASK_APP environment variable
export FLASK_APP=scripts/run_eveai_app.py  # Modify if your Flask app is initiated differently
# Append the project root without creating an empty PYTHONPATH entry (an
# empty entry means "current directory") when PYTHONPATH was unset.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$project_root/"

# Run the Flask upgrade command
flask db upgrade -d "$directory"