Add extra chunking information in Tenant schema

Add extra scripts for flask-migrate to support refactoring
This commit is contained in:
Josako
2024-05-08 17:40:42 +02:00
parent cd5afa0408
commit bf6d91527b
9 changed files with 173 additions and 4 deletions

View File

@@ -27,6 +27,8 @@ class DocumentLanguage(db.Model):
id = db.Column(db.Integer, primary_key=True)
document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False)
language = db.Column(db.String(2), nullable=False)
user_context = db.Column(db.Text, nullable=True)
system_context = db.Column(db.Text, nullable=True)
latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True)
# Versioning Information
@@ -89,6 +91,7 @@ class EmbeddingMistral(db.Model):
id = db.Column(db.Integer, primary_key=True)
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
active = db.Column(db.Boolean, nullable=False, default=True)
chunk = db.Column(db.Text, nullable=False)
# 1024 is the MISTRAL Embedding dimension.
# If another embedding model is chosen, this dimension may need to be changed.
@@ -99,6 +102,7 @@ class EmbeddingSmallOpenAI(db.Model):
id = db.Column(db.Integer, primary_key=True)
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
active = db.Column(db.Boolean, nullable=False, default=True)
chunk = db.Column(db.Text, nullable=False)
# 1536 is the OpenAI Small Embedding dimension.
# If another embedding model is chosen, this dimension may need to be changed.

View File

@@ -1,7 +1,7 @@
from flask import session
from flask_wtf import FlaskForm
from wtforms import (StringField, BooleanField, SubmitField, DateField,
SelectField, FieldList, FormField)
SelectField, FieldList, FormField, TextAreaField)
from wtforms.validators import DataRequired, Length, Optional
from flask_wtf.file import FileField, FileAllowed, FileRequired
@@ -11,6 +11,7 @@ class AddDocumentForm(FlaskForm):
FileRequired()])
name = StringField('Name', validators=[Length(max=100)])
language = SelectField('Language', choices=[], validators=[Optional()])
user_context = TextAreaField('User Context', validators=[Optional()])
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
doc_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()])

View File

@@ -48,7 +48,7 @@ def add_document():
set_logging_information(new_doc, dt.now(tz.utc))
# Create the DocumentLanguage
new_doc_lang = create_language_for_document(new_doc, form.language.data)
new_doc_lang = create_language_for_document(new_doc, form.language.data, form.user_context.data)
# Create the DocumentVersion
new_doc_vers = DocumentVersion()
@@ -122,13 +122,16 @@ def set_logging_information(obj, timestamp):
obj.updated_by = current_user.id
def create_language_for_document(document, language):
def create_language_for_document(document, language, user_context):
new_doc_lang = DocumentLanguage()
if language == '':
new_doc_lang.language = session['default_language']
else:
new_doc_lang.language = language
if user_context != '':
new_doc_lang.user_context = user_context
new_doc_lang.document = document
set_logging_information(new_doc_lang, dt.now(tz.utc))

View File

@@ -6,7 +6,7 @@ from flask import current_app
from alembic import context
from sqlalchemy import NullPool, engine_from_config, text
from common.models import Tenant
from common.models.user import Tenant
import pgvector
from pgvector.sqlalchemy import Vector

View File

@@ -0,0 +1,34 @@
"""Chunking information added to Document models
Revision ID: 324ed734ed9b
Revises: 3bc7bbed1ec6
Create Date: 2024-05-08 17:15:18.472857
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '324ed734ed9b'
down_revision = '3bc7bbed1ec6'
branch_labels = None
depends_on = None
def upgrade():
    """Add context columns to ``document_language`` and ``chunk`` to the embedding tables.

    ``user_context`` and ``system_context`` are nullable text columns.
    ``chunk`` is a NOT NULL text column on ``embedding_mistral`` and
    ``embedding_small_open_ai``.
    """
    # Nullable columns can be added to populated tables without a default.
    op.add_column('document_language', sa.Column('user_context', sa.Text(), nullable=True))
    op.add_column('document_language', sa.Column('system_context', sa.Text(), nullable=True))

    # Adding a NOT NULL column without a default is rejected when the table
    # already contains rows. Back-fill existing rows with an empty string via
    # a temporary server default, then remove the default again so the final
    # column definition is the same as before (NOT NULL, no server default).
    for table_name in ('embedding_mistral', 'embedding_small_open_ai'):
        op.add_column(
            table_name,
            sa.Column('chunk', sa.Text(), nullable=False, server_default=''),
        )
        op.alter_column(
            table_name,
            'chunk',
            existing_type=sa.Text(),
            existing_nullable=False,
            server_default=None,  # None removes the default
        )
def downgrade():
    """Drop the four columns added by this revision, newest first."""
    # (table, column) pairs in the reverse of the order used by upgrade().
    columns_to_remove = (
        ('embedding_small_open_ai', 'chunk'),
        ('embedding_mistral', 'chunk'),
        ('document_language', 'system_context'),
        ('document_language', 'user_context'),
    )
    for table_name, column_name in columns_to_remove:
        op.drop_column(table_name, column_name)

View File

@@ -0,0 +1,32 @@
"""ensure latest_version_id is nullable
Revision ID: 3bc7bbed1ec6
Revises: 5c67abd3b371
Create Date: 2024-05-04 17:15:49.302462
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '3bc7bbed1ec6'
down_revision = '5c67abd3b371'
branch_labels = None
depends_on = None
def upgrade():
    """Allow NULL in ``document_language.latest_version_id``."""
    table_name = 'document_language'
    column_name = 'latest_version_id'
    # existing_type is passed so the backend can restate the column type if it needs to.
    op.alter_column(table_name, column_name, existing_type=sa.INTEGER(), nullable=True)
def downgrade():
    """Make ``document_language.latest_version_id`` NOT NULL again."""
    table_name = 'document_language'
    column_name = 'latest_version_id'
    # NOTE(review): presumably this is rejected by the database if any row
    # still holds NULL in this column - confirm before downgrading.
    op.alter_column(table_name, column_name, existing_type=sa.INTEGER(), nullable=False)

View File

@@ -0,0 +1,30 @@
"""add caching for latest version in Document_Language
Revision ID: 5c67abd3b371
Revises: 45620002ae0d
Create Date: 2024-05-04 16:58:50.534885
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '5c67abd3b371'
down_revision = '45620002ae0d'
branch_labels = None
depends_on = None
def upgrade():
    """Add ``document_language.latest_version_id`` with a foreign key to ``document_version.id``."""
    # The column is added as nullable: a NOT NULL column without a default
    # cannot be added to a table that already contains rows, and there is no
    # meaningful default for a foreign-key value.
    op.add_column(
        'document_language',
        sa.Column('latest_version_id', sa.Integer(), nullable=True),
    )
    # Name the constraint explicitly so that it can be dropped by name later.
    # The name follows PostgreSQL's default pattern for foreign keys
    # (<table>_<column>_fkey), i.e. what an unnamed constraint would get.
    # NOTE(review): assumes no custom naming convention is configured on the
    # metadata - confirm.
    op.create_foreign_key(
        'document_language_latest_version_id_fkey',
        'document_language',
        'document_version',
        ['latest_version_id'],
        ['id'],
    )
def downgrade():
    """Drop the ``latest_version_id`` foreign key and column from ``document_language``."""
    # A constraint can only be dropped by name; passing None cannot produce a
    # valid DROP CONSTRAINT statement. The name used here is PostgreSQL's
    # default for a foreign key created without an explicit name
    # (<table>_<column>_fkey).
    # NOTE(review): assumes no custom naming convention is configured on the
    # metadata - confirm against the actual constraint name in the database.
    op.drop_constraint(
        'document_language_latest_version_id_fkey',
        'document_language',
        type_='foreignkey',
    )
    op.drop_column('document_language', 'latest_version_id')

33
scripts/db_migrate.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Generate a new flask-migrate revision for one migrations directory.
#
# Usage: ./db_migrate.sh -m "Your migration message" -d migrations/public
#
# The project root defaults to the parent of the directory containing this
# script (i.e. scripts/..). Set EVEAI_ROOT to point somewhere else.

# Print the usage line to stderr.
usage() {
    echo "Usage: $(basename "$0") -m \"Your migration message\" -d migrations/public" >&2
}

# Resolve the project root from the script location instead of a
# machine-specific absolute path, so the script works from any checkout.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
project_root="${EVEAI_ROOT:-$(dirname "$script_dir")}"

cd "$project_root" || exit 1

# Activate the project's virtual environment; stop if it is missing so that
# a system-wide flask is never used by accident.
# shellcheck disable=SC1091
source "$project_root/.venv/bin/activate" || {
    echo "Could not activate virtual environment in $project_root/.venv" >&2
    exit 1
}

# Start from empty values so nothing leaks in from the calling environment.
message=""
directory=""

# The leading ':' selects silent mode: getopts then stores the offending
# option letter in OPTARG, which the error messages below rely on.
while getopts ":m:d:" flag; do
    case "${flag}" in
        m) message=${OPTARG} ;;
        d) directory=${OPTARG} ;;
        :)
            echo "Option -${OPTARG} requires an argument." >&2
            usage
            exit 1
            ;;
        *)
            echo "Invalid option: -${OPTARG}" >&2
            usage
            exit 1
            ;;
    esac
done

# Check if the message and directory are provided
if [ -z "$message" ] || [ -z "$directory" ]; then
    echo "Both message and directory are required." >&2
    usage
    exit 1
fi

# Set FLASK_APP environment variable
export FLASK_APP=scripts/run_eveai_app.py  # Modify if your Flask app is initiated differently
# Append the project root; avoid a leading ':' when PYTHONPATH was empty.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$project_root"

# Run the Flask migration command
flask db migrate -m "$message" -d "$directory"

32
scripts/db_upgrade.sh Executable file
View File

@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Apply pending flask-migrate revisions from one migrations directory.
#
# Usage: ./db_upgrade.sh -d migrations/public
#
# The project root defaults to the parent of the directory containing this
# script (i.e. scripts/..). Set EVEAI_ROOT to point somewhere else.

# Print the usage line to stderr.
usage() {
    echo "Usage: $(basename "$0") -d migrations/public" >&2
}

# Resolve the project root from the script location instead of a
# machine-specific absolute path, so the script works from any checkout.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
project_root="${EVEAI_ROOT:-$(dirname "$script_dir")}"

cd "$project_root" || exit 1

# Activate the project's virtual environment; stop if it is missing so that
# a system-wide flask is never used by accident.
# shellcheck disable=SC1091
source "$project_root/.venv/bin/activate" || {
    echo "Could not activate virtual environment in $project_root/.venv" >&2
    exit 1
}

# Start from an empty value so nothing leaks in from the calling environment.
directory=""

# The leading ':' selects silent mode: getopts then stores the offending
# option letter in OPTARG, which the error messages below rely on.
while getopts ":d:" flag; do
    case "${flag}" in
        d) directory=${OPTARG} ;;
        :)
            echo "Option -${OPTARG} requires an argument." >&2
            usage
            exit 1
            ;;
        *)
            echo "Invalid option: -${OPTARG}" >&2
            usage
            exit 1
            ;;
    esac
done

# Check if the directory is provided
if [ -z "$directory" ]; then
    echo "Directory parameter is required." >&2
    usage
    exit 1
fi

# Set FLASK_APP environment variable
export FLASK_APP=scripts/run_eveai_app.py  # Modify if your Flask app is initiated differently
# Append the project root; avoid a leading ':' when PYTHONPATH was empty.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$project_root"

# Run the Flask upgrade command
flask db upgrade -d "$directory"