Add extra chunking information in Tenant schema

Add extra scripts for flask-migrate to support refactoring
This commit is contained in:
Josako
2024-05-08 17:40:42 +02:00
parent cd5afa0408
commit bf6d91527b
9 changed files with 173 additions and 4 deletions

View File

@@ -27,6 +27,8 @@ class DocumentLanguage(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False) document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False)
language = db.Column(db.String(2), nullable=False) language = db.Column(db.String(2), nullable=False)
user_context = db.Column(db.Text, nullable=True)
system_context = db.Column(db.Text, nullable=True)
latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True) latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True)
# Versioning Information # Versioning Information
@@ -89,6 +91,7 @@ class EmbeddingMistral(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False) doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
active = db.Column(db.Boolean, nullable=False, default=True) active = db.Column(db.Boolean, nullable=False, default=True)
chunk = db.Column(db.Text, nullable=False)
# 1024 is the MISTRAL Embedding dimension. # 1024 is the MISTRAL Embedding dimension.
# If another embedding model is chosen, this dimension may need to be changed. # If another embedding model is chosen, this dimension may need to be changed.
@@ -99,6 +102,7 @@ class EmbeddingSmallOpenAI(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False) doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
active = db.Column(db.Boolean, nullable=False, default=True) active = db.Column(db.Boolean, nullable=False, default=True)
chunk = db.Column(db.Text, nullable=False)
# 1536 is the OpenAI Small Embedding dimension. # 1536 is the OpenAI Small Embedding dimension.
# If another embedding model is chosen, this dimension may need to be changed. # If another embedding model is chosen, this dimension may need to be changed.

View File

@@ -1,7 +1,7 @@
from flask import session from flask import session
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from wtforms import (StringField, BooleanField, SubmitField, DateField, from wtforms import (StringField, BooleanField, SubmitField, DateField,
SelectField, FieldList, FormField) SelectField, FieldList, FormField, TextAreaField)
from wtforms.validators import DataRequired, Length, Optional from wtforms.validators import DataRequired, Length, Optional
from flask_wtf.file import FileField, FileAllowed, FileRequired from flask_wtf.file import FileField, FileAllowed, FileRequired
@@ -11,6 +11,7 @@ class AddDocumentForm(FlaskForm):
FileRequired()]) FileRequired()])
name = StringField('Name', validators=[Length(max=100)]) name = StringField('Name', validators=[Length(max=100)])
language = SelectField('Language', choices=[], validators=[Optional()]) language = SelectField('Language', choices=[], validators=[Optional()])
user_context = TextAreaField('User Context', validators=[Optional()])
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()]) valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
doc_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()]) doc_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()])

View File

@@ -48,7 +48,7 @@ def add_document():
set_logging_information(new_doc, dt.now(tz.utc)) set_logging_information(new_doc, dt.now(tz.utc))
# Create the DocumentLanguage # Create the DocumentLanguage
new_doc_lang = create_language_for_document(new_doc, form.language.data) new_doc_lang = create_language_for_document(new_doc, form.language.data, form.user_context.data)
# Create the DocumentVersion # Create the DocumentVersion
new_doc_vers = DocumentVersion() new_doc_vers = DocumentVersion()
@@ -122,13 +122,16 @@ def set_logging_information(obj, timestamp):
obj.updated_by = current_user.id obj.updated_by = current_user.id
def create_language_for_document(document, language): def create_language_for_document(document, language, user_context):
new_doc_lang = DocumentLanguage() new_doc_lang = DocumentLanguage()
if language == '': if language == '':
new_doc_lang.language = session['default_language'] new_doc_lang.language = session['default_language']
else: else:
new_doc_lang.language = language new_doc_lang.language = language
if user_context != '':
new_doc_lang.user_context = user_context
new_doc_lang.document = document new_doc_lang.document = document
set_logging_information(new_doc_lang, dt.now(tz.utc)) set_logging_information(new_doc_lang, dt.now(tz.utc))

View File

@@ -6,7 +6,7 @@ from flask import current_app
from alembic import context from alembic import context
from sqlalchemy import NullPool, engine_from_config, text from sqlalchemy import NullPool, engine_from_config, text
from common.models import Tenant from common.models.user import Tenant
import pgvector import pgvector
from pgvector.sqlalchemy import Vector from pgvector.sqlalchemy import Vector

View File

@@ -0,0 +1,34 @@
"""Chunking information added to Document models
Revision ID: 324ed734ed9b
Revises: 3bc7bbed1ec6
Create Date: 2024-05-08 17:15:18.472857
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '324ed734ed9b'
down_revision = '3bc7bbed1ec6'
branch_labels = None
depends_on = None
def upgrade():
    """Add context columns to ``document_language`` and ``chunk`` to the embedding tables.

    ``user_context`` and ``system_context`` are nullable text columns.
    ``chunk`` is NOT NULL on both ``embedding_mistral`` and
    ``embedding_small_open_ai``.
    """
    # ### commands auto generated by Alembic - adjusted by hand ###
    op.add_column('document_language', sa.Column('user_context', sa.Text(), nullable=True))
    op.add_column('document_language', sa.Column('system_context', sa.Text(), nullable=True))

    for table_name in ('embedding_mistral', 'embedding_small_open_ai'):
        # A NOT NULL column without a default cannot be added to a table that
        # already contains rows, so backfill existing rows with an empty string
        # through a temporary server default.
        op.add_column(
            table_name,
            sa.Column('chunk', sa.Text(), nullable=False, server_default=''),
        )
        # Remove the temporary default again so that new rows must supply a
        # chunk explicitly (the column is declared without a server default).
        op.alter_column(
            table_name,
            'chunk',
            existing_type=sa.Text(),
            existing_nullable=False,
            server_default=None,
        )
    # ### end Alembic commands ###
def downgrade():
    """Drop the four columns this revision adds, newest first."""
    # ### commands auto generated by Alembic - please adjust! ###
    columns_to_drop = (
        ('embedding_small_open_ai', 'chunk'),
        ('embedding_mistral', 'chunk'),
        ('document_language', 'system_context'),
        ('document_language', 'user_context'),
    )
    for table_name, column_name in columns_to_drop:
        op.drop_column(table_name, column_name)
    # ### end Alembic commands ###

View File

@@ -0,0 +1,32 @@
"""ensure latest_version_id is nullable
Revision ID: 3bc7bbed1ec6
Revises: 5c67abd3b371
Create Date: 2024-05-04 17:15:49.302462
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '3bc7bbed1ec6'
down_revision = '5c67abd3b371'
branch_labels = None
depends_on = None
def upgrade():
    """Make ``document_language.latest_version_id`` nullable."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column(
        table_name='document_language',
        column_name='latest_version_id',
        existing_type=sa.INTEGER(),
        nullable=True,
    )
    # ### end Alembic commands ###
def downgrade():
    """Restore the NOT NULL constraint on ``document_language.latest_version_id``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): presumably this fails if any row still holds NULL in
    # latest_version_id - confirm and backfill before downgrading.
    op.alter_column(
        table_name='document_language',
        column_name='latest_version_id',
        existing_type=sa.INTEGER(),
        nullable=False,
    )
    # ### end Alembic commands ###

View File

@@ -0,0 +1,30 @@
"""add caching for latest version in Document_Language
Revision ID: 5c67abd3b371
Revises: 45620002ae0d
Create Date: 2024-05-04 16:58:50.534885
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '5c67abd3b371'
down_revision = '45620002ae0d'
branch_labels = None
depends_on = None
def upgrade():
    """Add ``document_language.latest_version_id`` referencing ``document_version.id``."""
    # ### commands auto generated by Alembic - adjusted by hand ###
    # The column is created nullable: a NOT NULL column without a default
    # cannot be added to a table that already contains rows, and there is no
    # meaningful default for a foreign key.
    op.add_column(
        'document_language',
        sa.Column('latest_version_id', sa.Integer(), nullable=True),
    )
    # Name the constraint explicitly so it can be dropped by name later. The
    # name equals what PostgreSQL would generate for an unnamed constraint on
    # this column, so databases that ran the unnamed variant end up identical.
    op.create_foreign_key(
        'document_language_latest_version_id_fkey',
        'document_language',
        'document_version',
        ['latest_version_id'],
        ['id'],
    )
    # ### end Alembic commands ###
def downgrade():
    """Remove ``document_language.latest_version_id`` and its foreign key."""
    # ### commands auto generated by Alembic - adjusted by hand ###
    # A constraint cannot be dropped without a name. PostgreSQL names an
    # unnamed single-column foreign key "<table>_<column>_fkey".
    # NOTE(review): assumes a PostgreSQL database with no custom naming
    # convention on the metadata - confirm the actual constraint name.
    op.drop_constraint(
        'document_language_latest_version_id_fkey',
        'document_language',
        type_='foreignkey',
    )
    op.drop_column('document_language', 'latest_version_id')
    # ### end Alembic commands ###

33
scripts/db_migrate.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Generate a new Flask-Migrate revision for one migrations directory.
#
# Usage: ./db_migrate.sh -m "Your migration message" -d migrations/public
#
# The project root defaults to the parent of the directory containing this
# script; set EVEAI_ROOT to point somewhere else.

usage() {
    echo "Usage: ./db_migrate.sh -m \"Your migration message\" -d migrations/public"
}

# Locate the project root relative to this script instead of relying on a
# machine-specific absolute path.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
project_root="${EVEAI_ROOT:-$(dirname "$script_dir")}"

cd "$project_root" || exit 1

# Activating the virtualenv stays best-effort: warn and carry on if it is missing.
if [ -f "$project_root/.venv/bin/activate" ]; then
    # shellcheck disable=SC1091
    source "$project_root/.venv/bin/activate"
else
    echo "Warning: no virtualenv found at $project_root/.venv" >&2
fi

# The leading ':' selects getopts' silent mode, in which OPTARG holds the
# offending option letter for both the '?' and ':' cases.
while getopts ":m:d:" flag
do
    case "${flag}" in
        m) message=${OPTARG};;
        d) directory=${OPTARG};;
        :)  # Option given without its required argument
            echo "Option -$OPTARG requires an argument." >&2
            usage >&2
            exit 1
            ;;
        *)  # Catch-all for unexpected options
            echo "Invalid option: -$OPTARG" >&2
            usage >&2
            exit 1
            ;;
    esac
done

# Check if the message and directory are provided
if [ -z "$message" ] || [ -z "$directory" ]; then
    echo "Both message and directory are required." >&2
    usage >&2
    exit 1
fi

# Set FLASK_APP environment variable
export FLASK_APP=scripts/run_eveai_app.py  # Modify if your Flask app is initiated differently
# Append the project root without leaving an empty entry when PYTHONPATH is unset.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$project_root"

# Run the Flask migration command
flask db migrate -m "$message" -d "$directory"

32
scripts/db_upgrade.sh Executable file
View File

@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Apply pending Flask-Migrate revisions from one migrations directory.
#
# Usage: ./db_upgrade.sh -d migrations/public
#
# The project root defaults to the parent of the directory containing this
# script; set EVEAI_ROOT to point somewhere else.

usage() {
    echo "Usage: ./db_upgrade.sh -d migrations/public"
}

# Locate the project root relative to this script instead of relying on a
# machine-specific absolute path.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
project_root="${EVEAI_ROOT:-$(dirname "$script_dir")}"

cd "$project_root" || exit 1

# Activating the virtualenv stays best-effort: warn and carry on if it is missing.
if [ -f "$project_root/.venv/bin/activate" ]; then
    # shellcheck disable=SC1091
    source "$project_root/.venv/bin/activate"
else
    echo "Warning: no virtualenv found at $project_root/.venv" >&2
fi

# The leading ':' selects getopts' silent mode, in which OPTARG holds the
# offending option letter for both the '?' and ':' cases.
while getopts ":d:" flag
do
    case "${flag}" in
        d) directory=${OPTARG};;
        :)  # Option given without its required argument
            echo "Option -$OPTARG requires an argument." >&2
            usage >&2
            exit 1
            ;;
        *)  # Catch-all for unexpected options
            echo "Invalid option: -$OPTARG" >&2
            usage >&2
            exit 1
            ;;
    esac
done

# Check if the directory is provided
if [ -z "$directory" ]; then
    echo "Directory parameter is required." >&2
    usage >&2
    exit 1
fi

# Set FLASK_APP environment variable
export FLASK_APP=scripts/run_eveai_app.py  # Modify if your Flask app is initiated differently
# Append the project root without leaving an empty entry when PYTHONPATH is unset.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$project_root"

# Run the Flask upgrade command
flask db upgrade -d "$directory"