Full support for different embedding models in the database. This fixes an error in the previous version.

This commit is contained in:
Josako
2024-05-08 22:40:12 +02:00
parent bf6d91527b
commit 667d99daa8
2 changed files with 90 additions and 7 deletions

View File

@@ -75,7 +75,7 @@ class DocumentVersion(db.Model):
processing_error = db.Column(db.String(255), nullable=True)
# Relations
embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)
embeddings = db.relationship('Embedding', backref='document_version', lazy=True)
def __repr__(self):
    """Return a debug string ``<DocumentVersion doc_id.language.version_id>``."""
    doc_lang = self.document_language
    # The previous format string closed the angle bracket before the version
    # id ("<DocumentVersion 3.en>.7>"); keep a single, balanced pair.
    return f"<DocumentVersion {doc_lang.document_id}.{doc_lang.language}.{self.id}>"
@@ -87,23 +87,41 @@ class DocumentVersion(db.Model):
return f"{self.id}.{self.file_type}"
class EmbeddingMistral(db.Model):
class Embedding(db.Model):
    """Parent table shared by every embedding model.

    Holds the columns common to all embeddings; the vector itself lives in a
    per-model child table that joins back to this one on ``id``.
    """

    __tablename__ = 'embeddings'

    id = db.Column(db.Integer, primary_key=True)
    # Discriminator: stores the polymorphic identity of the concrete subclass.
    type = db.Column(db.String(30), nullable=False)
    # Document version the chunk belongs to.
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    active = db.Column(db.Boolean, nullable=False, default=True)
    # Text of the chunk the embedding was computed for.
    chunk = db.Column(db.Text, nullable=False)

    __mapper_args__ = {
        'polymorphic_on': type,
        'polymorphic_identity': 'embedding',
    }
class EmbeddingMistral(Embedding):
    """Mistral embedding vector, stored in a child table of ``embeddings``."""

    __tablename__ = 'embedding_mistral'

    # Primary key doubles as the foreign key to the parent ``embeddings`` row.
    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # The vector width is fixed at 1024, the Mistral embedding dimension;
    # switching to a different embedding model may require changing it.
    embedding = db.Column(Vector(1024), nullable=False)

    __mapper_args__ = {'polymorphic_identity': 'embedding_mistral'}
class EmbeddingSmallOpenAI(db.Model):
id = db.Column(db.Integer, primary_key=True)
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
active = db.Column(db.Boolean, nullable=False, default=True)
chunk = db.Column(db.Text, nullable=False)
class EmbeddingSmallOpenAI(Embedding):
    """OpenAI "small" embedding vector, stored in a child table of ``embeddings``."""

    __tablename__ = 'embedding_small_openai'

    # Primary key doubles as the foreign key to the parent ``embeddings`` row.
    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # The vector width is fixed at 1536, the OpenAI Small embedding dimension;
    # switching to a different embedding model may require changing it.
    embedding = db.Column(Vector(1536), nullable=False)

    __mapper_args__ = {'polymorphic_identity': 'embedding_small_openai'}

View File

@@ -0,0 +1,65 @@
"""Full support for multiple Embedding Models - error in previous version
Revision ID: f88854b2ac59
Revises: 324ed734ed9b
Create Date: 2024-05-08 22:35:11.659108
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
import pgvector
# revision identifiers, used by Alembic.
revision = 'f88854b2ac59'
down_revision = '324ed734ed9b'
branch_labels = None
depends_on = None
def upgrade():
    """Switch the embedding tables to a joined-table inheritance layout.

    Creates the shared ``embeddings`` parent table and the new
    ``embedding_small_openai`` child table, drops the old
    ``embedding_small_open_ai`` table, and turns ``embedding_mistral`` into a
    child table whose ``id`` references ``embeddings.id``.

    NOTE(review): no data is copied. Rows in ``embedding_small_open_ai`` and
    the ``doc_vers_id`` / ``chunk`` / ``active`` values of
    ``embedding_mistral`` are discarded, and adding the new foreign key will
    fail if ``embedding_mistral`` still contains rows, because ``embeddings``
    is empty at that point. Confirm the tables are empty or add a data
    migration before running this against real data.
    """
    # A bare ``import pgvector`` does not load the ``pgvector.sqlalchemy``
    # submodule; import it explicitly so the attribute access below does not
    # depend on some other module having imported it first.
    import pgvector.sqlalchemy

    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'embeddings',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('type', sa.String(length=30), nullable=False),
        sa.Column('doc_vers_id', sa.Integer(), nullable=False),
        sa.Column('active', sa.Boolean(), nullable=False),
        sa.Column('chunk', sa.Text(), nullable=False),
        sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_table(
        'embedding_small_openai',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('embedding', pgvector.sqlalchemy.Vector(dim=1536), nullable=False),
        sa.ForeignKeyConstraint(['id'], ['embeddings.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )
    op.drop_table('embedding_small_open_ai')
    op.drop_constraint('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', type_='foreignkey')
    # Name the constraint explicitly so it can later be dropped by name; an
    # unnamed constraint cannot be targeted by ``op.drop_constraint``. The name
    # follows PostgreSQL's default ``<table>_<column>_fkey`` pattern, so it
    # matches what an unnamed constraint would have been called (assuming no
    # custom naming convention is configured - TODO confirm).
    op.create_foreign_key('embedding_mistral_id_fkey', 'embedding_mistral', 'embeddings', ['id'], ['id'])
    op.drop_column('embedding_mistral', 'doc_vers_id')
    op.drop_column('embedding_mistral', 'chunk')
    op.drop_column('embedding_mistral', 'active')
    # ### end Alembic commands ###
def downgrade():
    """Restore the pre-inheritance layout of the embedding tables.

    Re-adds the ``active`` / ``chunk`` / ``doc_vers_id`` columns and the
    ``document_version`` foreign key to ``embedding_mistral``, recreates the
    standalone ``embedding_small_open_ai`` table, and drops
    ``embedding_small_openai`` and ``embeddings``.

    NOTE(review): the re-added columns are NOT NULL without a server default,
    so the ``add_column`` calls will fail if ``embedding_mistral`` contains
    rows. Data stored in ``embeddings`` / ``embedding_small_openai`` is not
    copied back.
    """
    # A bare ``import pgvector`` does not load the ``pgvector.sqlalchemy``
    # submodule; import it explicitly so the attribute access below does not
    # depend on some other module having imported it first.
    import pgvector.sqlalchemy

    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('embedding_mistral', sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False))
    op.add_column('embedding_mistral', sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False))
    op.add_column('embedding_mistral', sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False))
    # ``drop_constraint`` needs a concrete name; the autogenerated ``None``
    # cannot work. ``embedding_mistral_id_fkey`` is the name PostgreSQL gives
    # by default to a foreign key on ``embedding_mistral.id`` (assuming no
    # custom naming convention is configured - TODO confirm).
    op.drop_constraint('embedding_mistral_id_fkey', 'embedding_mistral', type_='foreignkey')
    op.create_foreign_key('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', 'document_version', ['doc_vers_id'], ['id'])
    op.create_table(
        'embedding_small_open_ai',
        sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False),
        sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False),
        sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False),
        sa.Column('embedding', pgvector.sqlalchemy.Vector(dim=1536), autoincrement=False, nullable=False),
        sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False),
        sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], name='embedding_small_open_ai_doc_vers_id_fkey'),
        sa.PrimaryKeyConstraint('id', name='embedding_small_open_ai_pkey'),
    )
    # Child table first: it holds a foreign key to ``embeddings``.
    op.drop_table('embedding_small_openai')
    op.drop_table('embeddings')
    # ### end Alembic commands ###