Full support for different embedding models in the database. Fixes an error in the previous version.
This commit is contained in:
@@ -75,7 +75,7 @@ class DocumentVersion(db.Model):
|
||||
processing_error = db.Column(db.String(255), nullable=True)
|
||||
|
||||
# Relations
|
||||
embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)
|
||||
embeddings = db.relationship('Embedding', backref='document_version', lazy=True)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<DocumentVersion {self.document_language.document_id}.{self.document_language.language}>.{self.id}>"
|
||||
@@ -87,23 +87,41 @@ class DocumentVersion(db.Model):
|
||||
return f"{self.id}.{self.file_type}"
|
||||
|
||||
|
||||
class EmbeddingMistral(db.Model):
|
||||
class Embedding(db.Model):
    """Base row shared by every embedding, whichever model produced it.

    Holds the text chunk, its owning document version and an ``active`` flag.
    The ``type`` column is the polymorphic discriminator that tells
    SQLAlchemy which subclass a given row belongs to.
    """

    __tablename__ = 'embeddings'

    id = db.Column(db.Integer, primary_key=True)
    # Discriminator value; each subclass registers its own identity string.
    type = db.Column(db.String(30), nullable=False)
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    active = db.Column(db.Boolean, nullable=False, default=True)
    chunk = db.Column(db.Text, nullable=False)

    # Must follow the column definitions: ``type`` here is the column
    # declared above, not the builtin.
    __mapper_args__ = dict(
        polymorphic_identity='embedding',
        polymorphic_on=type,
    )
|
||||
|
||||
|
||||
class EmbeddingMistral(Embedding):
    """Embedding vector produced by the Mistral embedding model.

    Stored in its own table whose primary key references ``embeddings.id``;
    the shared columns live on the ``Embedding`` parent row.
    """

    __tablename__ = 'embedding_mistral'
    __mapper_args__ = dict(polymorphic_identity='embedding_mistral')

    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # 1024 is the MISTRAL Embedding dimension.
    # If another embedding model is chosen, this dimension may need to be changed.
    embedding = db.Column(Vector(1024), nullable=False)
|
||||
|
||||
class EmbeddingSmallOpenAI(db.Model):
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
|
||||
active = db.Column(db.Boolean, nullable=False, default=True)
|
||||
chunk = db.Column(db.Text, nullable=False)
|
||||
|
||||
class EmbeddingSmallOpenAI(Embedding):
    """Embedding vector produced by the OpenAI small embedding model.

    Stored in its own table whose primary key references ``embeddings.id``;
    the shared columns live on the ``Embedding`` parent row.
    """

    __tablename__ = 'embedding_small_openai'
    __mapper_args__ = dict(polymorphic_identity='embedding_small_openai')

    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # 1536 is the OpenAI Small Embedding dimension.
    # If another embedding model is chosen, this dimension may need to be changed.
    embedding = db.Column(Vector(1536), nullable=False)
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Full support for multiple Embedding Models - error in previous version
|
||||
|
||||
Revision ID: f88854b2ac59
|
||||
Revises: 324ed734ed9b
|
||||
Create Date: 2024-05-08 22:35:11.659108
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
import pgvector
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'f88854b2ac59'
|
||||
down_revision = '324ed734ed9b'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Move the embedding tables to a shared ``embeddings`` parent table.

    Steps, in order:

    1. Create ``embeddings`` (id, type, doc_vers_id, active, chunk) and the
       new ``embedding_small_openai`` child table.
    2. Copy the shared columns of every existing ``embedding_mistral`` row
       into ``embeddings`` (keeping the same id) so the foreign key added
       below can be satisfied and no chunk data is lost.
    3. Drop the legacy ``embedding_small_open_ai`` table.
    4. Re-point ``embedding_mistral`` at ``embeddings`` and drop the columns
       that now live on the parent.

    NOTE(review): rows in the legacy ``embedding_small_open_ai`` table are
    discarded, exactly as in the auto-generated version; migrate them first
    if they must be preserved.
    """
    # Imported explicitly: a bare ``import pgvector`` is not guaranteed to
    # load the ``pgvector.sqlalchemy`` submodule.
    from pgvector.sqlalchemy import Vector

    # ### commands auto generated by Alembic - adjusted ###
    op.create_table(
        'embeddings',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('type', sa.String(length=30), nullable=False),
        sa.Column('doc_vers_id', sa.Integer(), nullable=False),
        sa.Column('active', sa.Boolean(), nullable=False),
        sa.Column('chunk', sa.Text(), nullable=False),
        sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_table(
        'embedding_small_openai',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('embedding', Vector(dim=1536), nullable=False),
        sa.ForeignKeyConstraint(['id'], ['embeddings.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )

    # Create a parent row for every existing Mistral embedding *before* the
    # foreign key is added; otherwise the constraint fails on a populated
    # table and the shared columns would be dropped without a copy.
    op.execute(
        "INSERT INTO embeddings (id, type, doc_vers_id, active, chunk) "
        "SELECT id, 'embedding_mistral', doc_vers_id, active, chunk "
        "FROM embedding_mistral"
    )
    # Ids were inserted explicitly, so advance the parent's sequence past
    # them (left at its start value when nothing was copied).
    op.execute(
        "SELECT setval("
        "pg_get_serial_sequence('embeddings', 'id'), "
        "COALESCE((SELECT MAX(id) FROM embeddings), 1), "
        "(SELECT MAX(id) FROM embeddings) IS NOT NULL)"
    )

    op.drop_table('embedding_small_open_ai')
    op.drop_constraint('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', type_='foreignkey')
    # Name the constraint explicitly so the downgrade can drop it by name.
    # This matches the name PostgreSQL assigns by default.
    op.create_foreign_key('embedding_mistral_id_fkey', 'embedding_mistral', 'embeddings', ['id'], ['id'])
    op.drop_column('embedding_mistral', 'doc_vers_id')
    op.drop_column('embedding_mistral', 'chunk')
    op.drop_column('embedding_mistral', 'active')
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
    """Restore the pre-``embeddings`` layout of the embedding tables.

    Steps, in order:

    1. Re-add ``active``, ``chunk`` and ``doc_vers_id`` to
       ``embedding_mistral`` as nullable columns, backfill them from the
       matching ``embeddings`` parent row, then make them NOT NULL. Adding
       them as NOT NULL directly fails on a populated table.
    2. Swap the foreign key on ``embedding_mistral`` back from
       ``embeddings.id`` to ``document_version.id``.
    3. Recreate the legacy ``embedding_small_open_ai`` table and drop the
       new ``embedding_small_openai`` and ``embeddings`` tables.

    NOTE(review): rows in ``embedding_small_openai`` are discarded, exactly
    as in the auto-generated version; migrate them first if they must be
    preserved.
    """
    # Imported explicitly: a bare ``import pgvector`` is not guaranteed to
    # load the ``pgvector.sqlalchemy`` submodule.
    from pgvector.sqlalchemy import Vector

    # ### commands auto generated by Alembic - adjusted ###
    restored_columns = (
        ('active', sa.BOOLEAN()),
        ('chunk', sa.TEXT()),
        ('doc_vers_id', sa.INTEGER()),
    )
    for column_name, column_type in restored_columns:
        op.add_column(
            'embedding_mistral',
            sa.Column(column_name, column_type, autoincrement=False, nullable=True),
        )

    # Pull the shared values back from the parent rows before they vanish.
    op.execute(
        "UPDATE embedding_mistral AS m "
        "SET active = e.active, chunk = e.chunk, doc_vers_id = e.doc_vers_id "
        "FROM embeddings AS e "
        "WHERE e.id = m.id"
    )
    for column_name, column_type in restored_columns:
        op.alter_column(
            'embedding_mistral',
            column_name,
            existing_type=column_type,
            nullable=False,
        )

    # A constraint cannot be dropped without a name. This is the name
    # PostgreSQL assigns by default to the FK on embedding_mistral.id.
    op.drop_constraint('embedding_mistral_id_fkey', 'embedding_mistral', type_='foreignkey')
    op.create_foreign_key('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', 'document_version', ['doc_vers_id'], ['id'])
    op.create_table(
        'embedding_small_open_ai',
        sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False),
        sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False),
        sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False),
        sa.Column('embedding', Vector(dim=1536), autoincrement=False, nullable=False),
        sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False),
        sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], name='embedding_small_open_ai_doc_vers_id_fkey'),
        sa.PrimaryKeyConstraint('id', name='embedding_small_open_ai_pkey'),
    )
    op.drop_table('embedding_small_openai')
    op.drop_table('embeddings')
    # ### end Alembic commands ###
|
||||
Reference in New Issue
Block a user