Add full support for different embedding models in the database, fixing an error in the previous version.
This commit is contained in:
@@ -75,7 +75,7 @@ class DocumentVersion(db.Model):
|
|||||||
processing_error = db.Column(db.String(255), nullable=True)
|
processing_error = db.Column(db.String(255), nullable=True)
|
||||||
|
|
||||||
# Relations
|
# Relations
|
||||||
embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)
|
embeddings = db.relationship('Embedding', backref='document_version', lazy=True)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"<DocumentVersion {self.document_language.document_id}.{self.document_language.language}>.{self.id}>"
|
return f"<DocumentVersion {self.document_language.document_id}.{self.document_language.language}>.{self.id}>"
|
||||||
@@ -87,23 +87,41 @@ class DocumentVersion(db.Model):
|
|||||||
return f"{self.id}.{self.file_type}"
|
return f"{self.id}.{self.file_type}"
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingMistral(db.Model):
|
class Embedding(db.Model):
|
||||||
|
__tablename__ = 'embeddings'
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
id = db.Column(db.Integer, primary_key=True)
|
||||||
|
type = db.Column(db.String(30), nullable=False)
|
||||||
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
|
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
|
||||||
active = db.Column(db.Boolean, nullable=False, default=True)
|
active = db.Column(db.Boolean, nullable=False, default=True)
|
||||||
chunk = db.Column(db.Text, nullable=False)
|
chunk = db.Column(db.Text, nullable=False)
|
||||||
|
|
||||||
|
__mapper_args__ = {
|
||||||
|
'polymorphic_identity': 'embedding',
|
||||||
|
'polymorphic_on': type
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class EmbeddingMistral(Embedding):
|
||||||
|
__tablename__ = 'embedding_mistral'
|
||||||
|
id = db.Column(db.Integer, db.ForeignKey('embeddings.id'), primary_key=True)
|
||||||
|
|
||||||
# 1024 is the MISTRAL Embedding dimension.
|
# 1024 is the MISTRAL Embedding dimension.
|
||||||
# If another embedding model is chosen, this dimension may need to be changed.
|
# If another embedding model is chosen, this dimension may need to be changed.
|
||||||
embedding = db.Column(Vector(1024), nullable=False)
|
embedding = db.Column(Vector(1024), nullable=False)
|
||||||
|
|
||||||
|
__mapper_args__ = {
|
||||||
|
'polymorphic_identity': 'embedding_mistral',
|
||||||
|
}
|
||||||
|
|
||||||
class EmbeddingSmallOpenAI(db.Model):
|
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
class EmbeddingSmallOpenAI(Embedding):
|
||||||
doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
|
__tablename__ = 'embedding_small_openai'
|
||||||
active = db.Column(db.Boolean, nullable=False, default=True)
|
id = db.Column(db.Integer, db.ForeignKey('embeddings.id'), primary_key=True)
|
||||||
chunk = db.Column(db.Text, nullable=False)
|
|
||||||
|
|
||||||
# 1536 is the OpenAI Small Embedding dimension.
|
# 1536 is the OpenAI Small Embedding dimension.
|
||||||
# If another embedding model is chosen, this dimension may need to be changed.
|
# If another embedding model is chosen, this dimension may need to be changed.
|
||||||
embedding = db.Column(Vector(1536), nullable=False)
|
embedding = db.Column(Vector(1536), nullable=False)
|
||||||
|
|
||||||
|
__mapper_args__ = {
|
||||||
|
'polymorphic_identity': 'embedding_small_openai',
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,65 @@
|
|||||||
|
"""Full support for multiple Embedding Models - error in previous version
|
||||||
|
|
||||||
|
Revision ID: f88854b2ac59
|
||||||
|
Revises: 324ed734ed9b
|
||||||
|
Create Date: 2024-05-08 22:35:11.659108
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
import pgvector
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'f88854b2ac59'
|
||||||
|
down_revision = '324ed734ed9b'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Switch the embedding tables to a parent/child table layout.

    Creates the shared ``embeddings`` parent table (id, type, doc_vers_id,
    active, chunk) and a new ``embedding_small_openai`` child table, drops the
    old ``embedding_small_open_ai`` table, and turns ``embedding_mistral`` into
    a child table by pointing its ``id`` at ``embeddings.id`` and removing the
    columns that now live on the parent.
    """
    # Imported explicitly: a bare ``import pgvector`` does not guarantee that
    # the ``pgvector.sqlalchemy`` submodule has been loaded.
    from pgvector.sqlalchemy import Vector

    # ### commands auto generated by Alembic - adjusted ###
    op.create_table(
        'embeddings',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('type', sa.String(length=30), nullable=False),
        sa.Column('doc_vers_id', sa.Integer(), nullable=False),
        sa.Column('active', sa.Boolean(), nullable=False),
        sa.Column('chunk', sa.Text(), nullable=False),
        sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_table(
        'embedding_small_openai',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('embedding', Vector(dim=1536), nullable=False),
        sa.ForeignKeyConstraint(['id'], ['embeddings.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )

    # NOTE(review): no data is copied before the old table and columns are
    # dropped, and rows already present in ``embedding_mistral`` have no parent
    # row in ``embeddings``, so the foreign key below would fail on a non-empty
    # table. Presumably these tables were empty when this ran - confirm, or add
    # a data-copy step first.
    op.drop_table('embedding_small_open_ai')
    op.drop_constraint('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', type_='foreignkey')

    # Name the constraint explicitly so it can be dropped by name later. The
    # name follows the same ``<table>_<column>_fkey`` pattern as the constraint
    # dropped above (assumes no custom naming convention is configured).
    op.create_foreign_key('embedding_mistral_id_fkey', 'embedding_mistral', 'embeddings', ['id'], ['id'])

    # These columns now live on the ``embeddings`` parent table.
    op.drop_column('embedding_mistral', 'doc_vers_id')
    op.drop_column('embedding_mistral', 'chunk')
    op.drop_column('embedding_mistral', 'active')
    # ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Restore the standalone per-model embedding tables.

    Re-adds ``active``, ``chunk`` and ``doc_vers_id`` to ``embedding_mistral``,
    replaces its foreign key to ``embeddings`` with the original one to
    ``document_version``, recreates ``embedding_small_open_ai``, and drops the
    ``embedding_small_openai`` and ``embeddings`` tables.
    """
    # Imported explicitly: a bare ``import pgvector`` does not guarantee that
    # the ``pgvector.sqlalchemy`` submodule has been loaded.
    from pgvector.sqlalchemy import Vector

    # ### commands auto generated by Alembic - adjusted ###
    # NOTE(review): these columns are NOT NULL with no server default, so the
    # ADD COLUMN statements will fail if ``embedding_mistral`` holds rows.
    # Presumably the table is expected to be empty here - confirm.
    op.add_column('embedding_mistral', sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False))
    op.add_column('embedding_mistral', sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False))
    op.add_column('embedding_mistral', sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False))

    # Alembic autogenerate emitted ``None`` here, but dropping a constraint
    # requires its name. ``embedding_mistral_id_fkey`` follows the
    # ``<table>_<column>_fkey`` pattern used by the other constraint names in
    # this migration (assumes no custom naming convention is configured).
    op.drop_constraint('embedding_mistral_id_fkey', 'embedding_mistral', type_='foreignkey')
    op.create_foreign_key('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', 'document_version', ['doc_vers_id'], ['id'])

    op.create_table(
        'embedding_small_open_ai',
        sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False),
        sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False),
        sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False),
        sa.Column('embedding', Vector(dim=1536), autoincrement=False, nullable=False),
        sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False),
        sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], name='embedding_small_open_ai_doc_vers_id_fkey'),
        sa.PrimaryKeyConstraint('id', name='embedding_small_open_ai_pkey'),
    )

    # Drop the child table before the parent it references.
    op.drop_table('embedding_small_openai')
    op.drop_table('embeddings')
    # ### end Alembic commands ###
|
||||||
Reference in New Issue
Block a user