diff --git a/common/models/document.py b/common/models/document.py
index 9063e61..2ab70f2 100644
--- a/common/models/document.py
+++ b/common/models/document.py
@@ -75,7 +75,7 @@ class DocumentVersion(db.Model):
     processing_error = db.Column(db.String(255), nullable=True)
 
     # Relations
-    embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)
+    embeddings = db.relationship('Embedding', backref='document_version', lazy=True)
 
     def __repr__(self):
         return f"<DocumentVersion.{self.id}>"
@@ -87,23 +87,41 @@ class DocumentVersion(db.Model):
         return f"{self.id}.{self.file_type}"
 
 
-class EmbeddingMistral(db.Model):
+class Embedding(db.Model):
+    __tablename__ = 'embeddings'
     id = db.Column(db.Integer, primary_key=True)
+    type = db.Column(db.String(30), nullable=False)
     doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
     active = db.Column(db.Boolean, nullable=False, default=True)
     chunk = db.Column(db.Text, nullable=False)
 
+    __mapper_args__ = {
+        'polymorphic_identity': 'embedding',
+        'polymorphic_on': type
+    }
+
+
+class EmbeddingMistral(Embedding):
+    __tablename__ = 'embedding_mistral'
+    id = db.Column(db.Integer, db.ForeignKey('embeddings.id'), primary_key=True)
+
     # 1024 is the MISTRAL Embedding dimension.
     # If another embedding model is chosen, this dimension may need to be changed.
     embedding = db.Column(Vector(1024), nullable=False)
 
+    __mapper_args__ = {
+        'polymorphic_identity': 'embedding_mistral',
+    }
 
-class EmbeddingSmallOpenAI(db.Model):
-    id = db.Column(db.Integer, primary_key=True)
-    doc_vers_id = db.Column(db.Integer, db.ForeignKey(DocumentVersion.id), nullable=False)
-    active = db.Column(db.Boolean, nullable=False, default=True)
-    chunk = db.Column(db.Text, nullable=False)
+
+class EmbeddingSmallOpenAI(Embedding):
+    __tablename__ = 'embedding_small_openai'
+    id = db.Column(db.Integer, db.ForeignKey('embeddings.id'), primary_key=True)
 
     # 1536 is the OpenAI Small Embedding dimension.
     # If another embedding model is chosen, this dimension may need to be changed.
     embedding = db.Column(Vector(1536), nullable=False)
+
+    __mapper_args__ = {
+        'polymorphic_identity': 'embedding_small_openai',
+    }
diff --git a/migrations/tenant/versions/f88854b2ac59_full_support_for_multiple_embedding_.py b/migrations/tenant/versions/f88854b2ac59_full_support_for_multiple_embedding_.py
new file mode 100644
index 0000000..3f5b9d7
--- /dev/null
+++ b/migrations/tenant/versions/f88854b2ac59_full_support_for_multiple_embedding_.py
@@ -0,0 +1,65 @@
+"""Full support for multiple Embedding Models - error in previous version
+
+Revision ID: f88854b2ac59
+Revises: 324ed734ed9b
+Create Date: 2024-05-08 22:35:11.659108
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+import pgvector
+
+
+# revision identifiers, used by Alembic.
+revision = 'f88854b2ac59'
+down_revision = '324ed734ed9b'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('embeddings',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('type', sa.String(length=30), nullable=False),
+    sa.Column('doc_vers_id', sa.Integer(), nullable=False),
+    sa.Column('active', sa.Boolean(), nullable=False),
+    sa.Column('chunk', sa.Text(), nullable=False),
+    sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_table('embedding_small_openai',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('embedding', pgvector.sqlalchemy.Vector(dim=1536), nullable=False),
+    sa.ForeignKeyConstraint(['id'], ['embeddings.id'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.drop_table('embedding_small_open_ai')
+    op.drop_constraint('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', type_='foreignkey')
+    op.create_foreign_key(None, 'embedding_mistral', 'embeddings', ['id'], ['id'])
+    op.drop_column('embedding_mistral', 'doc_vers_id')
+    op.drop_column('embedding_mistral', 'chunk')
+    op.drop_column('embedding_mistral', 'active')
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('embedding_mistral', sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False))
+    op.add_column('embedding_mistral', sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False))
+    op.add_column('embedding_mistral', sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False))
+    op.drop_constraint(None, 'embedding_mistral', type_='foreignkey')
+    op.create_foreign_key('embedding_mistral_doc_vers_id_fkey', 'embedding_mistral', 'document_version', ['doc_vers_id'], ['id'])
+    op.create_table('embedding_small_open_ai',
+    sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False),
+    sa.Column('doc_vers_id', sa.INTEGER(), autoincrement=False, nullable=False),
+    sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=False),
+    sa.Column('embedding', pgvector.sqlalchemy.Vector(dim=1536), autoincrement=False, nullable=False),
+    sa.Column('chunk', sa.TEXT(), autoincrement=False, nullable=False),
+    sa.ForeignKeyConstraint(['doc_vers_id'], ['document_version.id'], name='embedding_small_open_ai_doc_vers_id_fkey'),
+    sa.PrimaryKeyConstraint('id', name='embedding_small_open_ai_pkey')
+    )
+    op.drop_table('embedding_small_openai')
+    op.drop_table('embeddings')
+    # ### end Alembic commands ###
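
Reviewer note: the sketch below is not part of the diff. It is a minimal, hypothetical example of how the joined-table inheritance introduced above behaves, assuming the application's Flask-SQLAlchemy `db` session and an existing `DocumentVersion` row named `doc_version` (both are assumptions, not shown in this change).

# Hypothetical usage sketch (not part of this PR).
# Assumes the app's `db` session and an existing DocumentVersion row `doc_version`.
from common.models.document import Embedding, EmbeddingMistral

# Adding a subclass instance writes one row to `embeddings` (type='embedding_mistral')
# and one row to `embedding_mistral` holding only the 1024-dim vector.
mistral_emb = EmbeddingMistral(
    doc_vers_id=doc_version.id,
    chunk="some chunk of text",
    embedding=[0.0] * 1024,  # placeholder vector
)
db.session.add(mistral_emb)
db.session.commit()

# Querying the shared base returns concrete subclass instances for every model,
# resolved through the `type` discriminator column.
for emb in Embedding.query.filter_by(doc_vers_id=doc_version.id, active=True):
    print(type(emb).__name__, emb.chunk[:40])

# DocumentVersion.embeddings now spans all embedding models, because the
# relationship targets 'Embedding' rather than 'EmbeddingMistral'.
print(len(doc_version.embeddings))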