- Add Catalog Functionality

This commit is contained in:
Josako
2024-10-15 18:14:57 +02:00
parent 3316a8bc47
commit 3e644f1652
16 changed files with 303 additions and 84 deletions

View File

@@ -1,5 +1,7 @@
import inspect
import logging
import sys
import os
from logging.config import fileConfig
from flask import current_app
@@ -19,8 +21,26 @@ config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')
# fileConfig(config.config_file_name)
# logger = logging.getLogger('alembic.env')
logging.basicConfig(
stream=sys.stdout,
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger()
# Reset handlers to avoid issues with Alembic overriding them
for handler in logger.handlers:
logger.removeHandler(handler)
# Add a stream handler to output logs to the console
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)
def get_engine():
@@ -125,6 +145,7 @@ def run_migrations_online():
tenants = get_tenant_ids()
for tenant in tenants:
try:
os.environ['TENANT_ID'] = str(tenant)
logger.info(f"Migrating tenant: {tenant}")
# set search path on the connection, which ensures that
# PostgreSQL will emit all CREATE / ALTER / DROP statements

View File

@@ -0,0 +1,56 @@
"""Adding Catalogs to Document Domain
Revision ID: 0f5932ff3051
Revises: 5a75fb6da7b8
Create Date: 2024-10-14 15:20:45.058329
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '0f5932ff3051'  # this migration: adds catalogs to the document domain
down_revision = '5a75fb6da7b8'  # immediate predecessor in the migration chain
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``catalog`` table and link documents to catalogs.

    A catalog groups documents and carries per-catalog HTML parsing,
    chunking, retrieval and chat-temperature settings. Existing
    ``document`` rows get a nullable ``catalog_id`` so the schema change
    is backward-compatible; a follow-up data migration backfills it.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('catalog',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=50), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        # HTML parsing configuration: which tags / elements / CSS classes
        # to keep or skip when splitting source documents.
        sa.Column('html_tags', postgresql.ARRAY(sa.String(length=10)), nullable=True),
        sa.Column('html_end_tags', postgresql.ARRAY(sa.String(length=10)), nullable=True),
        sa.Column('html_included_elements', postgresql.ARRAY(sa.String(length=50)), nullable=True),
        sa.Column('html_excluded_elements', postgresql.ARRAY(sa.String(length=50)), nullable=True),
        sa.Column('html_excluded_classes', postgresql.ARRAY(sa.String(length=200)), nullable=True),
        # Chunk-size bounds used when splitting documents.
        sa.Column('min_chunk_size', sa.Integer(), nullable=True),
        sa.Column('max_chunk_size', sa.Integer(), nullable=True),
        # Retrieval settings — presumably Elasticsearch top-k and a
        # similarity cut-off; TODO confirm against the search layer.
        sa.Column('es_k', sa.Integer(), nullable=True),
        sa.Column('es_similarity_threshold', sa.Float(), nullable=True),
        # LLM temperatures for chat with and without RAG context.
        sa.Column('chat_RAG_temperature', sa.Float(), nullable=True),
        sa.Column('chat_no_RAG_temperature', sa.Float(), nullable=True),
        sa.Column('embed_tuning', sa.Boolean(), nullable=True),
        sa.Column('rag_tuning', sa.Boolean(), nullable=True),
        # Audit columns; timestamps default to now() on the database side.
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
        sa.Column('created_by', sa.Integer(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_by', sa.Integer(), nullable=True),
        sa.ForeignKeyConstraint(['created_by'], ['public.user.id'], ),
        sa.ForeignKeyConstraint(['updated_by'], ['public.user.id'], ),
        sa.PrimaryKeyConstraint('id')
    )
    # NOTE(review): the foreign key below is created without an explicit
    # name (None -> backend-generated), which makes it awkward to drop by
    # name in a later migration. Consider a naming convention going forward.
    op.add_column('document', sa.Column('catalog_id', sa.Integer(), nullable=True))
    op.create_foreign_key(None, 'document', 'catalog', ['catalog_id'], ['id'])
    # ### end Alembic commands ###
def downgrade():
    """Revert: detach documents from catalogs and drop the catalog table.

    Dropping ``document.catalog_id`` also removes the unnamed FK created
    in upgrade() — presumably fine on PostgreSQL, which drops dependent
    constraints with the column; verify before running on another backend.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column('document', 'catalog_id')
    op.drop_table('catalog')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,99 @@
"""Upgrading Existing documents to default catalog
Revision ID: 28984b05d396
Revises: 0f5932ff3051
Create Date: 2024-10-15 09:02:23.355660
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
import pgvector
from common.models.document import Catalog, Document
from common.models.user import Tenant
from flask import current_app
import logging
import os
# Set up logging for this migration module.
#
# basicConfig() installs a single StreamHandler on the root logger. The
# previous version ALSO attached its own StreamHandler to `logger`; since
# records propagate from `logger` up to root, every message was emitted
# twice. One root handler with the desired format is sufficient.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
logger.info("Starting revision upgrade 28984b05d396")
# revision identifiers, used by Alembic.
revision = '28984b05d396'  # this migration: backfill documents into a default catalog
down_revision = '0f5932ff3051'  # depends on the migration that created the catalog table
branch_labels = None
depends_on = None
def upgrade():
    """Create a 'Default Catalog' for the current tenant and attach all
    existing documents to it.

    The tenant is identified by the TENANT_ID environment variable, which
    is set per-tenant by the multi-tenant loop in env.py before each
    tenant's migrations run.

    Raises:
        RuntimeError: if TENANT_ID is unset or the tenant does not exist.
    """
    logger.info("Starting migration: Creating default catalog and updating documents.")
    bind = op.get_bind()
    session = Session(bind=bind)
    try:
        raw_tenant_id = os.getenv('TENANT_ID')
        if raw_tenant_id is None:
            # Fail with a clear message instead of int(None) -> TypeError.
            raise RuntimeError("TENANT_ID environment variable is not set; "
                               "cannot run migration 28984b05d396")
        tenant_id = int(raw_tenant_id)
        logger.info(f"In migration: tenant_id = {tenant_id}")

        # Look the tenant up through THIS migration session.
        # The previous Tenant.query.get_or_404() relied on the Flask app
        # session/context and aborted with an HTTP 404 — both wrong inside
        # an Alembic migration.
        tenant = session.query(Tenant).get(tenant_id)
        if tenant is None:
            raise RuntimeError(f"Tenant {tenant_id} not found")

        # Step 1: Create a new Default Catalog, seeded from the tenant's
        # HTML-parsing / chunking / retrieval / chat settings.
        logger.info("Creating default catalog")
        default_catalog = Catalog(
            name='Default Catalog',
            description=None,
        )
        for field in (
            'html_tags', 'html_end_tags', 'html_included_elements',
            'html_excluded_elements', 'html_excluded_classes',
            'min_chunk_size', 'max_chunk_size',
            'es_k', 'es_similarity_threshold',
            'chat_RAG_temperature', 'chat_no_RAG_temperature',
            'embed_tuning', 'rag_tuning',
        ):
            setattr(default_catalog, field, getattr(tenant, field))
        session.add(default_catalog)
        session.commit()
        new_catalog_id = default_catalog.id
        logger.info(f"Default catalog created with ID: {new_catalog_id}")

        # Step 2: Point every existing document at the new catalog with a
        # single bulk UPDATE instead of loading each row into the ORM.
        logger.info("Updating documents with the new catalog ID.")
        updated = session.query(Document).update(
            {Document.catalog_id: new_catalog_id},
            synchronize_session=False,
        )
        session.commit()
        logger.info(f"Updated {updated} documents with new catalog ID.")
    except Exception as e:
        logger.error(f"An error occurred during migration: {e}")
        session.rollback()
        raise
    finally:
        session.close()
    logger.info("Migration completed successfully.")
def downgrade():
    """No-op downgrade.

    The default-catalog backfill is not reverted: undoing it would require
    knowing which documents this revision touched, so the assignment is
    intentionally left in place.
    """
    return None