- Significantly changed the PDF Processor to use Mistral's OCR model

- Ensured very long chunks get split into smaller chunks
- Ensured TrackedMistralAIEmbedding is batched when needed so that it executes correctly
- Upgraded some of the packages to a higher version
This commit is contained in:
Josako
2025-04-16 15:39:16 +02:00
parent 5f58417d24
commit 4bf12db142
10 changed files with 518 additions and 91 deletions

View File

@@ -0,0 +1,31 @@
"""Add nr_of_pages to llm_metrics in BusinessEvent
Revision ID: 605395afc22f
Revises: cfee2c5bcd7a
Create Date: 2025-04-16 07:25:43.959618
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` and `down_revision` mirror the "Revision ID" and "Revises"
# entries in the module docstring.
revision = '605395afc22f'
down_revision = 'cfee2c5bcd7a'
# No branch labels or dependencies are declared for this revision.
branch_labels = None
depends_on = None
def upgrade():
    """Add the nullable integer column ``llm_metrics_nr_of_pages`` to ``business_event_log``."""
    nr_of_pages_column = sa.Column(
        'llm_metrics_nr_of_pages',
        sa.Integer(),
        nullable=True,
    )

    # The change is applied through Alembic's batch context, exactly as the
    # autogenerated migration did.
    with op.batch_alter_table('business_event_log', schema=None) as table_ops:
        table_ops.add_column(nr_of_pages_column)
def downgrade():
    """Drop the ``llm_metrics_nr_of_pages`` column from ``business_event_log``."""
    table_name = 'business_event_log'
    column_name = 'llm_metrics_nr_of_pages'

    # Reverses the column addition performed by ``upgrade``, using the same
    # batch context.
    with op.batch_alter_table(table_name, schema=None) as table_ops:
        table_ops.drop_column(column_name)