- Significantly changed the PDF Processor to use Mistral's OCR model

- Ensure very long chunks are split into smaller chunks
- Ensure TrackedMistralAIEmbedding is batched when needed to ensure correct execution
- Upgraded several packages to newer versions
This commit is contained in:
Josako
2025-04-16 15:39:16 +02:00
parent 5f58417d24
commit 4bf12db142
10 changed files with 518 additions and 91 deletions

View File

@@ -1,7 +1,7 @@
alembic~=1.14.1
alembic~=1.15.2
annotated-types~=0.7.0
bcrypt~=4.1.3
beautifulsoup4~=4.12.3
bcrypt~=4.3.0
beautifulsoup4~=4.13.4
celery~=5.4.0
certifi~=2024.7.4
chardet~=5.2.0
@@ -9,29 +9,29 @@ cors~=1.0.1
Flask~=3.1.0
Flask-BabelEx~=0.9.4
Flask-Bootstrap~=3.3.7.1
Flask-Cors~=5.0.0
Flask-Cors~=5.0.1
Flask-JWT-Extended~=4.7.1
Flask-Login~=0.6.3
flask-mailman~=1.1.1
Flask-Migrate~=4.1.0
Flask-Principal~=0.4.0
Flask-Security-Too~=5.6.0
Flask-Security-Too~=5.6.1
Flask-Session~=0.8.0
Flask-SQLAlchemy~=3.1.1
Flask-WTF~=1.2.1
gevent~=24.2.1
gevent~=24.11.1
gevent-websocket~=0.10.1
greenlet~=3.0.3
gunicorn~=22.0.0
Jinja2~=3.1.4
Jinja2~=3.1.6
kombu~=5.3.7
langchain~=0.3.0
langchain-anthropic~=0.2.0
langchain-community~=0.3.0
langchain-core~=0.3.0
langchain-mistralai~=0.2.0
langchain-openai~=0.3.5
langchain-postgres~=0.0.12
langchain~=0.3.23
langchain-anthropic~=0.3.11
langchain-community~=0.3.21
langchain-core~=0.3.52
langchain-mistralai~=0.2.10
langchain-openai~=0.3.13
langchain-postgres~=0.0.14
langchain-text-splitters~=0.3.0
langcodes~=3.4.0
langdetect~=1.0.9
@@ -41,7 +41,7 @@ pg8000~=1.31.2
pgvector~=0.2.5
pycryptodome~=3.20.0
pydantic~=2.9.1
PyJWT~=2.8.0
PyJWT~=2.10.1
python-dateutil~=2.9.0.post0
python-engineio~=4.9.1
python-iso639~=2024.4.27
@@ -50,11 +50,11 @@ pytz~=2024.1
PyYAML~=6.0.2
redis~=5.0.4
requests~=2.32.3
SQLAlchemy~=2.0.35
SQLAlchemy~=2.0.40
tiktoken~=0.7.0
tzdata~=2024.1
urllib3~=2.2.2
WTForms~=3.1.2
WTForms~=3.2.1
wtforms-html5~=0.6.1
zxcvbn~=4.4.28
groq~=0.9.0
@@ -84,10 +84,10 @@ typing_extensions~=4.12.2
babel~=2.16.0
dogpile.cache~=1.3.3
python-docx~=1.1.2
crewai~=0.108.0
crewai~=0.114.0
sseclient~=0.0.27
termcolor~=2.5.0
mistral-common~=1.5.3
mistralai~=1.5.0
mistralai~=1.6.0
contextvars~=2.4
pandas~=2.2.3