Setup of documents view
This commit is contained in:
59
eveai_app/worker/tasks.py
Normal file
59
eveai_app/worker/tasks.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
from flask import current_app
|
||||
from langchain_mistralai import MistralAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.document_loaders.pdf import PyPDFLoader
|
||||
from langchain_community.vectorstores.chroma import Chroma
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
import os
|
||||
|
||||
from eveai_app import celery
|
||||
from ..utils.database import Database
|
||||
from ..models.document import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI
|
||||
from .. import db
|
||||
|
||||
|
||||
@celery.task(name='create_embeddings', queue='embeddings')
|
||||
def create_embeddings(tenant_id, document_version_id, embedding_model_def):
|
||||
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id} '
|
||||
f'with model {embedding_model_def}')
|
||||
Database(tenant_id).switch_schema()
|
||||
document_version = DocumentVersion.query.get(document_version_id)
|
||||
if document_version is None:
|
||||
current_app.logger.error(f'Cannot create embeddings for tenant {tenant_id}. '
|
||||
f'Document version {document_version_id} not found')
|
||||
return
|
||||
db.session.add(document_version)
|
||||
|
||||
# start processing
|
||||
document_version.processing = True
|
||||
document_version.processing_started_at = dt.now(tz.utc)
|
||||
db.session.commit()
|
||||
|
||||
embedding_provider = embedding_model_def.rsplit('.', 1)[0]
|
||||
embedding_model = embedding_model_def.rsplit('.', 1)[1]
|
||||
# define embedding variables
|
||||
match (embedding_provider, embedding_model):
|
||||
case ('openai', 'text-embedding-3-small'):
|
||||
embedding_model = EmbeddingSmallOpenAI()
|
||||
case ('mistral', 'text-embedding-3-small'):
|
||||
embedding_model = EmbeddingMistral()
|
||||
|
||||
match document_version.file_type:
|
||||
case 'pdf':
|
||||
pdf_file = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
||||
document_version.file_location,
|
||||
document_version.file_path)
|
||||
loader = PyPDFLoader(pdf_file)
|
||||
|
||||
# We
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
documents = text_splitter.split_documents(loader.load())
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@celery.task(name='ask_eveAI', queue='llm_interactions')
def ask_eve_ai(query):
    """Placeholder Celery task for answering a query through an LLM.

    Not implemented yet: the interaction logic with LLMs like GPT
    (Langchain API calls, etc.) still has to be written. The ``query``
    argument is currently ignored.

    Returns:
        None.
    """
    return None
|
||||
Reference in New Issue
Block a user