- Adding a Tenant Type

- Allow filtering on Tenant Types & searching for parts of Tenant names
- Implement health checks
- Start Prometheus monitoring (needs to be finalized)
- Refine audio_processor and srt_processor to reduce duplicate code and support for larger files
- Introduce repopack to reason in LLMs about the code
This commit is contained in:
Josako
2024-09-13 15:43:40 +02:00
parent 9e14824249
commit 6cf660e622
41 changed files with 687 additions and 579 deletions

View File

@@ -25,6 +25,12 @@ from eveai_workers.Processors.pdf_processor import PDFProcessor
from eveai_workers.Processors.srt_processor import SRTProcessor
# Healthcheck task
@current_celery.task(name='ping', queue='embeddings')
def ping():
return 'pong'
@current_celery.task(name='create_embeddings', queue='embeddings')
def create_embeddings(tenant_id, document_version_id):
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id}.')
@@ -184,14 +190,21 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
chunk_total_context = (f'Filename: {document_version.file_name}\n'
f'User Context:\n{document_version.user_context}\n\n'
f'User Metadata:\n{document_version.user_metadata}\n\n'
f'Title: {title}\n'
f'{summary}\n'
f'{document_version.system_context}\n\n')
f'Summary:\n{summary}\n'
f'System Context:\n{document_version.system_context}\n\n'
f'System Metadata:\n{document_version.system_metadata}\n\n'
)
enriched_chunks = []
initial_chunk = (f'Filename: {document_version.file_name}\n'
f'User Context:\n{document_version.user_context}\n\n'
f'User Metadata:\n{document_version.user_metadata}\n\n'
f'Title: {title}\n'
f'{chunks[0]}')
f'System Context:\n{document_version.system_context}\n\n'
f'System Metadata:\n{document_version.system_metadata}\n\n'
f'{chunks[0]}'
)
enriched_chunks.append(initial_chunk)
for chunk in chunks[1:]: