- Improve annotation algorithm for Youtube (and others)
- Patch Pytube
- Improve OS-level deletion and writing of files
- Start working on Claude support
- Improve template management
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
import langcodes
|
||||
from flask import current_app
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
import ast
|
||||
from typing import List
|
||||
from openai import OpenAI
|
||||
# from groq import Groq
|
||||
|
||||
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
|
||||
|
||||
@@ -121,31 +122,46 @@ def select_model_variables(tenant):
|
||||
tool_calling_supported = False
|
||||
match llm_model:
|
||||
case 'gpt-4-turbo' | 'gpt-4o':
|
||||
summary_template = current_app.config.get('GPT4_SUMMARY_TEMPLATE')
|
||||
rag_template = current_app.config.get('GPT4_RAG_TEMPLATE')
|
||||
history_template = current_app.config.get('GPT4_HISTORY_TEMPLATE')
|
||||
encyclopedia_template = current_app.config.get('GPT4_ENCYCLOPEDIA_TEMPLATE')
|
||||
transcript_template = current_app.config.get('GPT4_TRANSCRIPT_TEMPLATE')
|
||||
html_parse_template = current_app.config.get('GPT4_HTML_PARSE_TEMPLATE')
|
||||
pdf_parse_template = current_app.config.get('GPT4_PDF_PARSE_TEMPLATE')
|
||||
tool_calling_supported = True
|
||||
case _:
|
||||
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
||||
f'error: Invalid chat model')
|
||||
model_variables['summary_template'] = summary_template
|
||||
model_variables['rag_template'] = rag_template
|
||||
model_variables['history_template'] = history_template
|
||||
model_variables['encyclopedia_template'] = encyclopedia_template
|
||||
model_variables['transcript_template'] = transcript_template
|
||||
model_variables['html_parse_template'] = html_parse_template
|
||||
model_variables['pdf_parse_template'] = pdf_parse_template
|
||||
if tool_calling_supported:
|
||||
model_variables['cited_answer_cls'] = CitedAnswer
|
||||
case 'anthropic':
|
||||
api_key = current_app.config.get('ANTHROPIC_API_KEY')
|
||||
# Anthropic does not have the same 'generic' model names as OpenAI
|
||||
llm_model_ext = current_app.config.get('ANTHROPIC_LLM_VERSIONS').get(llm_model)
|
||||
model_variables['llm'] = ChatAnthropic(api_key=api_key,
|
||||
model=llm_model_ext,
|
||||
temperature=model_variables['RAG_temperature'])
|
||||
model_variables['llm_no_rag'] = ChatAnthropic(api_key=api_key,
|
||||
model=llm_model_ext,
|
||||
temperature=model_variables['RAG_temperature'])
|
||||
tool_calling_supported = True
|
||||
case _:
|
||||
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
||||
f'error: Invalid chat provider')
|
||||
|
||||
# Transcription Client Variables. Only Whisper-1 of OpenAI is currently supported
|
||||
if tool_calling_supported:
|
||||
model_variables['cited_answer_cls'] = CitedAnswer
|
||||
|
||||
templates = current_app.config['PROMPT_TEMPLATES'][f'{llm_provider}.{llm_model}']
|
||||
model_variables['summary_template'] = templates['summary']
|
||||
model_variables['rag_template'] = templates['rag']
|
||||
model_variables['history_template'] = templates['history']
|
||||
model_variables['encyclopedia_template'] = templates['encyclopedia']
|
||||
model_variables['transcript_template'] = templates['transcript']
|
||||
model_variables['html_parse_template'] = templates['html_parse']
|
||||
model_variables['pdf_parse_template'] = templates['pdf_parse']
|
||||
|
||||
model_variables['annotation_chunk_length'] = current_app.config['ANNOTATION_TEXT_CHUNK_LENGTH'][tenant.llm_model]
|
||||
|
||||
# Transcription Client Variables.
|
||||
# Using Groq
|
||||
# api_key = current_app.config.get('GROQ_API_KEY')
|
||||
# model_variables['transcription_client'] = Groq(api_key=api_key)
|
||||
# model_variables['transcription_model'] = 'whisper-large-v3'
|
||||
|
||||
# Using OpenAI
|
||||
api_key = current_app.config.get('OPENAI_API_KEY')
|
||||
model_variables['transcription_client'] = OpenAI(api_key=api_key)
|
||||
model_variables['transcription_model'] = 'whisper-1'
|
||||
|
||||
30
common/utils/os_utils.py
Normal file
30
common/utils/os_utils.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import os
|
||||
import gevent
|
||||
import time
|
||||
from flask import current_app
|
||||
|
||||
|
||||
def safe_remove(file_path):
    """Delete *file_path* if it exists, then block until the deletion is visible.

    Outcomes are logged through the Flask app logger. Any error raised while
    checking, removing, or waiting is logged and then re-raised, including a
    TimeoutError when the file is still present 5 seconds after removal.
    """
    try:
        if not os.path.exists(file_path):
            current_app.logger.info(f"{file_path} does not exist, skipping deletion")
            return

        os.remove(file_path)

        # Poll until the file is gone; gevent.sleep yields cooperatively so
        # other greenlets keep running while we wait.
        waited_from = time.time()
        while os.path.exists(file_path):
            gevent.sleep(1)
            if time.time() - waited_from > 5:  # give up after 5 seconds
                raise TimeoutError(f"Failed to delete {file_path} after 5 seconds")

        current_app.logger.info(f"Successfully deleted {file_path}")
    except Exception as e:
        current_app.logger.error(f"Error deleting {file_path}: {str(e)}")
        raise
|
||||
|
||||
|
||||
def sync_folder(file_path):
    """Flush directory metadata for *file_path* to stable storage.

    POSIX requires a separate fsync on the containing directory to make
    recently created/renamed/deleted entries durable; fsyncing only the
    file is not enough.

    Args:
        file_path: Path of the directory to sync.

    Raises:
        OSError: if the directory cannot be opened or synced.
    """
    dir_fd = os.open(file_path, os.O_RDONLY)
    try:
        os.fsync(dir_fd)
    finally:
        # Always release the descriptor, even when fsync fails (fd-leak fix:
        # the original skipped os.close on an fsync error).
        os.close(dir_fd)
|
||||
15
common/utils/prompt_loader.py
Normal file
15
common/utils/prompt_loader.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import os
|
||||
import yaml
|
||||
|
||||
|
||||
def load_prompt_templates(model_name):
    """Load the YAML prompt templates for a ``<provider>.<model>`` identifier.

    Looks up ``config/prompts/<provider>/<model>.yaml`` relative to the
    current working directory and parses it with ``yaml.safe_load``.

    Args:
        model_name: Identifier of the form ``provider.model``
            (e.g. ``openai.gpt-4o``).

    Returns:
        The parsed YAML content (typically a dict of template strings).

    Raises:
        ValueError: if ``model_name`` is not of the form ``provider.model``.
        FileNotFoundError: if no template file exists for the identifier.
    """
    parts = model_name.split('.')
    # Validate explicitly instead of letting tuple unpacking raise a
    # cryptic "not enough values to unpack" ValueError.
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            f"model_name must be of the form '<provider>.<model>', got {model_name!r}")
    provider, model = parts

    file_path = os.path.join('config', 'prompts', provider, f'{model}.yaml')

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No prompt template file found for {model_name}")

    # Explicit encoding so template files parse identically on every platform
    # (the default encoding is locale-dependent).
    with open(file_path, 'r', encoding='utf-8') as file:
        templates = yaml.safe_load(file)

    return templates
|
||||
Reference in New Issue
Block a user