YouTube support added - further checking required

This commit is contained in:
Josako
2024-07-04 08:11:31 +02:00
parent 19e57f5adf
commit 8e1dac0233
17 changed files with 386 additions and 11 deletions

View File

@@ -58,7 +58,7 @@ class Config(object):
# Language codes accepted by the application.
SUPPORTED_LANGUAGES = ['en', 'fr', 'nl', 'de', 'es']
# supported LLMs
# Embedding models, written as '<provider>.<model>'. The earlier two-entry
# assignment was overwritten immediately by this one, so only the effective
# (three-entry) list is kept.
SUPPORTED_EMBEDDINGS = [
    'openai.text-embedding-3-small',
    'openai.text-embedding-3-large',
    'mistral.mistral-embed',
]
# Chat/completion models, written as '<provider>.<model>'.
SUPPORTED_LLMS = [
    'openai.gpt-4o',
    'openai.gpt-4-turbo',
    'openai.gpt-3.5-turbo',
    'mistral.mistral-large-2402',
]
# Celery settings
@@ -123,6 +123,32 @@ class Config(object):
Question:
{question}"""
# Prompt template for answering general-terminology questions without
# retrieved context. Placeholders: {language} (language to answer in) and
# {question} (the user's question).
# NOTE(review): the "GPT3_5" prefix suggests this variant is paired with the
# gpt-3.5 model - confirm against the code that selects templates.
GPT3_5_ENCYCLOPEDIA_TEMPLATE = """You have a lot of background knowledge, and as such you are some kind of
'encyclopedia' to explain general terminology. Only answer if you have a clear understanding of the question.
If not, say you do not have sufficient information to answer the question. Use the {language} in your communication.
Question:
{question}"""
# Prompt template that asks the model to restructure a raw transcript into
# markdown with headings (max 2 levels) while leaving the spoken text intact.
# Placeholder: {transcript}, wrapped in triple backquotes inside the prompt.
GPT4_TRANSCRIPT_TEMPLATE = """You are a transcription editor that improves a given transcript on several parts
and returns markdown. Without changing what people say. The transcript is delimited between triple backquotes.
Do the following:
- divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
- annotate the text to identify these logical parts using headings (max 2 levels) in the same language as the transcript.
- improve errors in the transcript given the context, but leave the text intact.
```{transcript}```
"""
# Same transcript-editing prompt under the GPT3_5 name. The text is currently
# identical to GPT4_TRANSCRIPT_TEMPLATE; keep the two in sync unless a
# per-model difference is intended.
GPT3_5_TRANSCRIPT_TEMPLATE = """You are a transcription editor that improves a given transcript on several parts
and returns markdown. Without changing what people say. The transcript is delimited between triple backquotes.
Do the following:
- divide the transcript into several logical parts. Ensure questions and their answers are in the same logical part.
- annotate the text to identify these logical parts using headings (max 2 levels) in the same language as the transcript.
- improve errors in the transcript given the context, but leave the text intact.
```{transcript}```
"""
# SocketIO settings
# Alternative async mode, kept commented out for reference:
# SOCKETIO_ASYNC_MODE = 'threading'
# Active async mode.
# NOTE(review): presumably read by the SocketIO server setup - confirm where
# this setting is consumed.
SOCKETIO_ASYNC_MODE = 'gevent'
@@ -182,6 +208,9 @@ class DevConfig(Config):
# Credentials must not live in source control: every secret below is read
# from the process environment instead of being hard-coded. A variable that
# is not set yields None, so a missing key is visible rather than silently
# falling back to a committed value.
# SECURITY: the literal keys that were previously committed here are exposed
# in the repository history and should be revoked/rotated with each provider.
# NOTE(review): local import because the file's import block is outside this
# view - drop it if `os` is already imported at the top of the module.
import os

# OpenAI API Keys
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# Groq API Keys
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
# Unstructured settings
UNSTRUCTURED_API_KEY = os.environ.get('UNSTRUCTURED_API_KEY')
# Endpoint URL is not a credential, so it stays in the config as before.
UNSTRUCTURED_BASE_URL = 'https://flowitbv-16c4us0m.api.unstructuredapp.io'
@@ -209,6 +238,9 @@ class DevConfig(Config):
# Session settings
SESSION_REDIS = redis.from_url('redis://redis:6379/2')
# PATH settings
# Location of the ffmpeg executable. Every other setting in this config uses
# UPPER_CASE; loaders such as Flask's Config.from_object only copy upper-case
# attributes, so a lower-case name would be skipped.
# NOTE(review): confirm how this config class is loaded.
FFMPEG_PATH = '/usr/bin/ffmpeg'
# Backward-compatible alias for code that still reads the original
# lower-case attribute.
ffmpeg_path = FFMPEG_PATH
class ProdConfig(Config):
DEVELOPMENT = False