- Improve annotation algorithm for YouTube (and others)

- Patch Pytube
- Improve OS deletion of files and writing of files
- Start working on Claude
- Improve template management
This commit is contained in:
Josako
2024-07-16 14:21:49 +02:00
parent db44fd3b66
commit 908a2eaf7e
39 changed files with 6427 additions and 324 deletions

View File

@@ -14,21 +14,9 @@ do
esac
done
# Check if the directory is provided
if [ -z "$directory" ]; then
echo "Directory is required."
echo "Usage: ./compress.sh -d <audio_folder> -i <input_file> -o <output_file>"
exit 1
fi
if [ -z "$input_file" ]; then
echo "Input file is required."
echo "Usage: ./compress.sh -d <audio_folder> -i <input_file> -o <output_file>"
exit 1
fi
if [ -z "$output_file" ]; then
echo "Output file is required."
# Check if all required arguments are provided
# A single guard replaces per-argument checks: if any of $directory,
# $input_file or $output_file is empty (presumably set by the -d/-i/-o option
# parsing earlier in the script -- confirm), print one generic message plus
# the usage line and abort with status 1.
if [ -z "$directory" ] || [ -z "$input_file" ] || [ -z "$output_file" ]; then
echo "All arguments are required."
echo "Usage: ./compress.sh -d <audio_folder> -i <input_file> -o <output_file>"
exit 1
fi
@@ -38,20 +26,47 @@ cd "$directory" || exit 1
# Compress the file
# ffmpeg options: -map 0:a keeps only the audio streams of the input,
# -ar 16000 resamples to 16 kHz and -ac 1 downmixes to a single channel.
# NOTE(review): the ffmpeg exit status is not checked here; a failed encode is
# only noticed indirectly by the availability polling further down.
/usr/bin/ffmpeg -i "$input_file" -ar 16000 -ac 1 -map 0:a "$output_file"
# Seconds slept between two availability polls of the output file
WAIT_TIME=5
# Ensure the file is fully written to disk
# (sync is given the output file as operand, so the flush targets that file)
sync "$output_file"
# Function to check for file existence
# Seconds slept after each failed availability poll of the output file
WAIT_TIME=5
# Number of failed polls tolerated before giving up. 12 x WAIT_TIME gives
# 60 s of sleeping between polls; the 2 s size-stability pause performed
# inside check_file on each poll is not included in that figure.
MAX_ATTEMPTS=12 # 1 minute total wait time
# Function to check for file existence and stability
#######################################
# Report whether $output_file exists and has stopped growing.
# The file size is sampled twice, settle_delay seconds apart; the file counts
# as stable only when both samples succeed and are identical.
# Globals:   output_file (read)
# Arguments: $1 - optional settle delay in seconds between the two size
#                 samples (default: 2)
# Outputs:   one status line on stdout
# Returns:   0 if the file exists and its size is stable, 1 otherwise
#######################################
check_file() {
  local settle_delay=${1:-2}
  local initial_size current_size

  if [ ! -f "$output_file" ]; then
    echo "File $output_file does not exist. Waiting..."
    return 1
  fi

  # stat can fail if the file disappears between the existence test and the
  # sample; an empty sample is treated as "not stable" instead of being fed
  # to a numeric comparison.
  initial_size=$(stat -c %s "$output_file") || initial_size=""
  sleep "$settle_delay"
  current_size=$(stat -c %s "$output_file") || current_size=""

  if [ -n "$initial_size" ] && [ "$initial_size" = "$current_size" ]; then
    echo "File $output_file is available and stable."
    return 0
  fi

  echo "File $output_file is still being written. Waiting..."
  return 1
}
# Wait for the file to become available
# Poll check_file until it reports success. After every failed poll, sleep
# WAIT_TIME seconds and count the attempt; once MAX_ATTEMPTS failed polls have
# accumulated, report the timeout and abort with status 1.
attempt=0
until check_file; do
  sleep "$WAIT_TIME"
  attempt=$((attempt + 1))
  # Budget not exhausted yet: go round again
  [ "$attempt" -ge "$MAX_ATTEMPTS" ] || continue
  echo "Max attempts reached. File may not be fully written."
  exit 1
done
# Final check using lsof: a zero exit status means some process still has the
# output file open. Any non-zero status is taken as "closed" -- note that this
# includes the case where lsof itself cannot be run.
if ! lsof "$output_file" > /dev/null 2>&1; then
  echo "File $output_file is fully written and closed."
  exit 0
fi

echo "Warning: File $output_file is still open by another process."
exit 1

View File

@@ -24,7 +24,7 @@ echo "Applying migrations to the public and tenant schema..."
# Set FLASK_APP environment variables
PROJECT_DIR="/app"
export FLASK_APP=${PROJECT_DIR}/scripts/run_eveai_app.py # Adjust the path to your Flask app entry point
# Python path order: patched packages first (earlier entries are searched
# first), then any pre-existing PYTHONPATH, then the app directory.
# ${PYTHONPATH:+...} inserts the old value only when it is non-empty, so an
# unset PYTHONPATH does not leave an empty "::" element behind (Python treats
# an empty element as the current directory).
export PYTHONPATH="${PROJECT_DIR}/patched_packages${PYTHONPATH:+:${PYTHONPATH}}:${PROJECT_DIR}" # Include the app directory in the Python path & patched packages
# Run Alembic upgrade for the public schema
echo "Applying migrations to the public schema..."

View File

@@ -1,7 +1,8 @@
#!/bin/bash
cd "/app/" || exit 1
export PROJECT_DIR="/app"
# Python path order: patched packages first (earlier entries are searched
# first), then any pre-existing PYTHONPATH, then the app directory.
# ${PYTHONPATH:+...} inserts the old value only when it is non-empty, so an
# unset PYTHONPATH does not leave an empty "::" element behind (Python treats
# an empty element as the current directory).
export PYTHONPATH="${PROJECT_DIR}/patched_packages${PYTHONPATH:+:${PYTHONPATH}}:${PROJECT_DIR}" # Include the app directory in the Python path & patched packages
# Set flask environment variables
#export FLASK_ENV=development # Use 'production' as appropriate

View File

@@ -1,7 +1,8 @@
#!/usr/bin/env bash
cd "/app/" || exit 1
export PROJECT_DIR="/app"
# Python path order: patched packages first (earlier entries are searched
# first), then any pre-existing PYTHONPATH, then the app directory.
# ${PYTHONPATH:+...} inserts the old value only when it is non-empty, so an
# unset PYTHONPATH does not leave an empty "::" element behind (Python treats
# an empty element as the current directory).
export PYTHONPATH="${PROJECT_DIR}/patched_packages${PYTHONPATH:+:${PYTHONPATH}}:${PROJECT_DIR}" # Include the app directory in the Python path & patched packages
# Start a worker for the 'llm_interactions' queue with auto-scaling
celery -A eveai_chat_workers.celery worker --loglevel=info -Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h &

View File

@@ -1,7 +1,8 @@
#!/bin/bash
cd "/app/" || exit 1
export PROJECT_DIR="/app"
# Python path order: patched packages first (earlier entries are searched
# first), then any pre-existing PYTHONPATH, then the app directory.
# ${PYTHONPATH:+...} inserts the old value only when it is non-empty, so an
# unset PYTHONPATH does not leave an empty "::" element behind (Python treats
# an empty element as the current directory).
export PYTHONPATH="${PROJECT_DIR}/patched_packages${PYTHONPATH:+:${PYTHONPATH}}:${PROJECT_DIR}" # Include the app directory in the Python path & patched packages
# Start a worker for the 'embeddings' queue with higher concurrency
celery -A eveai_workers.celery worker --loglevel=info -Q embeddings --autoscale=2,8 --hostname=embeddings_worker@%h &