diff --git a/common/utils/execution_progress.py b/common/utils/execution_progress.py index 2b69c17..434a6b8 100644 --- a/common/utils/execution_progress.py +++ b/common/utils/execution_progress.py @@ -31,6 +31,8 @@ class ExecutionProgressTracker: def send_update(self, ctask_id: str, processing_type: str, data: dict): """Send an update about execution progress""" try: + current_app.logger.debug(f"Sending update for {ctask_id} with processing type {processing_type} and data:\n" + f"{data}") key = self._get_key(ctask_id) # First verify Redis is still connected diff --git a/common/utils/model_utils.py b/common/utils/model_utils.py index 3a239d7..e4b4013 100644 --- a/common/utils/model_utils.py +++ b/common/utils/model_utils.py @@ -6,7 +6,6 @@ from langchain_core.language_models import BaseChatModel from common.langchain.llm_metrics_handler import LLMMetricsHandler from langchain_openai import ChatOpenAI -from langchain_anthropic import ChatAnthropic from langchain_mistralai import ChatMistralAI from flask import current_app diff --git a/config/config.py b/config/config.py index 6d022d9..6e711a4 100644 --- a/config/config.py +++ b/config/config.py @@ -292,7 +292,7 @@ class DevConfig(Config): # OBJECT STORAGE OBJECT_STORAGE_TYPE = 'MINIO' - OBJECT_STORAGE_TENANT_BASE = 'Bucket' + OBJECT_STORAGE_TENANT_BASE = 'bucket' # MINIO MINIO_ENDPOINT = 'minio:9000' MINIO_ACCESS_KEY = 'minioadmin' @@ -316,7 +316,7 @@ class StagingConfig(Config): # OBJECT STORAGE OBJECT_STORAGE_TYPE = 'SCALEWAY' - OBJECT_STORAGE_TENANT_BASE = 'Folder' + OBJECT_STORAGE_TENANT_BASE = 'folder' OBJECT_STORAGE_BUCKET_NAME = 'eveai-staging' # MINIO MINIO_ENDPOINT = environ.get('MINIO_ENDPOINT') diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base new file mode 100644 index 0000000..4304ce4 --- /dev/null +++ b/docker/Dockerfile.base @@ -0,0 +1,36 @@ +ARG PYTHON_VERSION=3.12.11 +FROM python:${PYTHON_VERSION}-slim as base + +ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + postgresql-client \ + curl \ + tini \ + && rm -rf /var/lib/apt/lists/* + +ARG UID=10001 +ARG GID=10001 +RUN groupadd -g ${GID} appuser && useradd -u ${UID} -g ${GID} -M -d /nonexistent -s /usr/sbin/nologin appuser + +WORKDIR /app + +RUN mkdir -p /app/logs && chown -R appuser:appuser /app +COPY ../requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +COPY ../common /app/common +COPY ../config /app/config +COPY ../scripts /app/scripts + +RUN chown -R appuser:appuser /app && chmod +x /app/scripts/start.sh + +ENV PYTHONPATH=/app:/app/patched_packages:${PYTHONPATH} + +USER appuser +EXPOSE 8080 + +ENTRYPOINT ["/usr/bin/tini","-g","--"] +CMD ["bash","-lc","scripts/start.sh"] \ No newline at end of file diff --git a/docker/build_and_push_eveai.sh b/docker/build_and_push_eveai.sh index 6434757..9cc4a15 100755 --- a/docker/build_and_push_eveai.sh +++ b/docker/build_and_push_eveai.sh @@ -34,12 +34,16 @@ ACTION="both" NO_CACHE="" PROGRESS="" DEBUG="" +BUILD_BASE="" +BASE_ONLY="" # Function to display usage information usage() { - echo "Usage: $0 [-b|-p] [--no-cache] [--progress=plain] [--debug] [service1 service2 ...]" + echo "Usage: $0 [-b|-p|-bb|--base-only] [--no-cache] [--progress=plain] [--debug] [service1 service2 ...]" echo " -b: Build only" echo " -p: Push only" + echo " -bb: Build base image (in addition to services)" + echo " --base-only: Build only base image (skip services)" echo " --no-cache: Perform a clean build without using cache" echo " --progress=plain: Show detailed progress of the build" echo " --debug: Enable debug mode for the build" @@ -59,6 +63,14 @@ while [[ $# -gt 0 ]]; do ACTION="push" shift ;; + -bb) + BUILD_BASE="true" + shift + ;; + --base-only) + BASE_ONLY="true" + shift + ;; --no-cache) NO_CACHE="--no-cache" shift @@ -82,6 +94,41 @@ while [[ $# -gt 0 ]]; do esac done +# Function to build base image +build_base_image() { + echo "🏗️ Building base image..." + local BASE_IMAGE_NAME="$REGISTRY/$ACCOUNT/eveai-base:$TAG" + + echo "Building base image for platform: $PLATFORM" + echo "Base image tag: $BASE_IMAGE_NAME" + + podman build \ + --platform "$PLATFORM" \ + $NO_CACHE \ + $PROGRESS \ + $DEBUG \ + -t "$ACCOUNT/eveai-base:$TAG" \ + -t "$BASE_IMAGE_NAME" \ + -f Dockerfile.base \ + .. + + if [ "$ACTION" = "push" ] || [ "$ACTION" = "both" ]; then + echo "Pushing base image to registry..." + podman push "$BASE_IMAGE_NAME" + fi + + echo "✅ Base image built successfully" +} + +# Function to check if we should build base image +should_build_base() { + if [ "$BUILD_BASE" = "true" ] || [ "$BASE_ONLY" = "true" ]; then + return 0 # true + else + return 1 # false + fi +} + # Function to build and/or push a service process_service() { local SERVICE="$1" @@ -167,6 +214,20 @@ else SERVICES=("$@") fi +# Handle base-only mode +if [ "$BASE_ONLY" = "true" ]; then + echo "🎯 Base-only mode: Building only base image" + build_base_image + echo -e "\033[32m✅ Base image build completed!\033[0m" + exit 0 +fi + +# Build base image if requested +if should_build_base; then + build_base_image + echo "" # Empty line for readability +fi + echo "Using simplified AMD64-only approach for maximum compatibility..." echo "Images will be tagged as: $REGISTRY/$ACCOUNT/[service]:$TAG" diff --git a/docker/compose_dev.yaml b/docker/compose_dev.yaml index 4a05398..69d4d94 100644 --- a/docker/compose_dev.yaml +++ b/docker/compose_dev.yaml @@ -63,12 +63,20 @@ services: context: .. 
dockerfile: ./docker/eveai_app/Dockerfile ports: - - 3001:5001 # Dev app volgens port schema + - 3001:8080 # Dev app volgens port schema expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_app + ROLE: web + PORT: 8080 + WORKERS: 1 # Dev: lagere concurrency + WORKER_CLASS: gevent + WORKER_CONN: 100 + LOGLEVEL: info # Lowercase voor gunicorn + MAX_REQUESTS: 1000 + MAX_REQUESTS_JITTER: 100 volumes: - ../eveai_app:/app/eveai_app - ../common:/app/common @@ -86,7 +94,7 @@ services: minio: condition: service_healthy healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5001/healthz/ready"] + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz/ready"] interval: 30s timeout: 10s retries: 3 @@ -104,6 +112,11 @@ services: environment: <<: *common-variables COMPONENT_NAME: eveai_workers + ROLE: worker + CELERY_CONCURRENCY: 1 # Dev: lagere concurrency + CELERY_LOGLEVEL: INFO # Uppercase voor celery + CELERY_MAX_TASKS_PER_CHILD: 1000 + CELERY_PREFETCH: 1 volumes: - ../eveai_workers:/app/eveai_workers - ../common:/app/common @@ -127,12 +140,20 @@ services: context: .. dockerfile: ./docker/eveai_chat_client/Dockerfile ports: - - 3004:5004 # Dev chat client volgens port schema + - 3004:8080 # Dev chat client volgens port schema expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_chat_client + ROLE: web + PORT: 8080 + WORKERS: 1 # Dev: lagere concurrency + WORKER_CLASS: gevent + WORKER_CONN: 100 + LOGLEVEL: info # Lowercase voor gunicorn + MAX_REQUESTS: 1000 + MAX_REQUESTS_JITTER: 100 volumes: - ../eveai_chat_client:/app/eveai_chat_client - ../common:/app/common @@ -148,7 +169,7 @@ services: minio: condition: service_healthy healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5004/healthz/ready"] + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz/ready"] interval: 30s timeout: 10s retries: 3 @@ -166,6 +187,11 @@ services: environment: <<: *common-variables COMPONENT_NAME: eveai_chat_workers + ROLE: worker + CELERY_CONCURRENCY: 8 # Dev: lagere concurrency + CELERY_LOGLEVEL: INFO # Uppercase voor celery + CELERY_MAX_TASKS_PER_CHILD: 1000 + CELERY_PREFETCH: 1 volumes: - ../eveai_chat_workers:/app/eveai_chat_workers - ../common:/app/common @@ -187,12 +213,20 @@ services: context: .. dockerfile: ./docker/eveai_api/Dockerfile ports: - - 3003:5003 # Dev API volgens port schema + - 3003:8080 # Dev API volgens port schema expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_api + ROLE: web + PORT: 8080 + WORKERS: 1 # Dev: lagere concurrency + WORKER_CLASS: gevent + WORKER_CONN: 100 + LOGLEVEL: info # Lowercase voor gunicorn + MAX_REQUESTS: 1000 + MAX_REQUESTS_JITTER: 100 volumes: - ../eveai_api:/app/eveai_api - ../common:/app/common @@ -208,7 +242,7 @@ services: minio: condition: service_healthy healthcheck: - test: [ "CMD", "curl", "-f", "http://localhost:5003/healthz/ready" ] + test: [ "CMD", "curl", "-f", "http://localhost:8080/healthz/ready" ] interval: 30s timeout: 10s retries: 3 @@ -216,26 +250,28 @@ services: networks: - eveai-dev-network - eveai_beat: - image: ${REGISTRY_PREFIX:-}josakola/eveai_beat:latest - build: - context: .. 
- dockerfile: ./docker/eveai_beat/Dockerfile - environment: - <<: *common-variables - COMPONENT_NAME: eveai_beat - volumes: - - ../eveai_beat:/app/eveai_beat - - ../common:/app/common - - ../config:/app/config - - ../scripts:/app/scripts - - ../patched_packages:/app/patched_packages - - ./eveai_logs:/app/logs - depends_on: - redis: - condition: service_healthy - networks: - - eveai-dev-network +# eveai_beat: +# image: ${REGISTRY_PREFIX:-}josakola/eveai_beat:latest +# build: +# context: .. +# dockerfile: ./docker/eveai_beat/Dockerfile +# environment: +# <<: *common-variables +# COMPONENT_NAME: eveai_beat +# ROLE: beat +# CELERY_LOGLEVEL: INFO # Uppercase voor celery +# volumes: +# - ../eveai_beat:/app/eveai_beat +# - ../common:/app/common +# - ../config:/app/config +# - ../scripts:/app/scripts +# - ../patched_packages:/app/patched_packages +# - ./eveai_logs:/app/logs +# depends_on: +# redis: +# condition: service_healthy +# networks: +# - eveai-dev-network eveai_entitlements: image: ${REGISTRY_PREFIX:-}josakola/eveai_entitlements:latest @@ -247,6 +283,11 @@ services: environment: <<: *common-variables COMPONENT_NAME: eveai_entitlements + ROLE: worker + CELERY_CONCURRENCY: 1 # Dev: lagere concurrency + CELERY_LOGLEVEL: INFO # Uppercase voor celery + CELERY_MAX_TASKS_PER_CHILD: 1000 + CELERY_PREFETCH: 1 volumes: - ../eveai_entitlements:/app/eveai_entitlements - ../common:/app/common diff --git a/docker/compose_test.yaml b/docker/compose_test.yaml index 33db5a7..a002f03 100644 --- a/docker/compose_test.yaml +++ b/docker/compose_test.yaml @@ -65,12 +65,20 @@ services: eveai_app: image: ${REGISTRY_PREFIX:-}josakola/eveai_app:latest ports: - - 4001:5001 + - 4001:8080 expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_app + ROLE: web + PORT: 8080 + WORKERS: 2 # Test: hogere concurrency + WORKER_CLASS: gevent + WORKER_CONN: 100 + LOGLEVEL: info # Lowercase voor gunicorn + MAX_REQUESTS: 1000 + MAX_REQUESTS_JITTER: 100 volumes: - test_eveai_logs:/app/logs depends_on: @@ -79,7 +87,7 @@ services: minio: condition: service_healthy healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5001/healthz/ready"] + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz/ready"] interval: 30s timeout: 10s retries: 3 @@ -95,6 +103,11 @@ services: environment: <<: *common-variables COMPONENT_NAME: eveai_workers + ROLE: worker + CELERY_CONCURRENCY: 2 # Test: hogere concurrency + CELERY_LOGLEVEL: INFO # Uppercase voor celery + CELERY_MAX_TASKS_PER_CHILD: 1000 + CELERY_PREFETCH: 1 volumes: - test_eveai_logs:/app/logs depends_on: @@ -109,12 +122,20 @@ services: eveai_chat_client: image: ${REGISTRY_PREFIX:-}josakola/eveai_chat_client:latest ports: - - 4004:5004 + - 4004:8080 expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_chat_client + ROLE: web + PORT: 8080 + WORKERS: 2 # Test: hogere concurrency + WORKER_CLASS: gevent + WORKER_CONN: 100 + LOGLEVEL: info # Lowercase voor gunicorn + MAX_REQUESTS: 1000 + MAX_REQUESTS_JITTER: 100 volumes: - test_eveai_logs:/app/logs depends_on: @@ -123,7 +144,7 @@ services: minio: condition: service_healthy healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5004/healthz/ready"] + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz/ready"] interval: 30s timeout: 10s retries: 3 @@ -139,6 +160,11 @@ services: environment: <<: *common-variables COMPONENT_NAME: eveai_chat_workers + ROLE: worker + CELERY_CONCURRENCY: 2 # Test: hogere concurrency + CELERY_LOGLEVEL: INFO # Uppercase voor celery + CELERY_MAX_TASKS_PER_CHILD: 
1000 + CELERY_PREFETCH: 1 volumes: - test_eveai_logs:/app/logs depends_on: @@ -151,12 +177,20 @@ services: eveai_api: image: ${REGISTRY_PREFIX:-}josakola/eveai_api:latest ports: - - 4003:5003 + - 4003:8080 expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_api + ROLE: web + PORT: 8080 + WORKERS: 2 # Test: hogere concurrency + WORKER_CLASS: gevent + WORKER_CONN: 100 + LOGLEVEL: info # Lowercase voor gunicorn + MAX_REQUESTS: 1000 + MAX_REQUESTS_JITTER: 100 volumes: - test_eveai_logs:/app/logs depends_on: @@ -165,7 +199,7 @@ services: minio: condition: service_healthy healthcheck: - test: [ "CMD", "curl", "-f", "http://localhost:5003/healthz/ready" ] + test: [ "CMD", "curl", "-f", "http://localhost:8080/healthz/ready" ] interval: 30s timeout: 10s retries: 3 @@ -174,19 +208,21 @@ services: - eveai-test-network restart: unless-stopped - eveai_beat: - image: ${REGISTRY_PREFIX:-}josakola/eveai_beat:latest - environment: - <<: *common-variables - COMPONENT_NAME: eveai_beat - volumes: - - test_eveai_logs:/app/logs - depends_on: - redis: - condition: service_healthy - networks: - - eveai-test-network - restart: unless-stopped +# eveai_beat: +# image: ${REGISTRY_PREFIX:-}josakola/eveai_beat:latest +# environment: +# <<: *common-variables +# COMPONENT_NAME: eveai_beat +# ROLE: beat +# CELERY_LOGLEVEL: INFO # Uppercase voor celery +# volumes: +# - test_eveai_logs:/app/logs +# depends_on: +# redis: +# condition: service_healthy +# networks: +# - eveai-test-network +# restart: unless-stopped eveai_entitlements: image: ${REGISTRY_PREFIX:-}josakola/eveai_entitlements:latest @@ -195,6 +231,11 @@ services: environment: <<: *common-variables COMPONENT_NAME: eveai_entitlements + ROLE: worker + CELERY_CONCURRENCY: 2 # Test: hogere concurrency + CELERY_LOGLEVEL: INFO # Uppercase voor celery + CELERY_MAX_TASKS_PER_CHILD: 1000 + CELERY_PREFETCH: 1 volumes: - test_eveai_logs:/app/logs depends_on: diff --git a/docker/eveai_api/Dockerfile b/docker/eveai_api/Dockerfile index 79f807d..537b5dd 100644 --- a/docker/eveai_api/Dockerfile +++ b/docker/eveai_api/Dockerfile @@ -1,70 +1,5 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. -# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -RUN apt-get update && apt-get install -y \ - build-essential \ - gcc \ - postgresql-client \ - curl \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs - -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. 
-# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. - -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - -# Copy the source code into the container. +# Copy the service-specific source code into the container. COPY eveai_api /app/eveai_api -COPY common /app/common -COPY config /app/config -COPY scripts /app/scripts -COPY patched_packages /app/patched_packages -# Set permissions for entrypoint script -RUN chmod 777 /app/scripts/entrypoint.sh - -# Set ownership of the application directory to the non-privileged user -RUN chown -R appuser:appuser /app - -# Expose the port that the application listens on. -EXPOSE 5003 - -# Set entrypoint and command -ENTRYPOINT ["/app/scripts/entrypoint.sh"] -CMD ["/app/scripts/start_eveai_api.sh"] diff --git a/docker/eveai_app/Dockerfile b/docker/eveai_app/Dockerfile index f302fff..7b88912 100644 --- a/docker/eveai_app/Dockerfile +++ b/docker/eveai_app/Dockerfile @@ -1,72 +1,4 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base - -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. -# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -RUN apt-get update && apt-get install -y \ - build-essential \ - gcc \ - postgresql-client \ - curl \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs - -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. -# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. - -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest # Copy the source code into the container. COPY eveai_app /app/eveai_app -COPY common /app/common -COPY config /app/config -COPY migrations /app/migrations -COPY scripts /app/scripts -COPY patched_packages /app/patched_packages COPY content /app/content - -# Set permissions for entrypoint script -RUN chmod 777 /app/scripts/entrypoint.sh - -# Set ownership of the application directory to the non-privileged user -RUN chown -R appuser:appuser /app - -# Expose the port that the application listens on. 
-EXPOSE 5001 - -# Set entrypoint and command -ENTRYPOINT ["/app/scripts/entrypoint.sh"] -CMD ["/app/scripts/start_eveai_app.sh"] diff --git a/docker/eveai_beat/Dockerfile b/docker/eveai_beat/Dockerfile index 9fb8ce1..39359e1 100644 --- a/docker/eveai_beat/Dockerfile +++ b/docker/eveai_beat/Dockerfile @@ -1,65 +1,5 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. -# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -#RUN apt-get update && apt-get install -y \ -# build-essential \ -# gcc \ -# && apt-get clean \ -# && rm -rf /var/lib/apt/lists/* - -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs - -# Install Python dependencies. - -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. -# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. - -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - -# Copy the source code into the container. +# Copy the service-specific source code into the container. COPY eveai_beat /app/eveai_beat -COPY common /app/common -COPY config /app/config -COPY scripts /app/scripts -COPY patched_packages /app/patched_packages -COPY --chown=root:root scripts/entrypoint_no_db.sh /app/scripts/ -# Set ownership of the application directory to the non-privileged user -RUN chown -R appuser:appuser /app - -# Set entrypoint and command -ENTRYPOINT ["/app/scripts/entrypoint_no_db.sh"] -CMD ["/app/scripts/start_eveai_beat.sh"] diff --git a/docker/eveai_chat_client/Dockerfile b/docker/eveai_chat_client/Dockerfile index b5d0eb0..8e15da0 100644 --- a/docker/eveai_chat_client/Dockerfile +++ b/docker/eveai_chat_client/Dockerfile @@ -1,72 +1,6 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. 
-# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -RUN apt-get update && apt-get install -y \ - build-essential \ - gcc \ - postgresql-client \ - curl \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs - -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. -# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. - -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - -# Copy the source code into the container. +# Copy the service-specific source code into the container. COPY eveai_chat_client /app/eveai_chat_client -COPY common /app/common -COPY config /app/config -COPY scripts /app/scripts -COPY patched_packages /app/patched_packages COPY content /app/content -# Set permissions for scripts -RUN chmod 777 /app/scripts/entrypoint.sh && \ - chmod 777 /app/scripts/start_eveai_chat_client.sh - -# Set ownership of the application directory to the non-privileged user -RUN chown -R appuser:appuser /app - -# Expose the port that the application listens on. -EXPOSE 5004 - -# Set entrypoint and command -ENTRYPOINT ["/app/scripts/entrypoint.sh"] -CMD ["/app/scripts/start_eveai_chat_client.sh"] diff --git a/docker/eveai_chat_workers/Dockerfile b/docker/eveai_chat_workers/Dockerfile index 87bb93a..d278412 100644 --- a/docker/eveai_chat_workers/Dockerfile +++ b/docker/eveai_chat_workers/Dockerfile @@ -1,68 +1,10 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. -# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -RUN apt-get update && apt-get install -y \ - build-essential \ - gcc \ - postgresql-client \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs +# Service-specific directories (preserve crewai_storage) +USER root RUN mkdir -p /app/crewai_storage && chown -R appuser:appuser /app/crewai_storage +USER appuser -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. -# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. 
- -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - -# Copy the source code into the container. +# Copy the service-specific source code into the container. COPY eveai_chat_workers /app/eveai_chat_workers -COPY common /app/common -COPY config /app/config -COPY scripts /app/scripts -COPY patched_packages /app/patched_packages -COPY --chown=root:root scripts/entrypoint.sh /app/scripts/ -# Set permissions for entrypoint script -RUN chmod 777 /app/scripts/entrypoint.sh - -# Set ownership of the application directory to the non-privileged user -RUN chown -R appuser:appuser /app - -# Set entrypoint and command -ENTRYPOINT ["/app/scripts/entrypoint.sh"] -CMD ["/app/scripts/start_eveai_chat_workers.sh"] diff --git a/docker/eveai_entitlements/Dockerfile b/docker/eveai_entitlements/Dockerfile index 083c50e..9cdf062 100644 --- a/docker/eveai_entitlements/Dockerfile +++ b/docker/eveai_entitlements/Dockerfile @@ -1,69 +1,5 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. -# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -RUN apt-get update && apt-get install -y \ - build-essential \ - gcc \ - postgresql-client \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs - -# Install Python dependencies. - -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. -# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. - -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - -# Copy the source code into the container. +# Copy the service-specific source code into the container. 
COPY eveai_entitlements /app/eveai_entitlements -COPY common /app/common -COPY config /app/config -COPY scripts /app/scripts -COPY patched_packages /app/patched_packages -COPY --chown=root:root scripts/entrypoint.sh /app/scripts/ -# Set permissions for entrypoint script -RUN chmod 777 /app/scripts/entrypoint.sh - -# Set ownership of the application directory to the non-privileged user -RUN chown -R appuser:appuser /app - -# Set entrypoint and command -ENTRYPOINT ["/app/scripts/entrypoint.sh"] -CMD ["/app/scripts/start_eveai_entitlements.sh"] diff --git a/docker/eveai_workers/Dockerfile b/docker/eveai_workers/Dockerfile index 0ad261e..0512ee7 100644 --- a/docker/eveai_workers/Dockerfile +++ b/docker/eveai_workers/Dockerfile @@ -1,70 +1,12 @@ -ARG PYTHON_VERSION=3.12.7 -FROM python:${PYTHON_VERSION}-slim as base +FROM registry.ask-eve-ai-local.com/josakola/eveai-base:latest -# Prevents Python from writing pyc files. -ENV PYTHONDONTWRITEBYTECODE=1 - -# Keeps Python from buffering stdout and stderr to avoid situations where -# the application crashes without emitting any logs due to buffering. -ENV PYTHONUNBUFFERED=1 - -# Create directory for patched packages and set permissions -RUN mkdir -p /app/patched_packages && \ - chmod 777 /app/patched_packages - -# Ensure patches are applied to the application. -ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH - -WORKDIR /app - -# Create a non-privileged user that the app will run under. -# See https://docs.docker.com/go/dockerfile-user-best-practices/ -ARG UID=10001 -RUN adduser \ - --disabled-password \ - --gecos "" \ - --home "/nonexistent" \ - --shell "/bin/bash" \ - --no-create-home \ - --uid "${UID}" \ - appuser - -# Install necessary packages and build tools -RUN apt-get update && apt-get install -y \ - build-essential \ - gcc \ - postgresql-client \ +# Service-specific packages (ffmpeg only needed for this service) +USER root +RUN apt-get update && apt-get install -y --no-install-recommends \ ffmpeg \ - && apt-get clean \ && rm -rf /var/lib/apt/lists/* +USER appuser -# Create logs directory and set permissions -RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs - -# Install Python dependencies. - -# Download dependencies as a separate step to take advantage of Docker's caching. -# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. -# Leverage a bind mount to requirements.txt to avoid having to copy them into -# into this layer. - -COPY requirements.txt /app/ -RUN python -m pip install -r /app/requirements.txt - -# Copy the source code into the container. +# Copy the service-specific source code into the container. 
COPY eveai_workers /app/eveai_workers
-COPY common /app/common
-COPY config /app/config
-COPY scripts /app/scripts
-COPY patched_packages /app/patched_packages
-COPY --chown=root:root scripts/entrypoint.sh /app/scripts/
-# Set permissions for entrypoint script
-RUN chmod 777 /app/scripts/entrypoint.sh
-
-# Set ownership of the application directory to the non-privileged user
-RUN chown -R appuser:appuser /app
-
-# Set entrypoint and command
-ENTRYPOINT ["/app/scripts/entrypoint.sh"]
-CMD ["/app/scripts/start_eveai_workers.sh"]
diff --git a/documentation/k8s_migratie_startup_containers_overzicht.md b/documentation/k8s_migratie_startup_containers_overzicht.md
new file mode 100644
index 0000000..c7119f5
--- /dev/null
+++ b/documentation/k8s_migratie_startup_containers_overzicht.md
@@ -0,0 +1,106 @@
+# Kubernetes Migration Overview — eveai_app & eveai_workers
+
+This document bundles the key insights and decisions around migrating the application components to Kubernetes.
+(⚠️ Build optimizations are **not** covered here; they may follow later.)
+
+---
+
+## 1. Conceptual insights
+
+### 1.1 Web components (eveai_app)
+- **Gunicorn workers**
+  - `-w` = number of processes (CPU parallelism).
+  - `--worker-connections` = maximum number of concurrent connections *per worker* (with async workers such as `gevent`).
+- **Memory**
+  - Each replica = its own Pod = its own memory footprint.
+  - Within a Pod: each worker process takes up (almost) its own memory space.
+  - Image size ≠ runtime memory (a large image deploys more slowly, but has no direct effect on RAM).
+- **Scaling strategy**
+  - In Kubernetes, keep `-w` low (1–2) and scale horizontally with replicas.
+  - External traffic goes through an Ingress (port 80/443, hostnames/paths).
+  - No more need to pick separate ports per service as in Docker.
+
+### 1.2 Database migrations & startup actions
+- **Current situation**: migrations + cache actions are tied to container startup.
+- **Recommended approach** (see the Job sketch after this section):
+  - Use **Kubernetes Jobs** for one-off tasks (e.g. DB migrations, cache invalidation, re-indexing).
+  - Use **CronJobs** for scheduled tasks (e.g. nightly purges, periodic warm-ups).
+- **Jobs vs. Helm hooks**
+  - *Job*: runs on its own, flexible, can be triggered manually or from CI/CD.
+  - *Helm hook*: tied to a release, guarantees ordering, but less flexible.
+  - Recommended: **Jobs**, for flexibility and maintainability.
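+
+As an illustration of the Job approach recommended above, a minimal sketch of a one-off migration Job. The `flask db upgrade` commands and the `FLASK_APP` value mirror the removed `start_eveai_app.sh`; the Job name, image tag and the `eveai-db-credentials` Secret are placeholder assumptions, and the sketch assumes `/app/migrations` is available in the image or mounted.
+
+```yaml
+# Hypothetical one-off migration Job; adjust image, Secret and env wiring to the cluster.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: eveai-db-migrate
+spec:
+  backoffLimit: 2
+  ttlSecondsAfterFinished: 3600              # clean up the finished Job automatically
+  template:
+    spec:
+      restartPolicy: Never
+      containers:
+        - name: migrate
+          image: josakola/eveai_app:latest   # assumed image/tag; must contain or mount /app/migrations
+          envFrom:
+            - secretRef:
+                name: eveai-db-credentials   # assumed Secret with the DB_* variables
+          env:
+            - name: FLASK_APP
+              value: /app/scripts/run_eveai_app.py   # as in the removed start script
+          command: ["bash", "-lc"]
+          args:
+            - |
+              flask db upgrade -d /app/migrations/public &&
+              flask db upgrade -d /app/migrations/tenant &&
+              python /app/scripts/initialize_data.py
+```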
+### 1.3 Backend components (eveai_workers with Celery)
+- **Worker Pods**
+  - One role per Pod:
+    - Workers (prefork pool, concurrency matched to the CPU).
+    - Beat (scheduler) as a separate Deployment **if needed**.
+    - Flower (monitoring) as a separate Deployment (optional).
+- **Reliability**
+  - Use `acks_late`, `task_reject_on_worker_lost`, and `visibility_timeout`.
+  - Prefetch multiplier = 1, enable fairness.
+  - Set time limits per task.
+  - Make tasks idempotent.
+- **Queues & routing**
+  - Spread the workload over multiple queues (e.g. `high`, `default`, `low`).
+  - Start specialized workers that only listen to the relevant queue.
+- **Autoscaling**
+  - Kubernetes HPA (CPU/memory) is limited here.
+  - Use **KEDA** → scales workers based on queue length.
+- **Graceful shutdown**
+  - terminationGracePeriodSeconds ≥ task timeout.
+  - Celery stops accepting new work and finishes the tasks in flight.
+
+### 1.4 Flower
+- Flower is a **monitoring UI for Celery** (not part of the application itself).
+- Can be deployed standalone via a generic container (mher/flower).
+- Configuration is possible via a Helm chart (home-grown or a community chart).
+- The overhead of Airflow is not needed; Flower alone is enough in this setup.
+
+### 1.5 RBAC (conceptual)
+- **Role-Based Access Control** = the mechanism that determines who may do what in k8s.
+- Components:
+  - *Role/ClusterRole* = which actions on which resources.
+  - *RoleBinding/ClusterRoleBinding* = binds a Role to users/ServiceAccounts.
+  - *ServiceAccount* = the identity a Pod runs as.
+- Important: the principle of *least privilege* → Jobs/workers only get the permissions they need.
+
+---
+
+## 2. Practical points of attention
+
+### Web component (eveai_app)
+- Bind the port via an env var (`PORT=8080`), not hardcoded.
+- Add health endpoints (`/healthz`) for readiness/liveness probes.
+- Log to stdout/stderr.
+- Set resources for the pods (requests/limits).
+- Move migrations to a Job/Hook.
+
+### Backend component (eveai_workers)
+- Adjust the start script with safe defaults:
+  ```bash
+  celery -A scripts.run_eveai_workers worker --loglevel=INFO --concurrency=2 --max-tasks-per-child=1000 --prefetch-multiplier=1 -O fair
+  ```
+- Only use Beat if you need periodic Celery tasks.
+- Deploy Flower separately (Deployment + Service + optional Ingress).
+- Consider KEDA for autoscaling on queue length (see the sketch after this list).
+- Add a PodDisruptionBudget so that workers stay available at all times.
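+
+A minimal KEDA sketch for the queue-length autoscaling mentioned above. The ScaledObject and Deployment names, the Redis address and the thresholds are illustrative assumptions; `llm_interactions` matches the Celery queue used by eveai_chat_workers, which the Redis broker stores as a list of the same name.
+
+```yaml
+# Hypothetical KEDA ScaledObject scaling chat workers on Celery queue length in Redis.
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: eveai-chat-workers-scaler
+spec:
+  scaleTargetRef:
+    name: eveai-chat-workers        # assumed Deployment name
+  minReplicaCount: 1
+  maxReplicaCount: 8
+  triggers:
+    - type: redis
+      metadata:
+        address: redis:6379         # assumed in-cluster Redis service
+        listName: llm_interactions  # Celery queue = Redis list of the same name
+        listLength: "10"            # scale out above ~10 pending tasks
+```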
+
+### Jobs & CronJobs
+- Use Jobs for DB migrations and startup actions (cache invalidation, warm-up).
+- Use CronJobs for scheduled maintenance tasks.
+- Make tasks idempotent and set deadlines/backoff limits.
+- Clean up with `ttlSecondsAfterFinished`.
+
+---
+
+## 3. Summary of recommendations
+1. **Take startup actions out of the app containers** → move them to Jobs/CronJobs.
+2. **Keep app containers lean and simple** → 1–2 workers per pod, scale with replicas.
+3. **Run Celery tasks reliably** → acks_late, visibility_timeout, idempotency.
+4. **Separate responsibilities** → deploy workers, beat (if needed) and Flower separately.
+5. **Monitoring & autoscaling** → use health probes, resource limits and KEDA for the workers.
+6. **Security** → use RBAC & Secrets to keep access tightly scoped.
+7. **Flower** → standalone is enough, no Airflow needed.
+
+---
diff --git a/documentation/redis_db_best_practices.md b/documentation/redis_db_best_practices.md
new file mode 100644
index 0000000..a697db9
--- /dev/null
+++ b/documentation/redis_db_best_practices.md
@@ -0,0 +1,48 @@
+# Redis Databases (db=0..15) — Best Practices as of 2025
+
+## What are Redis DBs?
+- Redis supports multiple logical databases (`0..15` by default).
+- Each DB has its own keyspace, but **everything shares the same instance**:
+  - Memory (`maxmemory`)
+  - Eviction policy
+  - Persistence files
+  - ACLs (no per-DB permissions)
+  - Replication
+- In **Redis Cluster** only DB 0 exists.
+
+---
+
+## Drawbacks of multiple DBs
+- **No isolation**: eviction and memory are shared.
+- **Not cluster-compatible**: only DB 0 works.
+- **More connection pools**: each DB → a separate pool → more sockets.
+- **Hard to secure**: ACLs do not apply per DB, only via key prefixes.
+- **Operational confusion**: forget a `SELECT` → you end up looking for keys in the wrong DB.
+
+---
+
+## When were they used?
+- In the past, for simple separation: e.g. DB 0 for cache, DB 1 for sessions.
+- To be able to run `FLUSHDB` without losing all data.
+- Legacy clients/tools expected multiple DBs.
+
+---
+
+## Modern best practices
+✅ **Always use DB 0** (certainly if you ever want to move to Redis Cluster).
+✅ **Organize data with key prefixes** (e.g. `cache:`, `sess:`, `celery:`).
+✅ **Use ACLs per prefix** for access control.
+✅ **Split workloads into separate instances** if you need real isolation (cache vs sessions vs Celery).
+✅ **Monitor eviction and memory** (`used_memory`, `evicted_keys`) instead of relying on DB separation.
+
+❌ **Do not use multiple DBs** for isolation or multi-tenancy.
+❌ **Do not rely on DBs** for security or scaling.
+❌ **Do not expect any difference in eviction/persistence** between DBs.
+
+---
+
+## Conclusion
+- Redis databases are a **historical artifact**.
+- They add **no real isolation** and do not carry over to Redis Cluster.
+- **Prefixes + ACLs** are the modern way to separate data.
+- For real isolation: use multiple Redis instances or Redis Cluster.
diff --git a/eveai_workers/processors/automagic_html_processor.py b/eveai_workers/processors/automagic_html_processor.py
index f90b37e..fd3c4f1 100644
--- a/eveai_workers/processors/automagic_html_processor.py
+++ b/eveai_workers/processors/automagic_html_processor.py
@@ -1,5 +1,4 @@
 import io
-import pdfplumber
 from flask import current_app
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.output_parsers import StrOutputParser
diff --git a/eveai_workers/processors/pdf_processor.py b/eveai_workers/processors/pdf_processor.py
index cc14997..ff81f74 100644
--- a/eveai_workers/processors/pdf_processor.py
+++ b/eveai_workers/processors/pdf_processor.py
@@ -1,5 +1,4 @@
 import io
-import pdfplumber
 from flask import current_app
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.output_parsers import StrOutputParser
diff --git a/nginx/nginx.conf b/nginx/nginx.conf
index 353c6d8..4155747 100644
--- a/nginx/nginx.conf
+++ b/nginx/nginx.conf
@@ -18,11 +18,6 @@ http {
     include mime.types;
     default_type application/octet-stream;
-    # Define upstream servers
-    upstream eveai_chat_client {
-        server eveai_chat_client:5004;
-    }
-
     log_format custom_log_format '$remote_addr - $remote_user [$time_local] "$request" '
                                  '$status $body_bytes_sent "$http_referer" '
                                  '"$http_user_agent" "$http_x_forwarded_for" '
@@ -99,7 +94,7 @@ http {
#        }

        location /chat-client/ {
-            proxy_pass http://eveai_chat_client/;
+            proxy_pass http://eveai_chat_client:8080/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
@@ -122,7 +117,7 @@ http {
#            include uwsgi_params;
#            uwsgi_pass 127.0.0.1:5001;
#            uwsgi_read_timeout 300;
-            proxy_pass http://eveai_app:5001/;
+            proxy_pass http://eveai_app:8080/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -193,7 +188,7 @@ http {
            add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always;
            add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always;
-            proxy_pass http://eveai_api:5003/;
+            proxy_pass http://eveai_api:8080/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
diff --git a/requirements.txt b/requirements.txt
index 93c902a..5f40c36 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,18 +24,16 @@ greenlet~=3.2.1
 gunicorn~=23.0.0
 Jinja2~=3.1.6
 kombu~=5.3.7
-langchain~=0.3.25
-langchain-anthropic~=0.3.12
-langchain-community~=0.3.23 -langchain-core~=0.3.58 -langchain-mistralai~=0.2.10 -langchain-openai~=0.3.16 -langchain-postgres~=0.0.14 -langchain-text-splitters~=0.3.8 +langchain~=0.3.27 +langchain-community~=0.3.29 +langchain-core~=0.3.75 +langchain-mistralai~=0.2.11 +langchain-openai~=0.3.32 +langchain-postgres~=0.0.15 +langchain-text-splitters~=0.3.10 langcodes~=3.4.0 langdetect~=1.0.9 -langsmith~=0.1.81 -openai~=1.75.0 +openai~=1.102.0 pg8000~=1.31.2 pgvector~=0.2.5 pycryptodome~=3.20.0 @@ -65,13 +63,10 @@ cryptography~=43.0.0 graypy~=2.1.0 lxml~=5.3.0 pillow~=10.4.0 -pdfplumber~=0.11.4 PyPDF2~=3.0.1 flask-restx~=1.3.0 flask-healthz~=1.0.1 -langsmith~=0.1.121 anthropic~=0.50.0 -prometheus-client~=0.21.1 prometheus-flask-exporter~=0.23.2 flower~=2.0.1 psutil~=6.0.0 @@ -85,14 +80,14 @@ python-docx~=1.1.2 crewai~=0.152.0 sseclient~=0.0.27 termcolor~=2.5.0 -mistral-common~=1.5.5 -mistralai~=1.7.1 +mistral-common~=1.8.4 +mistralai~=1.9.9 contextvars~=2.4 -pandas~=2.2.3 -prometheus_client~=0.21.1 +pandas~=2.3.2 +prometheus_client~=0.22.1 scaleway~=2.9.0 html2text~=2025.4.15 -markdown~=3.8 +markdown~=3.8.2 python-json-logger~=2.0.7 qrcode[pil]==8.2 xxhash~=3.5.0 \ No newline at end of file diff --git a/scripts/run.py b/scripts/run.py new file mode 100644 index 0000000..2fa6861 --- /dev/null +++ b/scripts/run.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +import os +import sys +import importlib + +def main(): + component = os.getenv('COMPONENT_NAME', 'eveai_app') + role = os.getenv('ROLE', 'web') + + if role == 'web': + # Web services + from gevent import monkey + monkey.patch_all() + + try: + module = importlib.import_module(component) + app = module.create_app() + + except ImportError as e: + print(f"Error importing {component}: {e}", file=sys.stderr) + sys.exit(1) + except AttributeError as e: + print(f"Error: {component} module does not have create_app function: {e}", file=sys.stderr) + sys.exit(1) + + elif role in ['worker', 'beat']: + # Worker services + try: + module = importlib.import_module(component) + celery = module.celery + + except ImportError as e: + print(f"Error importing {component}: {e}", file=sys.stderr) + sys.exit(1) + except AttributeError as e: + print(f"Error: {component} module does not have celery object: {e}", file=sys.stderr) + sys.exit(1) + + else: + print(f"Unknown role: {role}", file=sys.stderr) + sys.exit(1) + +# For web services - expose app object for gunicorn +if os.getenv('ROLE', 'web') == 'web': + component = os.getenv('COMPONENT_NAME', 'eveai_app') + try: + from gevent import monkey + monkey.patch_all() + + module = importlib.import_module(component) + app = module.create_app() + except (ImportError, AttributeError) as e: + print(f"Error setting up app for {component}: {e}", file=sys.stderr) + app = None + +# For worker/beat services - expose celery object +elif os.getenv('ROLE') in ['worker', 'beat']: + component = os.getenv('COMPONENT_NAME', 'eveai_workers') + try: + module = importlib.import_module(component) + celery = module.celery + except (ImportError, AttributeError) as e: + print(f"Error setting up celery for {component}: {e}", file=sys.stderr) + celery = None + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/start.sh b/scripts/start.sh new file mode 100755 index 0000000..3381f71 --- /dev/null +++ b/scripts/start.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROLE="${ROLE:-web}" # web | worker | beat +PORT="${PORT:-8080}" # web poort (in k8s vaak 8080) +WORKERS="${WORKERS:-1}" # gunicorn workers 
(web) +WORKER_CLASS="${WORKER_CLASS:-gevent}" # web: gevent|sync +WORKER_CONN="${WORKER_CONN:-100}" # web: --worker-connections +LOGLEVEL="${LOGLEVEL:-info}" +MAX_REQ="${MAX_REQUESTS:-1000}" +MAX_JITTER="${MAX_REQUESTS_JITTER:-100}" +COMPONENT_NAME="${COMPONENT_NAME:-eveai_app}" # component name for dynamic import + +case "$ROLE" in + web) + echo "[start] role=web component=$COMPONENT_NAME port=$PORT workers=$WORKERS class=$WORKER_CLASS" + exec gunicorn -w "$WORKERS" -k "$WORKER_CLASS" \ + -b "0.0.0.0:${PORT}" --worker-connections "$WORKER_CONN" \ + --access-logfile - --error-logfile - --log-level "$LOGLEVEL" \ + --graceful-timeout 25 --timeout 30 --keep-alive 5 \ + --max-requests "$MAX_REQ" --max-requests-jitter "$MAX_JITTER" \ + scripts.run:app + ;; + worker) + echo "[start] role=worker component=$COMPONENT_NAME" + CONCURRENCY="${CELERY_CONCURRENCY:-2}" + exec celery -A scripts.run worker \ + --loglevel="${CELERY_LOGLEVEL:-INFO}" \ + --concurrency="${CONCURRENCY}" \ + --max-tasks-per-child="${CELERY_MAX_TASKS_PER_CHILD:-1000}" \ + --prefetch-multiplier="${CELERY_PREFETCH:-1}" -O fair + ;; + beat) + echo "[start] role=beat component=$COMPONENT_NAME" + exec celery -A scripts.run beat \ + --loglevel="${CELERY_LOGLEVEL:-INFO}" + ;; + *) + echo "Unknown ROLE=$ROLE" >&2; exit 1 + ;; +esac diff --git a/scripts/start_eveai_api.sh b/scripts/start_eveai_api.sh deleted file mode 100755 index 4075f5c..0000000 --- a/scripts/start_eveai_api.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -cd "/app" || exit 1 -export PROJECT_DIR="/app" -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # Include the app directory in the Python path & patched packages - -# Set FLASK_APP environment variables -export FLASK_APP=${PROJECT_DIR}/scripts/run_eveai_app.py # Adjust the path to your Flask app entry point - - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Start Flask app -gunicorn -w 1 -k gevent -b 0.0.0.0:5003 --worker-connections 100 scripts.run_eveai_api:app diff --git a/scripts/start_eveai_app.sh b/scripts/start_eveai_app.sh deleted file mode 100755 index 91c7ae3..0000000 --- a/scripts/start_eveai_app.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -cd "/app" || exit 1 -export PYTHONPATH="$PYTHONPATH:/app/" - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Wait for the database to be ready -echo "Waiting for database to be ready" -until pg_isready -h $DB_HOST -p $DB_PORT; do - echo "Postgres is unavailable - sleeping" - sleep 2 -done -echo "Postgres is up - executing commands" - -export PGPASSWORD=$DB_PASS -# Check if the database exists and initialize if not -if ! psql -U $DB_USER -h $DB_HOST -p $DB_PORT -d $DB_NAME -c '\dt' | grep -q 'No relations found'; then - echo "Database eveai does not exist or is empty. Initializing..." - psql -U $DB_USER -h $DB_HOST -p $DB_PORT -d postgres -c "CREATE DATABASE $DB_NAME;" - psql -U $DB_USER -h $DB_HOST -p $DB_PORT -d $DB_NAME -c "CREATE EXTENSION IF NOT EXISTS vector;" -fi - -echo "Applying migrations to the public and tenant schema..." - -# Set FLASK_APP environment variables -PROJECT_DIR="/app" -export FLASK_APP=${PROJECT_DIR}/scripts/run_eveai_app.py # Adjust the path to your Flask app entry point -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # Include the app directory in the Python path & patched packages - -# Run Alembic upgrade for the public schema -echo "Applying migrations to the public schema..." 
-flask db upgrade -d "${PROJECT_DIR}/migrations/public" -echo "Finished applying migrations to the public schema..." - -# Run Alembic upgrade for the tenant schema -echo "Applying migrations to the tenant schema..." -flask db upgrade -d "${PROJECT_DIR}/migrations/tenant" -echo "Finished applying migrations to the tenant schema..." - -# Set flask environment variables -#export FLASK_ENV=development # Use 'production' as appropriate -#export FLASK_DEBUG=1 # Use 0 for production - -# Initialize initial data (tenant and user) -echo "Initializing initial tenant and user..." -python ${PROJECT_DIR}/scripts/initialize_data.py # Adjust the path to your initialization script - -# Start Flask app -# gunicorn -w 1 -k gevent -b 0.0.0.0:5001 --worker-connections 100 scripts.run_eveai_app:app -gunicorn -w 1 -k gevent -b 0.0.0.0:5001 --worker-connections 100 scripts.run_eveai_app:app diff --git a/scripts/start_eveai_beat.sh b/scripts/start_eveai_beat.sh deleted file mode 100755 index 233b724..0000000 --- a/scripts/start_eveai_beat.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -cd "/app/" || exit 1 -export PROJECT_DIR="/app" -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # Include the app directory in the Python path & patched packages - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Start Celery Beat -celery -A eveai_beat.celery beat --scheduler=redbeat.RedBeatScheduler --loglevel=debug & - -# Start a worker for the 'llm_interactions' queue with auto-scaling - not necessary, in eveai_chat_workers -# celery -A eveai_workers.celery worker --loglevel=info - Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h & - -# Wait for all background processes to finish -wait diff --git a/scripts/start_eveai_chat_client.sh b/scripts/start_eveai_chat_client.sh deleted file mode 100755 index 84b4271..0000000 --- a/scripts/start_eveai_chat_client.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -cd "/app" || exit 1 -export PYTHONPATH="$PYTHONPATH:/app/" - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Wait for the database to be ready -echo "Waiting for database to be ready" -until pg_isready -h $DB_HOST -p $DB_PORT; do - echo "Postgres is unavailable - sleeping" - sleep 2 -done -echo "Postgres is up - executing commands" - -# Set FLASK_APP environment variables -PROJECT_DIR="/app" -export FLASK_APP=${PROJECT_DIR}/scripts/run_eveai_chat_client.py -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" - -# Start Flask app with Gunicorn -gunicorn -w 1 -k gevent -b 0.0.0.0:5004 --worker-connections 100 scripts.run_eveai_chat_client:app diff --git a/scripts/start_eveai_chat_workers.sh b/scripts/start_eveai_chat_workers.sh deleted file mode 100755 index b2c2cc2..0000000 --- a/scripts/start_eveai_chat_workers.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -cd "/app/" || exit 1 -export PROJECT_DIR="/app" -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # Include the app directory in the Python path & patched packages - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Start a worker for the 'llm_interactions' queue with auto-scaling -celery -A eveai_chat_workers.celery worker --loglevel=info -Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h & - -# Wait for all background processes to finish -wait diff --git a/scripts/start_eveai_entitlements.sh b/scripts/start_eveai_entitlements.sh deleted file mode 100755 index a84ad9a..0000000 
--- a/scripts/start_eveai_entitlements.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -cd "/app/" || exit 1 -export PROJECT_DIR="/app" -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # Include the app directory in the Python path & patched packages - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Start a worker for the 'embeddings' queue with higher concurrency -celery -A eveai_entitlements.celery worker --loglevel=debug -Q entitlements --autoscale=2,8 --hostname=entitlements_worker@%h & - -# Start a worker for the 'llm_interactions' queue with auto-scaling - not necessary, in eveai_chat_workers -# celery -A eveai_workers.celery worker --loglevel=info - Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h & - -# Wait for all background processes to finish -wait diff --git a/scripts/start_eveai_workers.sh b/scripts/start_eveai_workers.sh deleted file mode 100755 index d03a45f..0000000 --- a/scripts/start_eveai_workers.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -cd "/app/" || exit 1 -export PROJECT_DIR="/app" -export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # Include the app directory in the Python path & patched packages - -# Ensure we can write the logs -chown -R appuser:appuser /app/logs - -# Start a worker for the 'embeddings' queue with higher concurrency -celery -A eveai_workers.celery worker --loglevel=debug -Q embeddings --autoscale=2,8 --hostname=embeddings_worker@%h & - -# Start a worker for the 'llm_interactions' queue with auto-scaling - not necessary, in eveai_chat_workers -# celery -A eveai_workers.celery worker --loglevel=info - Q llm_interactions --autoscale=2,8 --hostname=interactions_worker@%h & - -# Wait for all background processes to finish -wait
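For reference, a minimal sketch of how one of the new ROLE-driven containers could be wired up as a Kubernetes Deployment. The names, replica count, image tag and resource values are illustrative assumptions; the environment variables mirror scripts/start.sh and the compose files.

```yaml
# Hypothetical worker Deployment built on the shared base image and ROLE-driven start.sh.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: eveai-chat-workers
spec:
  replicas: 2
  selector:
    matchLabels:
      app: eveai-chat-workers
  template:
    metadata:
      labels:
        app: eveai-chat-workers
    spec:
      terminationGracePeriodSeconds: 120           # >= longest expected task runtime
      containers:
        - name: worker
          image: josakola/eveai_chat_workers:latest   # assumed image/tag
          env:
            - name: COMPONENT_NAME
              value: eveai_chat_workers
            - name: ROLE
              value: worker
            - name: CELERY_CONCURRENCY
              value: "2"
            - name: CELERY_LOGLEVEL
              value: INFO
            - name: CELERY_MAX_TASKS_PER_CHILD
              value: "1000"
            - name: CELERY_PREFETCH
              value: "1"
          resources:
            requests:
              cpu: 500m
              memory: 512Mi
            limits:
              memory: 1Gi
```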