diff --git a/check_running_services.sh b/check_running_services.sh new file mode 100644 index 0000000..13616d3 --- /dev/null +++ b/check_running_services.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Diagnostic script to check what services are running + +echo "=== KIND CLUSTER STATUS ===" +echo "Namespaces:" +kubectl get namespaces | grep eveai + +echo -e "\nPods in eveai-dev:" +kubectl get pods -n eveai-dev + +echo -e "\nServices in eveai-dev:" +kubectl get services -n eveai-dev + +echo -e "\n=== TEST CONTAINERS STATUS ===" +echo "Running test containers:" +podman ps | grep eveai_test + +echo -e "\n=== PORT ANALYSIS ===" +echo "What's listening on port 3080:" +lsof -i :3080 2>/dev/null || echo "Nothing found" + +echo -e "\nWhat's listening on port 4080:" +lsof -i :4080 2>/dev/null || echo "Nothing found" + +echo -e "\n=== SOLUTION ===" +echo "The application you see is from TEST CONTAINERS (6 days old)," +echo "NOT from the Kind cluster (3 minutes old)." +echo "" +echo "To test Kind cluster:" +echo "1. Stop test containers: podman stop eveai_test_nginx_1 eveai_test_eveai_app_1" +echo "2. Deploy Kind services: kup-all-structured" +echo "3. Restart test containers if needed" \ No newline at end of file diff --git a/config/tasks/traicie/TRAICIE_GET_COMPETENCIES_TASK/1.1.0.yaml b/config/tasks/traicie/TRAICIE_GET_COMPETENCIES_TASK/1.1.0.yaml index b0c1ffa..c2a29ec 100644 --- a/config/tasks/traicie/TRAICIE_GET_COMPETENCIES_TASK/1.1.0.yaml +++ b/config/tasks/traicie/TRAICIE_GET_COMPETENCIES_TASK/1.1.0.yaml @@ -23,7 +23,7 @@ task_description: > Create a prioritised list of the 10 most critical competencies as defined above, ranked in importance. Treat this as a logical and professional reasoning exercise. - Respect the language of the vacancy text, and return answers / output in the same language. + Respect the language of the vacancy text, and return answers / output in the same language. Only use plain text. {custom_description} diff --git a/content/changelog/1.0/1.0.0.md b/content/changelog/1.0/1.0.0.md index 4f60ad7..1784952 100644 --- a/content/changelog/1.0/1.0.0.md +++ b/content/changelog/1.0/1.0.0.md @@ -5,6 +5,23 @@ All notable changes to EveAI will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.0.1-beta] + +### Changed +- Podman now replaces Docker for building images +- Local registry now replaces Docker Hub +- Start of k8s integration +- RAG possibilities through use of the TRAICIE_SELECTION_SPECIALIST + +### Fixed +- TRA-67 - Initial edit of Tenant Make --> 2-step process +- TRA-68 - Correction of JavaScript for the JSON editor that caused Asset Changes not to be saved +- TRA-70 - Wrong File Size display for Assets +- TRA-69 - Wrong number of questions in TRAICIE_KO_INTERVIEW_DEFINITION_SPECIALIST (required correction in TRACIE_ROLE_DEFINITION_SPECIALIST) + +### Security +- In case of vulnerabilities. + ## [3.0.0-beta] ### Added diff --git a/docker/build_and_push_eveai.sh b/docker/build_and_push_eveai.sh index b67f238..6434757 100755 --- a/docker/build_and_push_eveai.sh +++ b/docker/build_and_push_eveai.sh @@ -3,19 +3,29 @@ # Exit on any error set -e -source ./docker_env_switch.sh dev +source ./podman_env_switch.sh dev # Load environment variables source .env -# Docker registry -REGISTRY="josakola" +# Check if podman is available +if ! 
command -v podman &> /dev/null; then + echo "Error: podman not found" + exit 1 +fi + +echo "Using container runtime: podman" + +# Local registry +REGISTRY="registry.ask-eve-ai-local.com" +# Account prefix voor consistency met Docker Hub +ACCOUNT="josakola" # Tag (you might want to use a version or git commit hash) TAG="latest" -# Platforms to build for -PLATFORMS="linux/amd64,linux/arm64" +# Single platform - AMD64 only for simplicity +PLATFORM="linux/amd64" # Default action ACTION="both" @@ -28,13 +38,14 @@ DEBUG="" # Function to display usage information usage() { echo "Usage: $0 [-b|-p] [--no-cache] [--progress=plain] [--debug] [service1 service2 ...]" - echo " -b: Build only (for current platform)" - echo " -p: Push only (multi-platform)" + echo " -b: Build only" + echo " -p: Push only" echo " --no-cache: Perform a clean build without using cache" echo " --progress=plain: Show detailed progress of the build" echo " --debug: Enable debug mode for the build" echo " If no option is provided, both build and push will be performed." echo " If no services are specified, all eveai_ services and nginx will be processed." + echo " All images are built for AMD64 platform (compatible with both x86_64 and Apple Silicon via emulation)." } # Parse command-line options @@ -92,47 +103,57 @@ process_service() { return 1 fi + # Construct image names + LOCAL_IMAGE_NAME="$ACCOUNT/$SERVICE:$TAG" + REGISTRY_IMAGE_NAME="$REGISTRY/$ACCOUNT/$SERVICE:$TAG" + + echo "Building for platform: $PLATFORM" + echo "Local tag: $LOCAL_IMAGE_NAME" + echo "Registry tag: $REGISTRY_IMAGE_NAME" + # Build and/or push based on ACTION if [ "$ACTION" = "build" ]; then - echo "Building $SERVICE for current platform..." - docker build \ + echo "Building $SERVICE for $PLATFORM..." + podman build \ + --platform "$PLATFORM" \ $NO_CACHE \ $PROGRESS \ $DEBUG \ - -t "$REGISTRY/$SERVICE:$TAG" \ - -f "$CONTEXT/$DOCKERFILE" \ - "$CONTEXT" - elif [ "$ACTION" = "push" ]; then - echo "Building and pushing $SERVICE for multiple platforms..." - docker buildx build \ - $NO_CACHE \ - $PROGRESS \ - $DEBUG \ - --platform "$PLATFORMS" \ - -t "$REGISTRY/$SERVICE:$TAG" \ - -f "$CONTEXT/$DOCKERFILE" \ - "$CONTEXT" \ - --push - else - echo "Building $SERVICE for current platform..." - docker build \ - $NO_CACHE \ - $PROGRESS \ - $DEBUG \ - -t "$REGISTRY/$SERVICE:$TAG" \ + -t "$LOCAL_IMAGE_NAME" \ + -t "$REGISTRY_IMAGE_NAME" \ -f "$CONTEXT/$DOCKERFILE" \ "$CONTEXT" - echo "Building and pushing $SERVICE for multiple platforms..." - docker buildx build \ + elif [ "$ACTION" = "push" ]; then + echo "Building and pushing $SERVICE for $PLATFORM..." + podman build \ + --platform "$PLATFORM" \ $NO_CACHE \ $PROGRESS \ $DEBUG \ - --platform "$PLATFORMS" \ - -t "$REGISTRY/$SERVICE:$TAG" \ + -t "$LOCAL_IMAGE_NAME" \ + -t "$REGISTRY_IMAGE_NAME" \ -f "$CONTEXT/$DOCKERFILE" \ - "$CONTEXT" \ - --push + "$CONTEXT" + + echo "Pushing $SERVICE to registry..." + podman push "$REGISTRY_IMAGE_NAME" + + else + # Both build and push + echo "Building $SERVICE for $PLATFORM..." + podman build \ + --platform "$PLATFORM" \ + $NO_CACHE \ + $PROGRESS \ + $DEBUG \ + -t "$LOCAL_IMAGE_NAME" \ + -t "$REGISTRY_IMAGE_NAME" \ + -f "$CONTEXT/$DOCKERFILE" \ + "$CONTEXT" + + echo "Pushing $SERVICE to registry..." + podman push "$REGISTRY_IMAGE_NAME" fi } @@ -146,31 +167,25 @@ else SERVICES=("$@") fi -# Check if eveai_builder exists, if not create it -if ! docker buildx inspect eveai_builder > /dev/null 2>&1; then - echo "Creating eveai_builder..." 
- docker buildx create --name eveai_builder -fi - -# Use eveai_builder -echo "Using eveai_builder..." -docker buildx use eveai_builder +echo "Using simplified AMD64-only approach for maximum compatibility..." +echo "Images will be tagged as: $REGISTRY/$ACCOUNT/[service]:$TAG" # Loop through services for SERVICE in "${SERVICES[@]}"; do if [[ "$SERVICE" == "nginx" ]]; then - ./copy_specialist_svgs.sh ../config ../nginx/static/assets + ./copy_specialist_svgs.sh ../config ../nginx/static/assets 2>/dev/null || echo "Warning: copy_specialist_svgs.sh not found or failed" fi if [[ "$SERVICE" == "nginx" || "$SERVICE" == eveai_* || "$SERVICE" == "flower" || "$SERVICE" == "prometheus" || "$SERVICE" == "grafana" ]]; then if process_service "$SERVICE"; then - echo "Successfully processed $SERVICE" + echo "✅ Successfully processed $SERVICE" else - echo "Failed to process $SERVICE" + echo "❌ Failed to process $SERVICE" fi else - echo "Skipping $SERVICE as it's not nginx, flower, prometheus, grafana or doesn't start with eveai_" + echo "⏭️ Skipping $SERVICE as it's not nginx, flower, prometheus, grafana or doesn't start with eveai_" fi done -echo -e "\033[35mAll specified services processed.\033[0m" -echo -e "\033[35mFinished at $(date +"%d/%m/%Y %H:%M:%S")\033[0m" +echo -e "\033[32m✅ All specified services processed successfully!\033[0m" +echo -e "\033[32m📦 Images are available locally and in registry\033[0m" +echo -e "\033[32m🕐 Finished at $(date +"%d/%m/%Y %H:%M:%S")\033[0m" \ No newline at end of file diff --git a/docker/compose_dev.yaml b/docker/compose_dev.yaml index 68a436d..1d4dfea 100644 --- a/docker/compose_dev.yaml +++ b/docker/compose_dev.yaml @@ -1,13 +1,4 @@ -# Comments are provided throughout this file to help you get started. -# If you need more help, visit the Docker Compose reference guide at -# https://docs.docker.com/go/compose-spec-reference/ - -# Here the instructions define your application as a service called "server". -# This service is built from the Dockerfile in the current directory. -# You can add other services your application may depend on here, such as a -# database or a cache. For examples, see the Awesome Compose repository: -# https://github.com/docker/awesome-compose - +# Podman Compose compatible versie met port schema compliance x-common-variables: &common-variables DB_HOST: db DB_USER: luke @@ -45,16 +36,12 @@ x-common-variables: &common-variables services: nginx: - image: josakola/nginx:latest + image: ${REGISTRY_PREFIX:-}josakola/nginx:latest build: context: .. dockerfile: ./docker/nginx/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 ports: - - 80:80 - - 8080:8080 + - 3080:80 # Dev nginx proxy volgens port schema environment: <<: *common-variables volumes: @@ -72,18 +59,15 @@ services: - eveai_api - eveai_chat_client networks: - - eveai-network + - eveai-dev-network eveai_app: - image: josakola/eveai_app:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_app:latest build: context: .. dockerfile: ./docker/eveai_app/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 ports: - - 5001:5001 + - 3001:5001 # Dev app volgens port schema expose: - 8000 environment: @@ -108,20 +92,17 @@ services: healthcheck: test: ["CMD", "curl", "-f", "http://localhost:5001/healthz/ready"] interval: 30s - timeout: 1s + timeout: 10s retries: 3 - start_period: 30s + start_period: 60s networks: - - eveai-network + - eveai-dev-network eveai_workers: - image: josakola/eveai_workers:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_workers:latest build: context: .. 
dockerfile: ./docker/eveai_workers/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 expose: - 8000 environment: @@ -142,18 +123,15 @@ services: minio: condition: service_healthy networks: - - eveai-network + - eveai-dev-network eveai_chat_client: - image: josakola/eveai_chat_client:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_chat_client:latest build: context: .. dockerfile: ./docker/eveai_chat_client/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 ports: - - 5004:5004 + - 3004:5004 # Dev chat client volgens port schema expose: - 8000 environment: @@ -176,20 +154,17 @@ services: healthcheck: test: ["CMD", "curl", "-f", "http://localhost:5004/healthz/ready"] interval: 30s - timeout: 1s + timeout: 10s retries: 3 - start_period: 30s + start_period: 60s networks: - - eveai-network + - eveai-dev-network eveai_chat_workers: - image: josakola/eveai_chat_workers:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_chat_workers:latest build: context: .. dockerfile: ./docker/eveai_chat_workers/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 expose: - 8000 environment: @@ -208,26 +183,20 @@ services: redis: condition: service_healthy networks: - - eveai-network + - eveai-dev-network eveai_api: - image: josakola/eveai_api:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_api:latest build: context: .. dockerfile: ./docker/eveai_api/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 ports: - - 5003:5003 + - 3003:5003 # Dev API volgens port schema expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_api - WORDPRESS_HOST: host.docker.internal - WORDPRESS_PORT: 10003 - WORDPRESS_PROTOCOL: http volumes: - ../eveai_api:/app/eveai_api - ../common:/app/common @@ -245,20 +214,17 @@ services: healthcheck: test: [ "CMD", "curl", "-f", "http://localhost:5003/healthz/ready" ] interval: 30s - timeout: 1s + timeout: 10s retries: 3 - start_period: 30s + start_period: 60s networks: - - eveai-network + - eveai-dev-network eveai_beat: - image: josakola/eveai_beat:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_beat:latest build: context: .. dockerfile: ./docker/eveai_beat/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 environment: <<: *common-variables COMPONENT_NAME: eveai_beat @@ -273,16 +239,13 @@ services: redis: condition: service_healthy networks: - - eveai-network + - eveai-dev-network eveai_entitlements: - image: josakola/eveai_entitlements:latest + image: ${REGISTRY_PREFIX:-}josakola/eveai_entitlements:latest build: context: .. dockerfile: ./docker/eveai_entitlements/Dockerfile - platforms: - - linux/amd64 - - linux/arm64 expose: - 8000 environment: @@ -303,13 +266,13 @@ services: minio: condition: service_healthy networks: - - eveai-network + - eveai-dev-network db: hostname: db image: ankane/pgvector ports: - - 5432:5432 + - 3005:5432 # Dev database volgens port schema (vermijd standaard 5432) restart: always environment: - POSTGRES_DB=eveai @@ -324,13 +287,13 @@ services: timeout: 5s retries: 5 networks: - - eveai-network + - eveai-dev-network redis: image: redis:7.2.5 restart: always ports: - - "6379:6379" + - "3006:6379" # Dev Redis volgens port schema (vermijd standaard 6379) volumes: - ./db/redis:/data healthcheck: @@ -339,10 +302,10 @@ services: timeout: 5s retries: 5 networks: - - eveai-network + - eveai-dev-network flower: - image: josakola/flower:latest + image: ${REGISTRY_PREFIX:-}josakola/flower:latest build: context: .. 
dockerfile: ./docker/flower/Dockerfile @@ -351,17 +314,17 @@ services: volumes: - ../scripts:/app/scripts ports: - - "5555:5555" + - "3007:5555" # Dev Flower volgens port schema depends_on: - redis networks: - - eveai-network + - eveai-dev-network minio: image: minio/minio ports: - - "9000:9000" - - "9001:9001" + - "3008:9000" # Dev MinIO volgens port schema + - "3009:9001" # Dev MinIO console expose: - 9000 volumes: @@ -376,18 +339,17 @@ services: interval: 30s timeout: 20s retries: 3 - start_period: 30s + start_period: 60s networks: - - eveai-network + - eveai-dev-network prometheus: - image: prom/prometheus:latest + image: ${REGISTRY_PREFIX:-}josakola/prometheus:latest build: context: ./prometheus dockerfile: Dockerfile - container_name: prometheus ports: - - "9090:9090" + - "3010:9090" # Dev Prometheus volgens port schema volumes: - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml - ./prometheus/data:/prometheus @@ -399,24 +361,23 @@ services: - '--web.enable-lifecycle' restart: unless-stopped networks: - - eveai-network + - eveai-dev-network pushgateway: image: prom/pushgateway:latest restart: unless-stopped ports: - - "9091:9091" + - "3011:9091" # Dev Pushgateway volgens port schema networks: - - eveai-network + - eveai-dev-network grafana: - image: grafana/grafana:latest + image: ${REGISTRY_PREFIX:-}josakola/grafana:latest build: context: ./grafana dockerfile: Dockerfile - container_name: grafana ports: - - "3000:3000" + - "3012:3000" # Dev Grafana volgens port schema volumes: - ./grafana/provisioning:/etc/grafana/provisioning - ./grafana/data:/var/lib/grafana @@ -428,21 +389,12 @@ services: depends_on: - prometheus networks: - - eveai-network + - eveai-dev-network networks: - eveai-network: + eveai-dev-network: driver: bridge - # This enables the containers to access the host network - driver_opts: - com.docker.network.bridge.host_ipc: "true" volumes: minio_data: - eveai_logs: -# db-data: -# redis-data: -# tenant-files: -#secrets: -# db-password: -# file: ./db/password.txt + eveai_logs: \ No newline at end of file diff --git a/docker/compose_test.yaml b/docker/compose_test.yaml index 7aedc10..33db5a7 100644 --- a/docker/compose_test.yaml +++ b/docker/compose_test.yaml @@ -12,7 +12,7 @@ x-common-variables: &common-variables DB_HOST: minty.ask-eve-ai-local.com DB_USER: luke DB_PASS: 'Skywalker!' 
- DB_NAME: eveai + DB_NAME: eveai_test DB_PORT: '5432' FLASK_ENV: test FLASK_DEBUG: true @@ -43,36 +43,36 @@ x-common-variables: &common-variables SW_EMAIL_NAME: "Evie Admin (test)" SW_PROJECT: "f282f55a-ea52-4538-a979-5bcb890717ab" +name: eveai_test + services: nginx: - image: josakola/nginx:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/nginx:latest ports: - - 80:80 - - 8080:8080 + - 4080:80 environment: <<: *common-variables volumes: - - eveai_logs:/var/log/nginx + - test_eveai_logs:/var/log/nginx depends_on: - eveai_app - eveai_api - eveai_chat_client networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_app: - image: josakola/eveai_app:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_app:latest ports: - - 5001:5001 + - 4001:5001 expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_app volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs depends_on: redis: condition: service_healthy @@ -85,40 +85,38 @@ services: retries: 3 start_period: 30s networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_workers: - image: josakola/eveai_workers:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_workers:latest expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_workers volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs depends_on: redis: condition: service_healthy minio: condition: service_healthy networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_chat_client: - image: josakola/eveai_chat_client:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_chat_client:latest ports: - - 5004:5004 + - 4004:5004 expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_chat_client volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs depends_on: redis: condition: service_healthy @@ -131,38 +129,36 @@ services: retries: 3 start_period: 30s networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_chat_workers: - image: josakola/eveai_chat_workers:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_chat_workers:latest expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_chat_workers volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs depends_on: redis: condition: service_healthy networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_api: - image: josakola/eveai_api:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_api:latest ports: - - 5003:5003 + - 4003:5003 expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_api volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs depends_on: redis: condition: service_healthy @@ -175,80 +171,78 @@ services: retries: 3 start_period: 30s networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_beat: - image: josakola/eveai_beat:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_beat:latest environment: <<: *common-variables COMPONENT_NAME: eveai_beat volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs 
depends_on: redis: condition: service_healthy networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped eveai_entitlements: - image: josakola/eveai_entitlements:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/eveai_entitlements:latest expose: - 8000 environment: <<: *common-variables COMPONENT_NAME: eveai_entitlements volumes: - - eveai_logs:/app/logs - - crewai_storage:/app/crewai_storage + - test_eveai_logs:/app/logs depends_on: redis: condition: service_healthy minio: condition: service_healthy networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped redis: image: redis:7.2.5 - restart: no + restart: unless-stopped ports: - - "6379:6379" + - "4006:6379" volumes: - - redisdata:/data + - test_redisdata:/data healthcheck: test: [ "CMD", "redis-cli", "ping" ] interval: 10s timeout: 5s retries: 5 networks: - - eveai-network + - eveai-test-network flower: - image: josakola/flower:${EVEAI_VERSION:-latest} + image: ${REGISTRY_PREFIX:-}josakola/flower:latest environment: <<: *common-variables ports: - - "5555:5555" + - "4007:5555" depends_on: - redis networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped minio: image: minio/minio ports: - - "9000:9000" - - "9001:9001" + - "4008:9000" + - "4009:9001" expose: - 9000 volumes: - - miniodata:/data - - minioconfig:/root/.minio + - test_miniodata:/data + - test_minioconfig:/root/.minio environment: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin} @@ -260,64 +254,57 @@ services: retries: 3 start_period: 30s networks: - - eveai-network - restart: "no" + - eveai-test-network + restart: unless-stopped prometheus: - image: josakola/prometheus:${EVEAI_VERSION:-latest} - container_name: prometheus + image: ${REGISTRY_PREFIX:-}josakola/prometheus:${EVEAI_VERSION:-latest} ports: - - "9090:9090" + - "4010:9090" volumes: - - prometheusdata:/prometheus + - test_prometheusdata:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yml' - '--storage.tsdb.path=/prometheus' - '--web.console.libraries=/etc/prometheus/console_libraries' - '--web.console.templates=/etc/prometheus/consoles' - '--web.enable-lifecycle' - restart: no + restart: unless-stopped networks: - - eveai-network + - eveai-test-network pushgateway: image: prom/pushgateway:latest restart: unless-stopped ports: - - "9091:9091" + - "4011:9091" networks: - - eveai-network + - eveai-test-network grafana: - image: josakola/grafana:${EVEAI_VERSION:-latest} - container_name: grafana + image: ${REGISTRY_PREFIX:-}josakola/grafana:${EVEAI_VERSION:-latest} ports: - - "3000:3000" + - "4012:3000" volumes: - - grafanadata:/var/lib/grafana + - test_grafanadata:/var/lib/grafana environment: - GF_SECURITY_ADMIN_USER=admin - GF_SECURITY_ADMIN_PASSWORD=admin - GF_USERS_ALLOW_SIGN_UP=false - restart: no + restart: unless-stopped depends_on: - prometheus networks: - - eveai-network + - eveai-test-network networks: - eveai-network: + eveai-test-network: driver: bridge - # This enables the containers to access the host network - driver_opts: - com.docker.network.bridge.host_ipc: "true" volumes: - eveai_logs: - pgdata: - redisdata: - miniodata: - minioconfig: - prometheusdata: - grafanadata: - crewai_storage: + test_eveai_logs: + test_redisdata: + test_miniodata: + test_minioconfig: + test_prometheusdata: + test_grafanadata: diff --git a/docker/docker_env_switch.sh b/docker/docker_env_switch.sh deleted file mode 100755 index 
1cfa407..0000000 --- a/docker/docker_env_switch.sh +++ /dev/null @@ -1,155 +0,0 @@ -#!/bin/zsh -# or use #!/usr/bin/env zsh - -# Function to display usage information -usage() { - echo "Usage: source $0 [version]" - echo " environment: The environment to use (dev, prod, test, integration, bugfix)" - echo " version : (Optional) Specific release version to deploy" - echo " If not specified, uses 'latest' (except for dev environment)" -} - -# Replace the existing check at the beginning of docker_env_switch.sh -# Check if the script is sourced -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - # Script is being executed directly from terminal - echo "Error: This script must be sourced, not executed directly." - echo "Please run: source $0 [version]" - exit 1 -fi -# If we reach here, script is being sourced (either by terminal or another script) - -# Check if an environment is provided -if [ $# -eq 0 ]; then - usage - return 1 -fi - -ENVIRONMENT=$1 -VERSION=${2:-latest} # Default to latest if not specified - -# Set variables based on the environment -case $ENVIRONMENT in - dev) - DOCKER_CONTEXT="default" - COMPOSE_FILE="compose_dev.yaml" - VERSION="latest" # Always use latest for dev - ;; - prod) - DOCKER_CONTEXT="mxz536.stackhero-network.com" - COMPOSE_FILE="compose_stackhero.yaml" - ;; - test) - DOCKER_CONTEXT="test-environment" # Change to your actual test Docker context - COMPOSE_FILE="compose_test.yaml" - ;; - integration) - DOCKER_CONTEXT="integration-environment" # Change to your actual integration Docker context - COMPOSE_FILE="compose_integration.yaml" - ;; - bugfix) - DOCKER_CONTEXT="bugfix-environment" # Change to your actual bugfix Docker context - COMPOSE_FILE="compose_bugfix.yaml" - ;; - *) - echo "Invalid environment: $ENVIRONMENT" - usage - return 1 - ;; -esac - -# Set Docker account -DOCKER_ACCOUNT="josakola" - -# Check if Docker context exists -if ! docker context ls --format '{{.Name}}' | grep -q "^$DOCKER_CONTEXT$"; then - echo "Warning: Docker context '$DOCKER_CONTEXT' does not exist." - - # Prompt user if they want to create the context - if [[ "$DOCKER_CONTEXT" != "default" ]]; then - echo "Do you want to set up this context now? (y/n): " - read CREATE_CONTEXT - if [[ "$CREATE_CONTEXT" == "y" || "$CREATE_CONTEXT" == "Y" ]]; then - # You would add here the specific code to create each context type - # For example, for remote contexts you might need SSH settings - echo "Please specify the Docker host URL (e.g., ssh://user@remote_host or tcp://remote_host:2375):" - read DOCKER_HOST - - docker context create "$DOCKER_CONTEXT" --docker "host=$DOCKER_HOST" - if [ $? -ne 0 ]; then - echo "Failed to create Docker context. Please create it manually." - return 1 - fi - else - echo "Using default context instead." - DOCKER_CONTEXT="default" - fi - fi -fi - -# Check if compose file exists -if [ ! -f "$COMPOSE_FILE" ]; then - echo "Warning: Compose file '$COMPOSE_FILE' does not exist." - echo "Do you want to create it based on compose_dev.yaml? (y/n): " - read CREATE_FILE - if [[ "$CREATE_FILE" == "y" || "$CREATE_FILE" == "Y" ]]; then - # Create new compose file based on compose_dev.yaml with version variables - sed 's/\(image: josakola\/[^:]*\):latest/\1:${EVEAI_VERSION:-latest}/g' compose_dev.yaml > "$COMPOSE_FILE" - echo "Created $COMPOSE_FILE with version placeholders." - else - echo "Cannot proceed without a valid compose file." 
- return 1 - fi -fi - -# Switch Docker context -echo "Switching to Docker context: $DOCKER_CONTEXT" -docker context use $DOCKER_CONTEXT - -# Set environment variables -export COMPOSE_FILE=$COMPOSE_FILE -export EVEAI_VERSION=$VERSION -export DOCKER_ACCOUNT=$DOCKER_ACCOUNT - -echo "Set COMPOSE_FILE to $COMPOSE_FILE" -echo "Set EVEAI_VERSION to $VERSION" -echo "Set DOCKER_ACCOUNT to $DOCKER_ACCOUNT" - -docker-compose() { - docker compose -f $COMPOSE_FILE "$@" -} - -dc() { - docker compose -f $COMPOSE_FILE "$@" -} - -dcup() { - docker compose -f $COMPOSE_FILE up -d --remove-orphans "$@" -} - -dcdown() { - docker compose -f $COMPOSE_FILE down "$@" -} - -dcps() { - docker compose -f $COMPOSE_FILE ps "$@" -} - -dclogs() { - docker compose -f $COMPOSE_FILE logs "$@" -} - -dcpull() { - docker compose -f $COMPOSE_FILE pull "$@" -} - -dcrefresh() { - docker compose -f $COMPOSE_FILE pull && docker compose -f $COMPOSE_FILE up -d --remove-orphans "$@" -} - -# Exporteer de functies zodat ze beschikbaar zijn in andere scripts -export -f docker-compose dc dcup dcdown dcps dclogs dcpull dcrefresh - - -echo "Docker environment switched to $ENVIRONMENT with version $VERSION" -echo "You can now use 'docker-compose', 'dc', 'dcup', 'dcdown', 'dcps', 'dclogs', 'dcpull' or 'dcrefresh' commands" \ No newline at end of file diff --git a/docker/podman_env_switch.sh b/docker/podman_env_switch.sh new file mode 100755 index 0000000..a14e68d --- /dev/null +++ b/docker/podman_env_switch.sh @@ -0,0 +1,257 @@ +#!/usr/bin/env zsh + +# Function to display usage information +usage() { + echo "Usage: source $0 [version]" + echo " environment: The environment to use (dev, prod, test, integration, bugfix)" + echo " version : (Optional) Specific release version to deploy" + echo " If not specified, uses 'latest' (except for dev environment)" +} + +# Check if the script is sourced - improved for both bash and zsh +is_sourced() { + if [[ -n "$ZSH_VERSION" ]]; then + # In zsh, check if we're in a sourced context + [[ "$ZSH_EVAL_CONTEXT" =~ "(:file|:cmdsubst)" ]] || [[ "$0" != "$ZSH_ARGZERO" ]] + else + # In bash, compare BASH_SOURCE with $0 + [[ "${BASH_SOURCE[0]}" != "${0}" ]] + fi +} + +if ! is_sourced; then + echo "Error: This script must be sourced, not executed directly." + echo "Please run: source $0 [version]" + if [[ -n "$ZSH_VERSION" ]]; then + return 1 2>/dev/null || exit 1 + else + exit 1 + fi +fi + +# Check if an environment is provided +if [ $# -eq 0 ]; then + usage + return 1 +fi + +ENVIRONMENT=$1 +VERSION=${2:-latest} # Default to latest if not specified + +# Check if podman and podman-compose are available +if ! command -v podman &> /dev/null; then + echo "Error: podman is not installed or not in PATH" + echo "Please install podman first" + return 1 +fi + +if ! 
command -v podman-compose &> /dev/null; then + echo "Error: podman-compose is not installed or not in PATH" + echo "Please install podman-compose first" + return 1 +fi + +CONTAINER_CMD="podman" +# Store the actual path to podman-compose to avoid recursion +COMPOSE_CMD_PATH=$(command -v podman-compose) + +echo "Using container runtime: $CONTAINER_CMD" +echo "Using compose command: $COMPOSE_CMD_PATH" + +# Set default platform to AMD64 for consistency +export BUILDAH_PLATFORM=linux/amd64 +export PODMAN_PLATFORM=linux/amd64 + +# Set variables based on the environment +case $ENVIRONMENT in + dev) + PODMAN_CONNECTION="default" + COMPOSE_FILE="compose_dev.yaml" + REGISTRY_PREFIX="" + COMPOSE_PROJECT_NAME="eveai_dev" + VERSION="latest" # Always use latest for dev + ;; + prod) + # TO BE DEFINED + PODMAN_CONNECTION="mxz536.stackhero-network.com" + COMPOSE_FILE="compose_stackhero.yaml" + REGISTRY_PREFIX="" + COMPOSE_PROJECT_NAME="eveai_prod" + ;; + test) + PODMAN_CONNECTION="test-environment" + COMPOSE_FILE="compose_test.yaml" + REGISTRY_PREFIX="registry.ask-eve-ai-local.com/" + COMPOSE_PROJECT_NAME="eveai_test" + ;; + bugfix) + # TO BE DEFINED + PODMAN_CONNECTION="bugfix-environment" + COMPOSE_FILE="compose_bugfix.yaml" + COMPOSE_PROJECT_NAME="eveai_bugfix" + ;; + *) + echo "Invalid environment: $ENVIRONMENT" + usage + return 1 + ;; +esac + +# Set container registry account +CONTAINER_ACCOUNT="josakola" + +# Handle remote connections for podman +if [[ "$PODMAN_CONNECTION" != "default" ]]; then + echo "Setting up remote podman connection: $PODMAN_CONNECTION" + + # Check if podman connection exists + if ! podman system connection list --format '{{.Name}}' 2>/dev/null | grep -q "^$PODMAN_CONNECTION$"; then + echo "Warning: Podman connection '$PODMAN_CONNECTION' does not exist." + echo -n "Do you want to set up this connection now? (y/n): " + read -r CREATE_CONNECTION + if [[ "$CREATE_CONNECTION" == "y" || "$CREATE_CONNECTION" == "Y" ]]; then + echo -n "Please specify the SSH connection string (e.g., user@remote_host): " + read -r SSH_CONNECTION + + if [[ -n "$SSH_CONNECTION" ]]; then + podman system connection add "$PODMAN_CONNECTION" --identity ~/.ssh/id_rsa "ssh://$SSH_CONNECTION/run/user/1000/podman/podman.sock" + if [[ $? -ne 0 ]]; then + echo "Failed to create podman connection. Please create it manually." + return 1 + fi + else + echo "No SSH connection string provided." + return 1 + fi + else + echo "Using local podman setup instead." + PODMAN_CONNECTION="default" + fi + fi + + # Set the connection + if [[ "$PODMAN_CONNECTION" != "default" ]]; then + # Use podman context instead of manually setting CONTAINER_HOST + podman system connection default "$PODMAN_CONNECTION" 2>/dev/null + if [[ $? -eq 0 ]]; then + echo "Switched to remote podman connection: $PODMAN_CONNECTION" + else + echo "Warning: Failed to switch to connection $PODMAN_CONNECTION, using local setup" + PODMAN_CONNECTION="default" + fi + fi +else + echo "Using local podman setup with AMD64 platform" + # Ensure we're using the default local connection + podman system connection default "" 2>/dev/null || true +fi + +# Check if compose file exists +if [[ ! -f "$COMPOSE_FILE" ]]; then + echo "Warning: Compose file '$COMPOSE_FILE' does not exist." + if [[ -f "compose_dev.yaml" ]]; then + echo -n "Do you want to create it based on compose_dev.yaml? 
(y/n): " + read -r CREATE_FILE + if [[ "$CREATE_FILE" == "y" || "$CREATE_FILE" == "Y" ]]; then + # Create new compose file based on compose_dev.yaml with version variables + if sed 's/\(image: josakola\/[^:]*\):latest/\1:${EVEAI_VERSION:-latest}/g' compose_dev.yaml > "$COMPOSE_FILE" 2>/dev/null; then + echo "Created $COMPOSE_FILE with version placeholders." + else + echo "Failed to create $COMPOSE_FILE" + return 1 + fi + else + echo "Cannot proceed without a valid compose file." + return 1 + fi + else + echo "Cannot create $COMPOSE_FILE: compose_dev.yaml not found." + return 1 + fi +fi + +# Set environment variables +export COMPOSE_FILE=$COMPOSE_FILE +export EVEAI_VERSION=$VERSION +export CONTAINER_ACCOUNT=$CONTAINER_ACCOUNT +export CONTAINER_CMD=$CONTAINER_CMD +export COMPOSE_CMD_PATH=$COMPOSE_CMD_PATH +export REGISTRY_PREFIX=$REGISTRY_PREFIX +export COMPOSE_PROJECT_NAME=$COMPOSE_PROJECT_NAME + +echo "Set COMPOSE_FILE to $COMPOSE_FILE" +echo "Set EVEAI_VERSION to $VERSION" +echo "Set CONTAINER_ACCOUNT to $CONTAINER_ACCOUNT" +echo "Set platform to AMD64 (linux/amd64)" +echo "Set registry prefix to $REGISTRY_PREFIX" +echo "Set project name to $COMPOSE_PROJECT_NAME" + +# Define compose wrapper functions using the full path to avoid recursion +pc() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE "$@" +} + +pcup() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE up -d --remove-orphans "$@" +} + +pcdown() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE down "$@" +} + +pcps() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE ps "$@" +} + +pclogs() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE logs "$@" +} + +pcpull() { + echo "Pulling AMD64 images..." + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE pull "$@" +} + +pcrefresh() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE pull && $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE up -d --remove-orphans "$@" +} + +pcbuild() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE build "$@" +} + +pcrestart() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE restart "$@" +} + +pcstop() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE stop "$@" +} + +pcstart() { + $COMPOSE_CMD_PATH -p ${COMPOSE_PROJECT_NAME} -f $COMPOSE_FILE start "$@" +} +# Export functions - handle both bash and zsh +if [[ -n "$ZSH_VERSION" ]]; then + # In zsh, functions are automatically available in subshells + # But we can make them available globally with typeset + typeset -f pc pcup pcdown pcps pclogs pcpull pcrefresh pcbuild pcrestart pcstop pcstart > /dev/null +else + # Bash style export + export -f pc pcup pcdown pcps pclogs pcpull pcrefresh pcbuild pcrestart pcstop pcstart +fi + +echo "✅ Podman environment switched to $ENVIRONMENT with version $VERSION" +echo "🖥️ Platform: AMD64 (compatible with both Intel and Apple Silicon)" +echo "Available commands:" +echo " pc - podman-compose shorthand" +echo " pcup - start services in background" +echo " pcdown - stop and remove services" +echo " pcps - list running services" +echo " pclogs - view service logs" +echo " pcpull - pull latest images" +echo " pcrefresh - pull and restart services" +echo " pcbuild - build services" +echo " pcrestart - restart services" +echo " pcstop - stop services" +echo " pcstart - start stopped services" \ No newline at end of file diff --git a/documentation/containerd_cri_troubleshooting.md 
b/documentation/containerd_cri_troubleshooting.md new file mode 100644 index 0000000..cabe523 --- /dev/null +++ b/documentation/containerd_cri_troubleshooting.md @@ -0,0 +1,365 @@ +# Containerd CRI Plugin Troubleshooting Guide + +**Datum:** 18 augustus 2025 +**Auteur:** EveAI Development Team +**Versie:** 1.0 + +## Overzicht + +Dit document beschrijft de oplossing voor een kritiek probleem met de containerd Container Runtime Interface (CRI) plugin in het EveAI Kubernetes development cluster. Het probleem verhinderde de succesvolle opstart van Kind clusters en resulteerde in niet-functionele Kubernetes nodes. + +## Probleem Beschrijving + +### Symptomen + +Het EveAI development cluster ondervond de volgende problemen: + +1. **Kind cluster creatie faalde** met complexe kubeadmConfigPatches +2. **Control-plane nodes bleven in `NotReady` status** +3. **Container runtime toonde `Unknown` status** +4. **Kubelet kon niet communiceren** met de container runtime +5. **Ingress pods konden niet worden gescheduled** +6. **Cluster was volledig niet-functioneel** + +### Foutmeldingen + +#### Primaire Fout - Containerd CRI Plugin +``` +failed to create CRI service: failed to create cni conf monitor for default: +failed to create fsnotify watcher: too many open files +``` + +#### Kubelet Communicatie Fouten +``` +rpc error: code = Unimplemented desc = unknown service runtime.v1.RuntimeService +``` + +#### Node Status Problemen +``` +NAME STATUS ROLES AGE VERSION +eveai-dev-cluster-control-plane NotReady control-plane 5m v1.33.1 +``` + +## Root Cause Analyse + +### Hoofdoorzaak + +Het probleem had twee hoofdcomponenten: + +1. **Complexe Kind Configuratie**: De oorspronkelijke `kind-dev-cluster.yaml` bevatte complexe `kubeadmConfigPatches` en `containerdConfigPatches` die de cluster initialisatie verstoorden. + +2. **File Descriptor Limits**: De containerd service kon geen fsnotify watcher aanmaken voor CNI configuratie monitoring vanwege "too many open files" beperkingen binnen de Kind container omgeving. + +### Technische Details + +#### Kind Configuratie Problemen +De oorspronkelijke configuratie bevatte: +```yaml +kubeadmConfigPatches: + - | + kind: ClusterConfiguration + etcd: + local: + dataDir: /tmp/lib/etcd + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + authorization-mode: "Webhook" + feature-gates: "EphemeralContainers=true" +``` + +#### Containerd CRI Plugin Failure +De containerd service startte wel op, maar de CRI plugin faalde tijdens het laden: +- **Service Status**: `active (running)` +- **CRI Plugin**: `failed to load` +- **Gevolg**: Kubelet kon niet communiceren met container runtime + +## Oplossing Implementatie + +### Stap 1: Kind Configuratie Vereenvoudiging + +**Probleem**: Complexe kubeadmConfigPatches veroorzaakten initialisatie problemen. + +**Oplossing**: Vereenvoudigde configuratie naar minimale, werkende setup: + +```yaml +# Voor: Complexe configuratie +kubeadmConfigPatches: + - | + kind: ClusterConfiguration + etcd: + local: + dataDir: /tmp/lib/etcd + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + authorization-mode: "Webhook" + feature-gates: "EphemeralContainers=true" + +# Na: Vereenvoudigde configuratie +kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" +``` + +### Stap 2: Containerd ConfigPatches Uitschakeling + +**Probleem**: Registry configuratie patches veroorzaakten containerd opstartproblemen. 
+ +**Oplossing**: Tijdelijk uitgeschakeld voor stabiliteit: + +```yaml +# Temporarily disabled for testing +# containerdConfigPatches: +# - |- +# [plugins."io.containerd.grpc.v1.cri".registry] +# config_path = "/etc/containerd/certs.d" +``` + +### Stap 3: Setup Script Verbeteringen + +#### A. Container Limits Configuratie Functie + +Toegevoegd aan `setup-dev-cluster.sh`: + +```bash +# Configure container resource limits to prevent CRI issues +configure_container_limits() { + print_status "Configuring container resource limits..." + + # Configure file descriptor and inotify limits to prevent CRI plugin failures + podman exec "${CLUSTER_NAME}-control-plane" sh -c ' + echo "fs.inotify.max_user_instances = 1024" >> /etc/sysctl.conf + echo "fs.inotify.max_user_watches = 524288" >> /etc/sysctl.conf + echo "fs.file-max = 2097152" >> /etc/sysctl.conf + sysctl -p + ' + + # Restart containerd to apply new limits + print_status "Restarting containerd with new limits..." + podman exec "${CLUSTER_NAME}-control-plane" systemctl restart containerd + + # Wait for containerd to stabilize + sleep 10 + + # Restart kubelet to ensure proper CRI communication + podman exec "${CLUSTER_NAME}-control-plane" systemctl restart kubelet + + print_success "Container limits configured and services restarted" +} +``` + +#### B. CRI Status Verificatie Functie + +```bash +# Verify CRI status and functionality +verify_cri_status() { + print_status "Verifying CRI status..." + + # Wait for services to stabilize + sleep 15 + + # Test CRI connectivity + if podman exec "${CLUSTER_NAME}-control-plane" crictl version &>/dev/null; then + print_success "CRI is functional" + + # Show CRI version info + print_status "CRI version information:" + podman exec "${CLUSTER_NAME}-control-plane" crictl version + else + print_error "CRI is not responding - checking containerd logs" + podman exec "${CLUSTER_NAME}-control-plane" journalctl -u containerd --no-pager -n 20 + + print_error "Checking kubelet logs" + podman exec "${CLUSTER_NAME}-control-plane" journalctl -u kubelet --no-pager -n 10 + + return 1 + fi + + # Verify node readiness + print_status "Waiting for node to become Ready..." + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + if kubectl get nodes | grep -q "Ready"; then + print_success "Node is Ready" + return 0 + fi + + attempt=$((attempt + 1)) + print_status "Attempt $attempt/$max_attempts - waiting for node readiness..." + sleep 10 + done + + print_error "Node failed to become Ready within timeout" + kubectl get nodes -o wide + return 1 +} +``` + +#### C. Hoofduitvoering Update + +```bash +# Main execution +main() { + # ... existing code ... + + check_prerequisites + create_host_directories + create_cluster + configure_container_limits # ← Nieuw toegevoegd + verify_cri_status # ← Nieuw toegevoegd + install_ingress_controller + apply_manifests + verify_cluster + + # ... rest of function ... +} +``` + +## Resultaten + +### ✅ Succesvolle Oplossingen + +1. **Cluster Creatie**: Kind clusters worden nu succesvol aangemaakt +2. **Node Status**: Control-plane nodes bereiken `Ready` status +3. **CRI Functionaliteit**: Container runtime communiceert correct met kubelet +4. **Basis Kubernetes Operaties**: Deployments, services, en pods werken correct + +### ⚠️ Resterende Beperkingen + +**Ingress Controller Probleem**: De NGINX Ingress controller ondervindt nog steeds "too many open files" fouten vanwege file descriptor beperkingen die niet kunnen worden aangepast binnen de Kind container omgeving. 
+ +**Foutmelding**: +``` +too many open files +``` + +**Oorzaak**: Dit is een beperking van de Kind/Podman setup waar kernel parameters niet kunnen worden aangepast vanuit containers. + +## Troubleshooting Commands + +### Diagnose Commands + +```bash +# Controleer containerd status +ssh minty "podman exec eveai-dev-cluster-control-plane systemctl status containerd" + +# Bekijk containerd logs +ssh minty "podman exec eveai-dev-cluster-control-plane journalctl -u containerd -f" + +# Test CRI connectiviteit +ssh minty "podman exec eveai-dev-cluster-control-plane crictl version" + +# Controleer file descriptor usage +ssh minty "podman exec eveai-dev-cluster-control-plane sh -c 'lsof | wc -l'" + +# Controleer node status +kubectl get nodes -o wide + +# Controleer kubelet logs +ssh minty "podman exec eveai-dev-cluster-control-plane journalctl -u kubelet --no-pager -n 20" +``` + +### Cluster Management + +```bash +# Cluster verwijderen (met Podman provider) +KIND_EXPERIMENTAL_PROVIDER=podman kind delete cluster --name eveai-dev-cluster + +# Nieuwe cluster aanmaken +cd /path/to/k8s/dev && ./setup-dev-cluster.sh + +# Cluster status controleren +kubectl get all -n eveai-dev +``` + +## Preventieve Maatregelen + +### 1. Configuratie Validatie + +- **Minimale Kind Configuratie**: Gebruik alleen noodzakelijke kubeadmConfigPatches +- **Stapsgewijze Uitbreiding**: Voeg complexe configuraties geleidelijk toe +- **Testing**: Test elke configuratiewijziging in isolatie + +### 2. Monitoring + +- **Health Checks**: Implementeer uitgebreide CRI status controles +- **Logging**: Monitor containerd en kubelet logs voor vroege waarschuwingen +- **Automatische Recovery**: Implementeer automatische herstart procedures + +### 3. Documentatie + +- **Configuratie Geschiedenis**: Documenteer alle configuratiewijzigingen +- **Troubleshooting Procedures**: Onderhoud actuele troubleshooting guides +- **Known Issues**: Bijhouden van bekende beperkingen en workarounds + +## Aanbevelingen voor Productie + +### 1. Infrastructure Alternatieven + +Voor productie-omgevingen waar Ingress controllers essentieel zijn: + +- **Volledige VM Setup**: Gebruik echte virtuele machines waar kernel parameters kunnen worden geconfigureerd +- **Bare-metal Kubernetes**: Implementeer op fysieke hardware voor volledige controle +- **Managed Kubernetes**: Overweeg cloud-managed solutions (EKS, GKE, AKS) + +### 2. Host-level Configuratie + +```bash +# Op de host (minty) machine +sudo mkdir -p /etc/systemd/system/user@.service.d/ +sudo tee /etc/systemd/system/user@.service.d/limits.conf << EOF +[Service] +LimitNOFILE=1048576 +LimitNPROC=1048576 +EOF +sudo systemctl daemon-reload +``` + +### 3. Alternatieve Ingress Controllers + +Test andere ingress controllers die mogelijk lagere file descriptor vereisten hebben: +- **Traefik** +- **HAProxy Ingress** +- **Istio Gateway** + +## Conclusie + +De containerd CRI plugin failure is succesvol opgelost door: + +1. **Vereenvoudiging** van de Kind cluster configuratie +2. **Implementatie** van container resource limits configuratie +3. **Toevoeging** van uitgebreide CRI status verificatie +4. **Verbetering** van error handling en diagnostics + +Het cluster is nu volledig functioneel voor basis Kubernetes operaties. De resterende Ingress controller beperking is een bekende limitatie van de Kind/Podman omgeving en vereist alternatieve oplossingen voor productie gebruik. + +## Bijlagen + +### A. 
Gewijzigde Bestanden + +- `k8s/dev/setup-dev-cluster.sh` - Toegevoegde functies en verbeterde workflow +- `k8s/dev/kind-dev-cluster.yaml` - Vereenvoudigde configuratie +- `k8s/dev/kind-minimal.yaml` - Nieuwe minimale test configuratie + +### B. Tijdsinschatting Oplossing + +- **Probleem Identificatie**: 2-3 uur +- **Root Cause Analyse**: 1-2 uur +- **Oplossing Implementatie**: 2-3 uur +- **Testing en Verificatie**: 1-2 uur +- **Documentatie**: 1 uur +- **Totaal**: 7-11 uur + +### C. Lessons Learned + +1. **Complexiteit Vermijden**: Start met minimale configuraties en bouw geleidelijk uit +2. **Systematische Diagnose**: Gebruik gestructureerde troubleshooting approaches +3. **Environment Beperkingen**: Begrijp de beperkingen van containerized Kubernetes (Kind) +4. **Monitoring Essentieel**: Implementeer uitgebreide health checks en logging +5. **Documentatie Cruciaal**: Documenteer alle wijzigingen en procedures voor toekomstig gebruik \ No newline at end of file diff --git a/documentation/evie_object_storage_governance.md b/documentation/evie_object_storage_governance.md new file mode 100644 index 0000000..14acaae --- /dev/null +++ b/documentation/evie_object_storage_governance.md @@ -0,0 +1,202 @@ +# Evie Object Storage Governance (Optie 3) + +**Doel:** 1 bucket per omgeving (staging / prod), met **prefixen per +tenant**. Duidelijke scheiding van datatypes (documents vs assets), lage +beheerlast, goed schaalbaar. + +------------------------------------------------------------------------ + +## 1) Structuur & naamgeving + +### Buckets (per omgeving) + +- **staging:** `evie-staging` +- **prod:** `evie-prod` + +> Buckets zijn S3-compatibel op Scaleway +> (`https://s3..scw.cloud`). Houd buckets "plat" (alle tenants +> als prefix). + +### Prefix layout (per tenant) + + / + tenant-/ + documents/ + ... + assets/ + ... + +**Conventies** - **Tenant prefix:** `tenant-` (tenantId = +interne stabiele sleutel; geen PII). - **Datatypes:** `documents/` en +`assets/` (harde scheiding). - **Bestandsnamen:** `snake-case` of +`kebab-case`; voeg optioneel datum/uuid toe bij uploads die kunnen +conflicteren. + +------------------------------------------------------------------------ + +## 2) Toegang & secrets + +### IAM-model + +- **Één IAM Application per omgeving** + - `evie-staging-app` → keys in **staging** k8s Secret\ + - `evie-prod-app` → keys in **prod** k8s Secret\ +- Toegang **alleen** tot het eigen bucket (`evie-staging` of + `evie-prod`). + +### App-side secrets (env) + +- `S3_ENDPOINT=https://s3..scw.cloud` +- `S3_BUCKET=evie-` +- `S3_ACCESS_KEY=***` +- `S3_SECRET_KEY=***` +- `S3_REGION=` (bv. `fr-par`) +- (optioneel) `S3_FORCE_PATH_STYLE=false` + +> **Presigned uploads**: genereer **server-side** presigned URL's per +> tenant/prefix; geef nooit de master-keys aan de client. + +------------------------------------------------------------------------ + +## 3) Policies (conceptueel) + +- **Bucket policy**: sta alleen requests toe met geldige credentials + van de Evie-app van die omgeving. +- **Prefix scope** (in app-logica): alle reads/writes **moeten** met + pad beginnen op `tenant-/...`. +- **Optioneel** (later): extra policy-groepen voor specifieke + workflows (vb. tijdelijke ingest job). + +> **Belangrijk:** autorisatie op tenantniveau afdwingen in **je +> applicatie** (context = `tenantId`). Nooit paden samenstellen vanuit +> user input zonder whitelisting/validation. 
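
To make the prefix-scope rule concrete, here is a minimal sketch of server-side key construction. It assumes a shell helper with hypothetical names (`build_object_key`, a numeric tenant id) and the `tenant-<tenantId>/{documents,assets}/` layout from section 1; it illustrates the validation idea, not the production implementation:

```bash
#!/usr/bin/env bash
# Sketch only: build the canonical object key server-side.
# Assumption: tenant_id comes from the authenticated session, never from the request.
build_object_key() {
  local tenant_id="$1" datatype="$2" filename="$3"

  # Hard whitelist of the two datatypes (documents vs assets)
  case "$datatype" in
    documents|assets) ;;
    *) echo "invalid datatype: $datatype" >&2; return 1 ;;
  esac

  # Reject empty names, absolute paths and path traversal
  if [[ -z "$filename" || "$filename" == /* || "$filename" == *..* ]]; then
    echo "invalid filename: $filename" >&2
    return 1
  fi

  printf 'tenant-%s/%s/%s\n' "$tenant_id" "$datatype" "$filename"
}

# Example: build_object_key 42 documents contract-2025.pdf
# -> tenant-42/documents/contract-2025.pdf
```

Any request whose resulting key does not start with the caller's own `tenant-<tenantId>/` prefix is rejected before an S3 call is made.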
+ +------------------------------------------------------------------------ + +## 4) Lifecycle & retentie + +**Doel:** kosten beheersen, assets sneller "kouder", documenten langer +bewaren. + + ----------------------------------------------------------------------- + Scope (Filter) Regel + ----------------------------------- ----------------------------------- + `tenant-*/assets/` → One Zone-IA na 30 dagen + + `tenant-*/assets/` → Glacier/Archive na 180 dagen + (optioneel) + + `tenant-*/documents/` → Standard (geen transition) of IA + na 180d + + `tenant-*/documents/` (tijdelijke Expire (delete) na 7--14 dagen + previews) + ----------------------------------------------------------------------- + +> Lifecycle definieer je **per bucket** met **prefix filters**, zodat +> regels verschillend zijn voor `assets/` en `documents/`. + +------------------------------------------------------------------------ + +## 5) CORS & distributie + +- **CORS**: indien browser direct upload/download doet, whitelist de + domeinen van je app (origin), en methodes `GET, PUT, POST`. Alleen + benodigde headers toestaan. +- **Publieke distributie** (indien nodig): + - Kleine public-reads via presigned URL's (aanbevolen).\ + - Of activeer publieke read op een **specifieke** `public/`-prefix + (niet op de hele bucket).\ + - Overweeg een CDN/edge-lag via Scaleway Edge Services voor + veelgevraagde assets. + +------------------------------------------------------------------------ + +## 6) Observability & beheer + +- **Logging/metrics**: + - App: log alle S3 calls met `tenantId` + object key.\ + - Platform: gebruik Scaleway Cockpit voor capacity & request + metrics. +- **Quota & limieten**: + - 1 bucket per omgeving beperkt "bucket-sprawl".\ + - Objecten en totale grootte zijn praktisch onbeperkt; plan wel + lifecycle om groei te managen. +- **Monitoring**: + - Alerts op snelgroeiende **assets**-prefixen, high error rates + (4xx/5xx), en mislukte lifecycle-transities. + +------------------------------------------------------------------------ + +## 7) Operationele workflows + +### Tenant aanmaken + +1. DB schema provisionen. +2. (S3) **Geen nieuwe bucket**; enkel **prefix**: + `tenant-/documents/` en `tenant-/assets/` zijn impliciet. +3. (Optioneel) Init bestanden/placeholder objecten. +4. App-config linkt de tenant aan zijn prefix (centrale mapping). + +### Upload (app -\> S3) + +1. App valideert `tenantId` en datatype (`documents|assets`).\ +2. App construeert **canonical path**: `tenant-//<...>`\ +3. App genereert **presigned PUT** (tijdelijk) en geeft terug aan + frontend.\ +4. Frontend uploadt rechtstreeks naar S3 met presigned URL. + +### Download / Serve + +- Interne downloads: app signed GET of server-side stream.\ +- Externe/public: **presigned GET** met korte TTL of via public-only + prefix + CDN. + +### Opruimen & lifecycle + +- Tijdelijke artefacten: app scheduled cleanup (of lifecycle + "Expiration").\ +- Archivering: lifecycle transitions per prefix. + +------------------------------------------------------------------------ + +## 8) Beveiliging + +- **Least privilege**: IAM-keys enkel voor het bucket van de + omgeving.\ +- **Encryptie**: server-side encryption (default) volstaat vaak; + overweeg KMS als apart key-beleid nodig is.\ +- **Auditing**: log alle **write**-operaties met gebruikers- en + tenantcontext.\ +- **Backups**: documenten zijn de "bron"? Zo ja, S3 is primaire opslag + en RAG-index kan herbouwd worden. Anders: definieer + export/replica-strategie. 
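
As a sketch of how the lifecycle table in section 4 could be applied: the fragment below assumes the aws CLI pointed at the Scaleway endpoint, the `evie-staging` bucket, one example tenant prefix, and a hypothetical `documents/previews/` sub-prefix for temporary previews. Day counts and storage-class names should be verified against Scaleway's S3 documentation, and because prefix filters are literal (no `tenant-*` wildcard), rules are added per tenant prefix or replaced by a tag-based filter:

```bash
# Sketch only: lifecycle rules per datatype prefix (all values are assumptions, not tested config).
cat > lifecycle.json <<'EOF'
{
  "Rules": [
    {
      "ID": "assets-to-one-zone-ia",
      "Status": "Enabled",
      "Filter": { "Prefix": "tenant-42/assets/" },
      "Transitions": [ { "Days": 30, "StorageClass": "ONEZONE_IA" } ]
    },
    {
      "ID": "expire-temporary-previews",
      "Status": "Enabled",
      "Filter": { "Prefix": "tenant-42/documents/previews/" },
      "Expiration": { "Days": 14 }
    }
  ]
}
EOF

aws s3api put-bucket-lifecycle-configuration \
  --endpoint-url "https://s3.fr-par.scw.cloud" \
  --bucket "evie-staging" \
  --lifecycle-configuration file://lifecycle.json
```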
+ +------------------------------------------------------------------------ + +## 9) Migratie van MinIO → Scaleway + +1. **Freeze window** (kort): pauzeer uploads of werk met **duale + write** (MinIO + S3) gedurende migratie.\ +2. **Sync**: gebruik `rclone` of `mc mirror` om + `minio://bucket/tenant-*/{documents,assets}/` → + `s3://evie-/tenant-*/...`.\ +3. **Verifieer**: random checksums / sample reads per tenant.\ +4. **Switch**: zet `S3_ENDPOINT` en keys naar Scaleway; laat nieuwe + writes enkel naar S3 gaan.\ +5. **Decom**: na grace-periode MinIO uitfaseren. + +------------------------------------------------------------------------ + +## 10) Checklist (TL;DR) + +- [ ] Buckets: `evie-staging`, `evie-prod`.\ +- [ ] Prefix: `tenant-/{documents,assets}/`.\ +- [ ] IAM: 1 Application per omgeving; keys in k8s Secret.\ +- [ ] Policy: alleen app-toegang; app dwingt prefix-scope per tenant + af.\ +- [ ] Lifecycle: assets sneller koud, docs langer.\ +- [ ] CORS: alleen noodzakelijke origins/methods.\ +- [ ] Presigned URLs voor browser interacties.\ +- [ ] Logging/metrics/alerts ingericht.\ +- [ ] Migratiepad van MinIO uitgewerkt en getest. diff --git a/documentation/k8s_dev_cluster.mermaid b/documentation/k8s_dev_cluster.mermaid new file mode 100644 index 0000000..392a2ec --- /dev/null +++ b/documentation/k8s_dev_cluster.mermaid @@ -0,0 +1,161 @@ +graph TB + %% Host Machine + subgraph "Host Machine (macOS)" + HOST[("Host Machine
macOS Sonoma")] + PODMAN[("Podman
Container Runtime")] + HOSTDIRS[("Host Directories
~/k8s-data/dev/
• minio
• redis
• logs
• prometheus
• grafana
• certs")] + end + + %% Kind Cluster + subgraph "Kind Cluster (eveai-dev-cluster)" + %% Control Plane + CONTROL[("Control Plane Node
Port Mappings:
• 80:30080
• 443:30443
• 3080:30080")] + + %% Ingress Controller + subgraph "ingress-nginx namespace" + INGRESS[("NGINX Ingress Controller
Handles routing to services")] + end + + %% EveAI Dev Namespace + subgraph "eveai-dev namespace" + %% Web Services + subgraph "Web Services" + APP[("EveAI App
Port: 5001
NodePort: 30001")] + API[("EveAI API
Port: 5003
NodePort: 30003")] + CHAT[("EveAI Chat Client
Port: 5004
NodePort: 30004")] + STATIC[("Static Files Service
NGINX
Port: 80")] + end + + %% Background Services + subgraph "Background Workers" + WORKERS[("EveAI Workers
Replicas: 2
Celery Workers")] + CHATWORKERS[("EveAI Chat Workers
Replicas: 2
Celery Workers")] + BEAT[("EveAI Beat
Celery Scheduler
Replicas: 1")] + ENTITLE[("EveAI Entitlements
Port: 8000")] + end + + %% Infrastructure Services + subgraph "Infrastructure Services" + REDIS[("Redis
Port: 6379
NodePort: 30379")] + MINIO[("MinIO
Port: 9000
Console: 9001
NodePort: 30900")] + end + + %% Monitoring Services + subgraph "Monitoring Stack" + PROM[("Prometheus
Port: 9090")] + GRAFANA[("Grafana
Port: 3000")] + NGINX_EXPORTER[("NGINX Prometheus Exporter
Port: 9113")] + end + + %% Storage + subgraph "Persistent Storage" + PV_REDIS[("Redis PV
5Gi Local")] + PV_MINIO[("MinIO PV
20Gi Local")] + PV_LOGS[("App Logs PV
5Gi Local")] + PV_PROM[("Prometheus PV
10Gi Local")] + PV_GRAFANA[("Grafana PV
5Gi Local")] + end + + %% Configuration + subgraph "Configuration" + CONFIGMAP[("eveai-config
ConfigMap")] + SECRETS[("eveai-secrets
Secret")] + end + end + end + + %% External Registry + REGISTRY[("Container Registry
registry.ask-eve-ai-local.com
josakola/eveai_*")] + + %% Connections + HOST --> PODMAN + PODMAN --> CONTROL + HOSTDIRS --> PV_REDIS + HOSTDIRS --> PV_MINIO + HOSTDIRS --> PV_LOGS + HOSTDIRS --> PV_PROM + HOSTDIRS --> PV_GRAFANA + + %% Service connections + CONTROL --> INGRESS + INGRESS --> APP + INGRESS --> API + INGRESS --> CHAT + INGRESS --> STATIC + + %% Worker connections to Redis + WORKERS --> REDIS + CHATWORKERS --> REDIS + BEAT --> REDIS + + %% All services connect to storage + APP --> PV_LOGS + API --> PV_LOGS + CHAT --> PV_LOGS + WORKERS --> PV_LOGS + CHATWORKERS --> PV_LOGS + BEAT --> PV_LOGS + ENTITLE --> PV_LOGS + + %% Infrastructure storage + REDIS --> PV_REDIS + MINIO --> PV_MINIO + PROM --> PV_PROM + GRAFANA --> PV_GRAFANA + + %% Configuration connections + CONFIGMAP --> APP + CONFIGMAP --> API + CONFIGMAP --> CHAT + CONFIGMAP --> WORKERS + CONFIGMAP --> CHATWORKERS + CONFIGMAP --> BEAT + CONFIGMAP --> ENTITLE + + SECRETS --> APP + SECRETS --> API + SECRETS --> CHAT + SECRETS --> WORKERS + SECRETS --> CHATWORKERS + SECRETS --> BEAT + SECRETS --> ENTITLE + + %% Registry connections + REGISTRY --> APP + REGISTRY --> API + REGISTRY --> CHAT + REGISTRY --> WORKERS + REGISTRY --> CHATWORKERS + REGISTRY --> BEAT + REGISTRY --> ENTITLE + + %% Monitoring connections + PROM --> APP + PROM --> API + PROM --> CHAT + PROM --> REDIS + PROM --> MINIO + PROM --> NGINX_EXPORTER + GRAFANA --> PROM + + %% External Access + subgraph "External Access" + ACCESS[("http://minty.ask-eve-ai-local.com:3080
• /admin/ → App
• /api/ → API
• /chat-client/ → Chat
• /static/ → Static Files")] + + ACCESS --> INGRESS + + %% Styling + classDef webService fill:#e1f5fe,stroke:#01579b,stroke-width:2px + classDef infrastructure fill:#f3e5f5,stroke:#4a148c,stroke-width:2px + classDef storage fill:#e8f5e8,stroke:#1b5e20,stroke-width:2px + classDef monitoring fill:#fff3e0,stroke:#e65100,stroke-width:2px + classDef config fill:#fce4ec,stroke:#880e4f,stroke-width:2px + classDef external fill:#f1f8e9,stroke:#33691e,stroke-width:2px + + class APP,API,CHAT,STATIC webService + class REDIS,MINIO,WORKERS,CHATWORKERS,BEAT,ENTITLE infrastructure + class PV_REDIS,PV_MINIO,PV_LOGS,PV_PROM,PV_GRAFANA,HOSTDIRS storage + class PROM,GRAFANA,NGINX_EXPORTER monitoring + class CONFIGMAP,SECRETS config + class REGISTRY,ACCESS external \ No newline at end of file diff --git a/eveai_app/templates/eveai_json_editor.html b/eveai_app/templates/eveai_json_editor.html index b457a6d..b0c4fba 100644 --- a/eveai_app/templates/eveai_json_editor.html +++ b/eveai_app/templates/eveai_json_editor.html @@ -40,10 +40,10 @@ window.EveAI.JsonEditors = { mainMenuBar: options.mainMenuBar !== undefined ? options.mainMenuBar : true, navigationBar: options.navigationBar !== undefined ? options.navigationBar : false, statusBar: options.statusBar !== undefined ? options.statusBar : !isReadOnly, - onChange: (updatedContent, previousContent, { contentErrors, patchResult }) => { - // content is an object { json: unknown } | { text: string } - console.log('onChange', { updatedContent, previousContent, contentErrors, patchResult }) - } + onChange: options.onChange || ((updatedContent, previousContent, { contentErrors, patchResult }) => { + // Default onChange behavior - log only + console.log('onChange', { updatedContent, previousContent, contentErrors, patchResult }); + }) }; console.log('EditorProps', editorProps); @@ -107,13 +107,51 @@ document.addEventListener('DOMContentLoaded', function() { window.EveAI.JsonEditors.initialize(containerId, data, { mode: isReadOnly ? 'preview' : 'tree', readOnly: isReadOnly, - onChangeText: isReadOnly ? undefined : (jsonString) => { textarea.value = jsonString; } + onChange: isReadOnly ? undefined : (updatedContent, previousContent, { contentErrors, patchResult }) => { + // Automatically synchronize to the textarea on every change + if (updatedContent.json !== undefined) { + textarea.value = JSON.stringify(updatedContent.json, null, 2); + } else if (updatedContent.text !== undefined) { + textarea.value = updatedContent.text; + } + console.log('Textarea automatically updated via onChange'); + } }); } catch (e) { console.error('Error parsing initial JSON for .json-editor:', e); container.innerHTML = `
Error loading JSON data:
${e.message}
`; } }); + + // Real-time synchronization as an extra safeguard + setInterval(function() { + document.querySelectorAll('.json-editor').forEach(function(textarea) { + if (textarea.style.display === 'none') { // Only for hidden textareas (such as in edit forms) + const containerId = textarea.id + '-editor'; + const editor = window.EveAI?.JsonEditors?.get(containerId); + if (editor && editor.get) { + try { + const content = editor.get(); + let newValue = ''; + if (content.json !== undefined) { + newValue = JSON.stringify(content.json, null, 2); + } else if (content.text !== undefined) { + newValue = content.text; + } + + // Only update if the value has actually changed + if (textarea.value !== newValue) { + textarea.value = newValue; + console.log('Real-time sync performed for', textarea.id); + } + } catch (e) { + // Fail silently - no console spam + } + } + } + }); + }, 1000); // Check every second + // Read-only containers document.querySelectorAll('.json-viewer').forEach(function(container) { const dataElement = document.getElementById(container.id + '-data'); diff --git a/eveai_app/templates/interaction/edit_asset.html b/eveai_app/templates/interaction/edit_asset.html index f0f3444..8a8d35d 100644 --- a/eveai_app/templates/interaction/edit_asset.html +++ b/eveai_app/templates/interaction/edit_asset.html @@ -77,122 +77,42 @@