- Metrics service toegevoegd

- Applicatie services starten op, behalve eveai_chat_client
- Connectiviteit naar admin / eveai_app niet functioneel
This commit is contained in:
Josako
2025-08-20 11:49:19 +02:00
parent d6a2635e50
commit 9c63ecb17f
5 changed files with 156 additions and 283 deletions

View File

@@ -1,283 +0,0 @@
#!/bin/bash
# Deploy All EveAI Dev Services Script
# File: deploy-all-services.sh
set -e

# ANSI color codes for terminal output (consumed via 'echo -e' / '%b' below).
# Marked readonly: these are constants and must never be reassigned.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color (reset attribute)
# Helpers for colored console output.
# Print an informational line (blue [INFO] prefix).
print_status() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a success line (green [SUCCESS] prefix).
print_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# Print a warning line (yellow [WARNING] prefix).
print_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
# Print an error line (red [ERROR] prefix).
# Fix: write to stderr so errors remain visible when stdout is redirected
# or captured (e.g. `./deploy.sh status > report.txt`).
print_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Ensure kubectl is pointed at the expected Kind dev cluster.
# Exits the script on a mismatch to avoid touching another cluster by accident.
check_cluster_context() {
  print_status "Checking cluster context..."
  CURRENT_CONTEXT=$(kubectl config current-context)
  # Happy path first; fall through to the error messages on mismatch.
  if [[ "$CURRENT_CONTEXT" == "kind-eveai-dev-cluster" ]]; then
    print_success "Using correct cluster context: $CURRENT_CONTEXT"
    return 0
  fi
  print_error "Wrong cluster context: $CURRENT_CONTEXT"
  print_error "Expected: kind-eveai-dev-cluster"
  echo "Switch context with: kubectl config use-context kind-eveai-dev-cluster"
  exit 1
}
# Wait until all pods carrying a given 'app' label are Ready.
# Arguments:
#   $1 - namespace
#   $2 - value of the 'app' label to wait for
#   $3 - timeout in seconds (default 300)
# Returns: 0 when pods become Ready, 1 on timeout/failure.
wait_for_pods() {
  local namespace=$1
  local app_label=$2
  local timeout=${3:-300}
  print_status "Waiting for $app_label pods to be ready..."
  # Fix: quote all expansions so label/namespace values can never word-split
  # or glob (previously -l $app_label / -n $namespace were unquoted).
  if kubectl wait --for=condition=Ready pods -l "app=${app_label}" -n "$namespace" --timeout="${timeout}s"; then
    print_success "$app_label pods are ready"
    return 0
  else
    print_error "$app_label pods failed to become ready within ${timeout}s"
    return 1
  fi
}
# Deploy services in correct order.
# Stage 1: infrastructure (Redis, MinIO) — must be up before the apps start.
deploy_infrastructure() {
  print_status "Deploying infrastructure services (Redis, MinIO)..."
  kubectl apply -f redis-minio-services.yaml || {
    print_error "Failed to deploy infrastructure services"
    exit 1
  }
  print_success "Infrastructure services deployed"
  # Block until the infrastructure pods report Ready.
  wait_for_pods "eveai-dev" "redis" 180
  wait_for_pods "eveai-dev" "minio" 300
}
# Stage 2: EveAI application services (app, api, chat-client).
deploy_application_services() {
  print_status "Deploying EveAI application services..."
  if ! kubectl apply -f eveai-services.yaml; then
    print_error "Failed to deploy application services"
    exit 1
  fi
  print_success "Application services deployed"
  # Block until the key application pods report Ready.
  wait_for_pods "eveai-dev" "eveai-app" 180
  wait_for_pods "eveai-dev" "eveai-api" 180
  wait_for_pods "eveai-dev" "eveai-chat-client" 180
}
# Stage 3: static-files service and the Ingress, then wait for readiness.
deploy_static_ingress() {
  print_status "Deploying static files service and Ingress..."
  # Static files first; the Ingress routes to it.
  if ! kubectl apply -f static-files-service.yaml; then
    print_error "Failed to deploy static files service"
    exit 1
  fi
  print_success "Static files service deployed"
  if ! kubectl apply -f eveai-ingress.yaml; then
    print_error "Failed to deploy Ingress"
    exit 1
  fi
  print_success "Ingress deployed"
  wait_for_pods "eveai-dev" "static-files" 60
  # Best effort: Ingress readiness is non-fatal, it may lag behind.
  print_status "Waiting for Ingress to be ready..."
  kubectl wait --namespace eveai-dev \
    --for=condition=ready ingress/eveai-ingress \
    --timeout=120s || print_warning "Ingress might still be starting up"
}
# Stage 4: monitoring stack (Flower, Prometheus, Grafana).
deploy_monitoring_only() {
  print_status "Deploying monitoring services..."
  kubectl apply -f monitoring-services.yaml || {
    print_error "Failed to deploy monitoring services"
    exit 1
  }
  print_success "Monitoring services deployed"
  # Block until each monitoring pod reports Ready.
  wait_for_pods "eveai-dev" "flower" 120
  wait_for_pods "eveai-dev" "prometheus" 180
  wait_for_pods "eveai-dev" "grafana" 180
}
# Check service status: dump pods, services and PVCs in the dev namespace.
check_services() {
  print_status "Checking service status..."
  local headers=("Pods status:" "Services status:" "Persistent Volume Claims:")
  local resources=(pods services pvc)
  local idx
  for idx in 0 1 2; do
    echo ""
    print_status "${headers[$idx]}"
    kubectl get "${resources[$idx]}" -n eveai-dev
  done
}
# Test service connectivity via Ingress: probe each public endpoint once
# and report reachability (non-fatal — services may still be starting).
test_connectivity_ingress() {
  print_status "Testing Ingress connectivity..."
  local urls=(
    "http://minty.ask-eve-ai-local.com:3080/admin/"
    "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready"
    "http://minty.ask-eve-ai-local.com:3080/chat-client/"
    "http://minty.ask-eve-ai-local.com:3080/static/"
    "http://localhost:3009" # MinIO Console (direct)
    "http://localhost:3010" # Prometheus (direct)
    "http://localhost:3012" # Grafana (direct)
  )
  local url
  for url in "${urls[@]}"; do
    print_status "Testing $url..."
    # -f: treat HTTP errors as failure; --max-time caps each probe at 10s.
    if curl -f -s --max-time 10 "$url" > /dev/null; then
      print_success "$url is responding via Ingress"
    else
      print_warning "$url is not responding (may still be starting up)"
    fi
  done
}
# Legacy alias kept for backward compatibility with older callers/docs;
# delegates to the Ingress-based connectivity test.
test_connectivity() {
  test_connectivity_ingress
}
# Show connection information for the Ingress-based setup:
# service URLs, default credentials, management commands and data paths.
show_connection_info_ingress() {
  echo ""
  echo "=================================================="
  print_success "EveAI Dev Cluster deployed successfully!"
  echo "=================================================="
  # Single expanded here-doc instead of a long run of echo calls;
  # $HOME is interpolated, everything else is literal text.
  cat <<EOF

🌐 Service URLs (via Ingress):
 Main Application:
 • Main App: http://minty.ask-eve-ai-local.com:3080/admin/
 • API: http://minty.ask-eve-ai-local.com:3080/api/
 • Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/
 • Static Files: http://minty.ask-eve-ai-local.com:3080/static/

 Infrastructure:
 • Redis: redis://minty.ask-eve-ai-local.com:3006
 • MinIO S3: http://minty.ask-eve-ai-local.com:3008
 • MinIO Console: http://minty.ask-eve-ai-local.com:3009

 Monitoring:
 • Flower (Celery): http://minty.ask-eve-ai-local.com:3007
 • Prometheus: http://minty.ask-eve-ai-local.com:3010
 • Grafana: http://minty.ask-eve-ai-local.com:3012

🔑 Default Credentials:
 • MinIO: minioadmin / minioadmin
 • Grafana: admin / admin
 • Flower: Felucia / Jungles

🛠️ Management Commands:
 • kubectl get all -n eveai-dev
 • kubectl get ingress -n eveai-dev
 • kubectl logs -f deployment/eveai-app -n eveai-dev
 • kubectl describe ingress eveai-ingress -n eveai-dev

🗂️ Data Persistence:
 • Host data path: $HOME/k8s-data/dev/
 • Logs path: $HOME/k8s-data/dev/logs/
EOF
}
# Legacy alias kept for backward compatibility with older callers/docs;
# delegates to the Ingress-based summary.
show_connection_info() {
  show_connection_info_ingress
}
# Main execution.
# Orchestrates the full deployment in dependency order:
# infrastructure -> applications -> static/ingress -> monitoring,
# then dumps status, probes connectivity and prints the summary.
main() {
  echo "=================================================="
  echo "🚀 Deploying EveAI Dev Services to Kind Cluster"
  echo "=================================================="
  check_cluster_context
  # Deploy in stages; the sleeps pause between stages
  # (presumably to let the API server settle — TODO confirm necessity).
  deploy_infrastructure
  print_status "Infrastructure deployment completed, proceeding with applications..."
  sleep 5
  deploy_application_services
  print_status "Application deployment completed, proceeding with Nginx and monitoring..."
  sleep 5
  deploy_static_ingress
  deploy_monitoring_only
  print_status "All services deployed, running final checks..."
  sleep 10
  check_services
  test_connectivity_ingress
  show_connection_info_ingress
}
# Check for command line options: allow deploying individual stages
# (infrastructure | apps | monitoring | status | test) or, with no/unknown
# argument, run the full deployment via main.
case "${1:-}" in
  "infrastructure")
    check_cluster_context
    deploy_infrastructure
    ;;
  "apps")
    check_cluster_context
    deploy_application_services
    ;;
  "monitoring")
    check_cluster_context
    # Bug fix: previously called deploy_nginx_monitoring, which is not
    # defined anywhere in this script (command not found at runtime).
    deploy_monitoring_only
    ;;
  "status")
    check_cluster_context
    check_services
    ;;
  "test")
    test_connectivity
    ;;
  *)
    main "$@"
    ;;
esac

View File

@@ -34,6 +34,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-app app: eveai-app
tier: frontend
template: template:
metadata: metadata:
labels: labels:
@@ -119,6 +120,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-api app: eveai-api
tier: frontend
template: template:
metadata: metadata:
labels: labels:
@@ -204,6 +206,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-chat-client app: eveai-chat-client
tier: frontend
template: template:
metadata: metadata:
labels: labels:
@@ -440,6 +443,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-entitlements app: eveai-entitlements
tier: backend
template: template:
metadata: metadata:
labels: labels:

View File

@@ -0,0 +1,19 @@
# Ingress-NGINX Controller Resource Patch
# File: ingress-nginx-resources-patch.yaml
# Purpose: Patch the ingress-nginx-controller deployment with higher resource limits
# to prevent pthread_create() failures and worker process crashes
#
# This is a strategic merge patch that will be applied using:
# kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file=<this-file>
#
# NOTE(review): containers are merged by 'name', so only the 'resources'
# stanza of the 'controller' container should be replaced — confirm against
# the installed controller manifest when upgrading.
spec:
  template:
    spec:
      containers:
        - name: controller
          resources:
            requests:            # guaranteed share used for scheduling
              cpu: 500m
              memory: 512Mi
            limits:              # hard caps; exceeding memory triggers OOM-kill
              cpu: 2000m
              memory: 2Gi

View File

@@ -0,0 +1,19 @@
# Metrics Server Patch for Kind Compatibility
# File: metrics-server-patch.yaml
# Purpose: Patch the metrics-server deployment with Kind-specific configuration
# and appropriate resource limits for development environment
#
# This is a strategic merge patch that will be applied using:
# kubectl patch deployment metrics-server -n kube-system --patch-file=<this-file>
spec:
template:
spec:
containers:
- name: metrics-server
args:
- --cert-dir=/tmp
- --secure-port=10250
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls

View File

@@ -233,6 +233,118 @@ install_ingress_controller() {
kubectl get services -n ingress-nginx kubectl get services -n ingress-nginx
} }
# Patch Ingress Controller Resources.
# Raises the ingress-nginx-controller requests/limits via the strategic-merge
# patch file, retrying because the deployment may not be fully registered
# right after install, then waits for the rollout to finish.
# Returns: 0 on success, 1 when the patch file is missing, all patch attempts
# fail, or the rollout does not complete.
patch_ingress_resources() {
  print_status "Patching Ingress Controller resources..."
  # Give the freshly installed deployment a moment to be fully created.
  sleep 5
  local patch_file="ingress-nginx-resources-patch.yaml"
  if [[ ! -f "$patch_file" ]]; then
    print_error "Patch file not found: $patch_file"
    return 1
  fi
  print_status "Updating resource limits for ingress-nginx-controller using manifest file..."
  # Retry loop replaces the original while/success-flag construct.
  local max_attempts=5
  local attempt
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    print_status "Attempt $attempt/$max_attempts - patching ingress controller resources..."
    if kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file "$patch_file"; then
      print_success "Successfully patched ingress-nginx-controller resources"
      break
    fi
    if (( attempt == max_attempts )); then
      print_error "Failed to patch ingress-nginx-controller resources after $max_attempts attempts"
      return 1
    fi
    print_warning "Patch attempt $attempt failed, retrying in 5 seconds..."
    sleep 5
  done
  print_status "Waiting for ingress controller rollout to complete..."
  # Fix: test the command directly. The old 'cmd; if [ $? -eq 0 ]' pattern is
  # fragile — under 'set -e' the failure branch was unreachable because the
  # script would exit on the rollout failure before the check.
  if kubectl rollout status deployment/ingress-nginx-controller -n ingress-nginx --timeout=300s; then
    print_success "Ingress Controller resource patch completed successfully"
    print_status "Verifying new resource settings..."
    kubectl describe deployment ingress-nginx-controller -n ingress-nginx | grep -A 10 "Limits:\|Requests:" || true
  else
    print_error "Ingress Controller rollout failed"
    return 1
  fi
}
# Install Metrics Server.
# Applies the upstream release manifest, patches it for Kind compatibility
# (self-signed kubelet certs), waits for availability, and smoke-tests it
# with 'kubectl top nodes'. Patch and readiness failures are non-fatal
# warnings; a missing patch file or absent deployment returns 1.
install_metrics_server() {
  print_status "Installing Metrics Server..."
  # Apply metrics server with Kind-specific configuration.
  kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
  local patch_file="metrics-server-patch.yaml"
  if [[ ! -f "$patch_file" ]]; then
    print_error "Patch file not found: $patch_file"
    return 1
  fi
  print_status "Patching Metrics Server for Kind compatibility using manifest file..."
  # Wait (up to 30 * 2s) for the deployment object to exist before patching.
  local max_wait=30
  local wait_count=0
  while ! kubectl get deployment metrics-server -n kube-system &> /dev/null; do
    if (( wait_count >= max_wait )); then
      print_error "Metrics server deployment not found after waiting"
      return 1
    fi
    sleep 2
    wait_count=$((wait_count + 1))
  done
  # Apply the patch; failure here is tolerated so the install can proceed.
  if kubectl patch deployment metrics-server -n kube-system --patch-file "$patch_file"; then
    print_success "Successfully patched metrics-server configuration"
  else
    print_warning "Failed to patch metrics-server, but continuing..."
  fi
  print_status "Waiting for Metrics Server to be ready..."
  # Fix: test the command directly. The old 'cmd; if [ $? -eq 0 ]' pattern is
  # fragile — under 'set -e' the failure branch was unreachable.
  if kubectl wait --for=condition=available deployment/metrics-server -n kube-system --timeout=300s; then
    print_success "Metrics Server installed and ready"
    print_status "Testing metrics server..."
    sleep 10 # give metrics server time to collect initial metrics
    if kubectl top nodes &> /dev/null; then
      print_success "Metrics Server is working correctly"
    else
      print_warning "Metrics Server installed but may need more time to collect metrics"
    fi
  else
    print_warning "Metrics Server installation completed but readiness check failed"
  fi
}
# Apply Kubernetes manifests # Apply Kubernetes manifests
apply_manifests() { apply_manifests() {
print_status "Applying Kubernetes manifests..." print_status "Applying Kubernetes manifests..."
@@ -351,6 +463,8 @@ main() {
create_cluster create_cluster
verify_cri_status verify_cri_status
install_ingress_controller install_ingress_controller
patch_ingress_resources
install_metrics_server
apply_manifests apply_manifests
configure_registry_certificates configure_registry_certificates
verify_cluster verify_cluster