- Added metrics service
- Application services start up, except eveai_chat_client - connectivity to admin / eveai_app not functional
@@ -1,283 +0,0 @@
#!/bin/bash
# Deploy All EveAI Dev Services Script
# File: deploy-all-services.sh

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Functions for colored output
print_status() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

print_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check if kubectl is pointing to the right cluster
check_cluster_context() {
    print_status "Checking cluster context..."

    CURRENT_CONTEXT=$(kubectl config current-context)
    if [[ "$CURRENT_CONTEXT" != "kind-eveai-dev-cluster" ]]; then
        print_error "Wrong cluster context: $CURRENT_CONTEXT"
        print_error "Expected: kind-eveai-dev-cluster"
        echo "Switch context with: kubectl config use-context kind-eveai-dev-cluster"
        exit 1
    fi

    print_success "Using correct cluster context: $CURRENT_CONTEXT"
}

# Wait for pods to be ready
wait_for_pods() {
    local namespace=$1
    local app_label=$2
    local timeout=${3:-300}

    print_status "Waiting for $app_label pods to be ready..."

    if kubectl wait --for=condition=Ready pods -l app=$app_label -n $namespace --timeout=${timeout}s; then
        print_success "$app_label pods are ready"
        return 0
    else
        print_error "$app_label pods failed to become ready within ${timeout}s"
        return 1
    fi
}

# Deploy services in correct order
deploy_infrastructure() {
    print_status "Deploying infrastructure services (Redis, MinIO)..."

    if kubectl apply -f redis-minio-services.yaml; then
        print_success "Infrastructure services deployed"
    else
        print_error "Failed to deploy infrastructure services"
        exit 1
    fi

    # Wait for infrastructure to be ready
    wait_for_pods "eveai-dev" "redis" 180
    wait_for_pods "eveai-dev" "minio" 300
}

deploy_application_services() {
    print_status "Deploying EveAI application services..."

    if kubectl apply -f eveai-services.yaml; then
        print_success "Application services deployed"
    else
        print_error "Failed to deploy application services"
        exit 1
    fi

    # Wait for key services to be ready
    wait_for_pods "eveai-dev" "eveai-app" 180
    wait_for_pods "eveai-dev" "eveai-api" 180
    wait_for_pods "eveai-dev" "eveai-chat-client" 180
}

deploy_static_ingress() {
    print_status "Deploying static files service and Ingress..."

    # Deploy static files service
    if kubectl apply -f static-files-service.yaml; then
        print_success "Static files service deployed"
    else
        print_error "Failed to deploy static files service"
        exit 1
    fi

    # Deploy Ingress
    if kubectl apply -f eveai-ingress.yaml; then
        print_success "Ingress deployed"
    else
        print_error "Failed to deploy Ingress"
        exit 1
    fi

    # Wait for services to be ready
    wait_for_pods "eveai-dev" "static-files" 60

    # Wait for Ingress to be ready
    print_status "Waiting for Ingress to be ready..."
    kubectl wait --namespace eveai-dev \
        --for=condition=ready ingress/eveai-ingress \
        --timeout=120s || print_warning "Ingress might still be starting up"
}

deploy_monitoring_only() {
    print_status "Deploying monitoring services..."

    if kubectl apply -f monitoring-services.yaml; then
        print_success "Monitoring services deployed"
    else
        print_error "Failed to deploy monitoring services"
        exit 1
    fi

    # Wait for monitoring services
    wait_for_pods "eveai-dev" "flower" 120
    wait_for_pods "eveai-dev" "prometheus" 180
    wait_for_pods "eveai-dev" "grafana" 180
}

# Check service status
check_services() {
    print_status "Checking service status..."

    echo ""
    print_status "Pods status:"
    kubectl get pods -n eveai-dev

    echo ""
    print_status "Services status:"
    kubectl get services -n eveai-dev

    echo ""
    print_status "Persistent Volume Claims:"
    kubectl get pvc -n eveai-dev
}

# Test service connectivity via Ingress
test_connectivity_ingress() {
    print_status "Testing Ingress connectivity..."

    # Test Ingress endpoints
    endpoints=(
        "http://minty.ask-eve-ai-local.com:3080/admin/"
        "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready"
        "http://minty.ask-eve-ai-local.com:3080/chat-client/"
        "http://minty.ask-eve-ai-local.com:3080/static/"
        "http://localhost:3009" # MinIO Console (direct)
        "http://localhost:3010" # Prometheus (direct)
        "http://localhost:3012" # Grafana (direct)
    )

    for endpoint in "${endpoints[@]}"; do
        print_status "Testing $endpoint..."
        if curl -f -s --max-time 10 "$endpoint" > /dev/null; then
            print_success "$endpoint is responding via Ingress"
        else
            print_warning "$endpoint is not responding (may still be starting up)"
        fi
    done
}

# Test service connectivity (legacy function for backward compatibility)
test_connectivity() {
    test_connectivity_ingress
}

# Show connection information for Ingress setup
show_connection_info_ingress() {
    echo ""
    echo "=================================================="
    print_success "EveAI Dev Cluster deployed successfully!"
    echo "=================================================="
    echo ""
    echo "🌐 Service URLs (via Ingress):"
    echo "   Main Application:"
    echo "   • Main App: http://minty.ask-eve-ai-local.com:3080/admin/"
    echo "   • API: http://minty.ask-eve-ai-local.com:3080/api/"
    echo "   • Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/"
    echo "   • Static Files: http://minty.ask-eve-ai-local.com:3080/static/"
    echo ""
    echo "   Infrastructure:"
    echo "   • Redis: redis://minty.ask-eve-ai-local.com:3006"
    echo "   • MinIO S3: http://minty.ask-eve-ai-local.com:3008"
    echo "   • MinIO Console: http://minty.ask-eve-ai-local.com:3009"
    echo ""
    echo "   Monitoring:"
    echo "   • Flower (Celery): http://minty.ask-eve-ai-local.com:3007"
    echo "   • Prometheus: http://minty.ask-eve-ai-local.com:3010"
    echo "   • Grafana: http://minty.ask-eve-ai-local.com:3012"
    echo ""
    echo "🔑 Default Credentials:"
    echo "   • MinIO: minioadmin / minioadmin"
    echo "   • Grafana: admin / admin"
    echo "   • Flower: Felucia / Jungles"
    echo ""
    echo "🛠️ Management Commands:"
    echo "   • kubectl get all -n eveai-dev"
    echo "   • kubectl get ingress -n eveai-dev"
    echo "   • kubectl logs -f deployment/eveai-app -n eveai-dev"
    echo "   • kubectl describe ingress eveai-ingress -n eveai-dev"
    echo ""
    echo "🗂️ Data Persistence:"
    echo "   • Host data path: $HOME/k8s-data/dev/"
    echo "   • Logs path: $HOME/k8s-data/dev/logs/"
}

# Show connection information (legacy function for backward compatibility)
show_connection_info() {
    show_connection_info_ingress
}

# Main execution
main() {
    echo "=================================================="
    echo "🚀 Deploying EveAI Dev Services to Kind Cluster"
    echo "=================================================="

    check_cluster_context

    # Deploy in stages
    deploy_infrastructure
    print_status "Infrastructure deployment completed, proceeding with applications..."
    sleep 5

    deploy_application_services
    print_status "Application deployment completed, proceeding with Nginx and monitoring..."
    sleep 5

    deploy_static_ingress
    deploy_monitoring_only
    print_status "All services deployed, running final checks..."
    sleep 10

    check_services
    test_connectivity_ingress
    show_connection_info_ingress
}

# Check for command line options
case "${1:-}" in
    "infrastructure")
        check_cluster_context
        deploy_infrastructure
        ;;
    "apps")
        check_cluster_context
        deploy_application_services
        ;;
    "monitoring")
        check_cluster_context
        deploy_monitoring_only
        ;;
    "status")
        check_cluster_context
        check_services
        ;;
    "test")
        test_connectivity
        ;;
    *)
        main "$@"
        ;;
esac
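
For reference, the entry points of this (now removed) script map onto the following invocations: a usage sketch inferred from the case statement above, with the file name taken from the script's own header comment.

./deploy-all-services.sh                  # full run: infrastructure, apps, static files/Ingress, monitoring, checks
./deploy-all-services.sh infrastructure   # Redis and MinIO only
./deploy-all-services.sh apps             # eveai-app, eveai-api and eveai-chat-client
./deploy-all-services.sh monitoring       # Flower, Prometheus, Grafana
./deploy-all-services.sh status           # pod, service and PVC overview for eveai-dev
./deploy-all-services.sh test             # curl the Ingress endpoints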
@@ -34,6 +34,7 @@ spec:
  selector:
    matchLabels:
      app: eveai-app
      tier: frontend
  template:
    metadata:
      labels:
@@ -119,6 +120,7 @@ spec:
  selector:
    matchLabels:
      app: eveai-api
      tier: frontend
  template:
    metadata:
      labels:
@@ -204,6 +206,7 @@ spec:
  selector:
    matchLabels:
      app: eveai-chat-client
      tier: frontend
  template:
    metadata:
      labels:
@@ -440,6 +443,7 @@ spec:
  selector:
    matchLabels:
      app: eveai-entitlements
      tier: backend
  template:
    metadata:
      labels:

k8s/dev/ingress-nginx-resources-patch.yaml (new file, 19 lines)
@@ -0,0 +1,19 @@
# Ingress-NGINX Controller Resource Patch
# File: ingress-nginx-resources-patch.yaml
# Purpose: Patch the ingress-nginx-controller deployment with higher resource limits
#          to prevent pthread_create() failures and worker process crashes
#
# This is a strategic merge patch that will be applied using:
# kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file=<this-file>
spec:
  template:
    spec:
      containers:
      - name: controller
        resources:
          requests:
            cpu: 500m
            memory: 512Mi
          limits:
            cpu: 2000m
            memory: 2Gi
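
As a rough sketch of applying and checking this patch by hand (file name as in the header above; the same kubectl patch, rollout and describe steps that the cluster setup script below automates):

kubectl patch deployment ingress-nginx-controller -n ingress-nginx \
  --patch-file=ingress-nginx-resources-patch.yaml
kubectl rollout status deployment/ingress-nginx-controller -n ingress-nginx --timeout=300s
kubectl describe deployment ingress-nginx-controller -n ingress-nginx | grep -A 10 "Limits:\|Requests:"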

k8s/dev/metrics-server-patch.yaml (new file, 19 lines)
@@ -0,0 +1,19 @@
# Metrics Server Patch for Kind Compatibility
# File: metrics-server-patch.yaml
# Purpose: Patch the metrics-server deployment with Kind-specific configuration
#          (kubelet address types, insecure TLS) for the development environment
#
# This is a strategic merge patch that will be applied using:
# kubectl patch deployment metrics-server -n kube-system --patch-file=<this-file>
spec:
  template:
    spec:
      containers:
      - name: metrics-server
        args:
        - --cert-dir=/tmp
        - --secure-port=10250
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        - --kubelet-insecure-tls
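
A similar hand-run sketch for this patch (the setup script below wraps the same commands; kubectl top only returns data once the server has scraped its first metrics, so it may need a short wait):

kubectl patch deployment metrics-server -n kube-system --patch-file=metrics-server-patch.yaml
kubectl wait --for=condition=available deployment/metrics-server -n kube-system --timeout=300s
kubectl top nodes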
@@ -233,6 +233,118 @@ install_ingress_controller() {
    kubectl get services -n ingress-nginx
}

# Patch Ingress Controller Resources
patch_ingress_resources() {
    print_status "Patching Ingress Controller resources..."

    # Wait a moment for the deployment to be fully created
    sleep 5

    # Check if patch file exists
    local patch_file="ingress-nginx-resources-patch.yaml"
    if [[ ! -f "$patch_file" ]]; then
        print_error "Patch file not found: $patch_file"
        return 1
    fi

    # Patch the ingress-nginx-controller deployment with higher resource limits
    print_status "Updating resource limits for ingress-nginx-controller using manifest file..."

    # Apply patch with retry logic
    local max_attempts=5
    local attempt=1
    local success=false

    while [ $attempt -le $max_attempts ] && [ "$success" = false ]; do
        print_status "Attempt $attempt/$max_attempts - patching ingress controller resources..."

        if kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file "$patch_file"; then
            print_success "Successfully patched ingress-nginx-controller resources"
            success=true
        else
            if [ $attempt -lt $max_attempts ]; then
                print_warning "Patch attempt $attempt failed, retrying in 5 seconds..."
                sleep 5
                attempt=$((attempt + 1))
            else
                print_error "Failed to patch ingress-nginx-controller resources after $max_attempts attempts"
                return 1
            fi
        fi
    done

    # Wait for rollout to complete
    print_status "Waiting for ingress controller rollout to complete..."
    kubectl rollout status deployment/ingress-nginx-controller -n ingress-nginx --timeout=300s

    if [ $? -eq 0 ]; then
        print_success "Ingress Controller resource patch completed successfully"

        # Verify the new resource settings
        print_status "Verifying new resource settings..."
        kubectl describe deployment ingress-nginx-controller -n ingress-nginx | grep -A 10 "Limits:\|Requests:" || true
    else
        print_error "Ingress Controller rollout failed"
        return 1
    fi
}

# Install Metrics Server
install_metrics_server() {
    print_status "Installing Metrics Server..."

    # Apply metrics server with Kind-specific configuration
    kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

    # Check if patch file exists
    local patch_file="metrics-server-patch.yaml"
    if [[ ! -f "$patch_file" ]]; then
        print_error "Patch file not found: $patch_file"
        return 1
    fi

    # Patch metrics server for Kind (disable TLS verification)
    print_status "Patching Metrics Server for Kind compatibility using manifest file..."

    # Wait for metrics server deployment to exist
    local max_wait=30
    local wait_count=0
    while ! kubectl get deployment metrics-server -n kube-system &> /dev/null; do
        if [ $wait_count -ge $max_wait ]; then
            print_error "Metrics server deployment not found after waiting"
            return 1
        fi
        sleep 2
        wait_count=$((wait_count + 1))
    done

    # Apply the patch
    if kubectl patch deployment metrics-server -n kube-system --patch-file "$patch_file"; then
        print_success "Successfully patched metrics-server configuration"
    else
        print_warning "Failed to patch metrics-server, but continuing..."
    fi

    # Wait for metrics server to be ready
    print_status "Waiting for Metrics Server to be ready..."
    kubectl wait --for=condition=available deployment/metrics-server -n kube-system --timeout=300s

    if [ $? -eq 0 ]; then
        print_success "Metrics Server installed and ready"

        # Test metrics server
        print_status "Testing metrics server..."
        sleep 10 # Give metrics server time to collect initial metrics
        if kubectl top nodes &> /dev/null; then
            print_success "Metrics Server is working correctly"
        else
            print_warning "Metrics Server installed but may need more time to collect metrics"
        fi
    else
        print_warning "Metrics Server installation completed but readiness check failed"
    fi
}

# Apply Kubernetes manifests
apply_manifests() {
    print_status "Applying Kubernetes manifests..."
@@ -351,6 +463,8 @@ main() {
    create_cluster
    verify_cri_status
    install_ingress_controller
    patch_ingress_resources
    install_metrics_server
    apply_manifests
    configure_registry_certificates
    verify_cluster