diff --git a/k8s/dev/deploy-all-services.sh b/k8s/dev/deploy-all-services.sh deleted file mode 100755 index 020ba9c..0000000 --- a/k8s/dev/deploy-all-services.sh +++ /dev/null @@ -1,283 +0,0 @@ -#!/bin/bash -# Deploy All EveAI Dev Services Script -# File: deploy-all-services.sh - -set -e - -# Colors voor output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Function voor colored output -print_status() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -print_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -print_warning() { - echo -e "${YELLOW}[WARNING]${NC} $1" -} - -print_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -# Check if kubectl is pointing to the right cluster -check_cluster_context() { - print_status "Checking cluster context..." - - CURRENT_CONTEXT=$(kubectl config current-context) - if [[ "$CURRENT_CONTEXT" != "kind-eveai-dev-cluster" ]]; then - print_error "Wrong cluster context: $CURRENT_CONTEXT" - print_error "Expected: kind-eveai-dev-cluster" - echo "Switch context with: kubectl config use-context kind-eveai-dev-cluster" - exit 1 - fi - - print_success "Using correct cluster context: $CURRENT_CONTEXT" -} - -# Wait for pods to be ready -wait_for_pods() { - local namespace=$1 - local app_label=$2 - local timeout=${3:-300} - - print_status "Waiting for $app_label pods to be ready..." - - if kubectl wait --for=condition=Ready pods -l app=$app_label -n $namespace --timeout=${timeout}s; then - print_success "$app_label pods are ready" - return 0 - else - print_error "$app_label pods failed to become ready within ${timeout}s" - return 1 - fi -} - -# Deploy services in correct order -deploy_infrastructure() { - print_status "Deploying infrastructure services (Redis, MinIO)..." 
- - if kubectl apply -f redis-minio-services.yaml; then - print_success "Infrastructure services deployed" - else - print_error "Failed to deploy infrastructure services" - exit 1 - fi - - # Wait for infrastructure to be ready - wait_for_pods "eveai-dev" "redis" 180 - wait_for_pods "eveai-dev" "minio" 300 -} - -deploy_application_services() { - print_status "Deploying EveAI application services..." - - if kubectl apply -f eveai-services.yaml; then - print_success "Application services deployed" - else - print_error "Failed to deploy application services" - exit 1 - fi - - # Wait for key services to be ready - wait_for_pods "eveai-dev" "eveai-app" 180 - wait_for_pods "eveai-dev" "eveai-api" 180 - wait_for_pods "eveai-dev" "eveai-chat-client" 180 -} - -deploy_static_ingress() { - print_status "Deploying static files service and Ingress..." - - # Deploy static files service - if kubectl apply -f static-files-service.yaml; then - print_success "Static files service deployed" - else - print_error "Failed to deploy static files service" - exit 1 - fi - - # Deploy Ingress - if kubectl apply -f eveai-ingress.yaml; then - print_success "Ingress deployed" - else - print_error "Failed to deploy Ingress" - exit 1 - fi - - # Wait for services to be ready - wait_for_pods "eveai-dev" "static-files" 60 - - # Wait for Ingress to be ready - print_status "Waiting for Ingress to be ready..." - kubectl wait --namespace eveai-dev \ - --for=condition=ready ingress/eveai-ingress \ - --timeout=120s || print_warning "Ingress might still be starting up" -} - -deploy_monitoring_only() { - print_status "Deploying monitoring services..." 
- - if kubectl apply -f monitoring-services.yaml; then - print_success "Monitoring services deployed" - else - print_error "Failed to deploy monitoring services" - exit 1 - fi - - # Wait for monitoring services - wait_for_pods "eveai-dev" "flower" 120 - wait_for_pods "eveai-dev" "prometheus" 180 - wait_for_pods "eveai-dev" "grafana" 180 -} - -# Check service status -check_services() { - print_status "Checking service status..." - - echo "" - print_status "Pods status:" - kubectl get pods -n eveai-dev - - echo "" - print_status "Services status:" - kubectl get services -n eveai-dev - - echo "" - print_status "Persistent Volume Claims:" - kubectl get pvc -n eveai-dev -} - -# Test service connectivity via Ingress -test_connectivity_ingress() { - print_status "Testing Ingress connectivity..." - - # Test Ingress endpoints - endpoints=( - "http://minty.ask-eve-ai-local.com:3080/admin/" - "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready" - "http://minty.ask-eve-ai-local.com:3080/chat-client/" - "http://minty.ask-eve-ai-local.com:3080/static/" - "http://localhost:3009" # MinIO Console (direct) - "http://localhost:3010" # Prometheus (direct) - "http://localhost:3012" # Grafana (direct) - ) - - for endpoint in "${endpoints[@]}"; do - print_status "Testing $endpoint..." - if curl -f -s --max-time 10 "$endpoint" > /dev/null; then - print_success "$endpoint is responding via Ingress" - else - print_warning "$endpoint is not responding (may still be starting up)" - fi - done -} - -# Test service connectivity (legacy function for backward compatibility) -test_connectivity() { - test_connectivity_ingress -} - -# Show connection information for Ingress setup -show_connection_info_ingress() { - echo "" - echo "==================================================" - print_success "EveAI Dev Cluster deployed successfully!" 
- echo "==================================================" - echo "" - echo "🌐 Service URLs (via Ingress):" - echo " Main Application:" - echo " • Main App: http://minty.ask-eve-ai-local.com:3080/admin/" - echo " • API: http://minty.ask-eve-ai-local.com:3080/api/" - echo " • Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/" - echo " • Static Files: http://minty.ask-eve-ai-local.com:3080/static/" - echo "" - echo " Infrastructure:" - echo " • Redis: redis://minty.ask-eve-ai-local.com:3006" - echo " • MinIO S3: http://minty.ask-eve-ai-local.com:3008" - echo " • MinIO Console: http://minty.ask-eve-ai-local.com:3009" - echo "" - echo " Monitoring:" - echo " • Flower (Celery): http://minty.ask-eve-ai-local.com:3007" - echo " • Prometheus: http://minty.ask-eve-ai-local.com:3010" - echo " • Grafana: http://minty.ask-eve-ai-local.com:3012" - echo "" - echo "🔑 Default Credentials:" - echo " • MinIO: minioadmin / minioadmin" - echo " • Grafana: admin / admin" - echo " • Flower: Felucia / Jungles" - echo "" - echo "🛠️ Management Commands:" - echo " • kubectl get all -n eveai-dev" - echo " • kubectl get ingress -n eveai-dev" - echo " • kubectl logs -f deployment/eveai-app -n eveai-dev" - echo " • kubectl describe ingress eveai-ingress -n eveai-dev" - echo "" - echo "🗂️ Data Persistence:" - echo " • Host data path: $HOME/k8s-data/dev/" - echo " • Logs path: $HOME/k8s-data/dev/logs/" -} - -# Show connection information (legacy function for backward compatibility) -show_connection_info() { - show_connection_info_ingress -} - -# Main execution -main() { - echo "==================================================" - echo "🚀 Deploying EveAI Dev Services to Kind Cluster" - echo "==================================================" - - check_cluster_context - - # Deploy in stages - deploy_infrastructure - print_status "Infrastructure deployment completed, proceeding with applications..." 
- sleep 5 - - deploy_application_services - print_status "Application deployment completed, proceeding with Nginx and monitoring..." - sleep 5 - - deploy_static_ingress - deploy_monitoring_only - print_status "All services deployed, running final checks..." - sleep 10 - - check_services - test_connectivity_ingress - show_connection_info_ingress -} - -# Check for command line options -case "${1:-}" in - "infrastructure") - check_cluster_context - deploy_infrastructure - ;; - "apps") - check_cluster_context - deploy_application_services - ;; - "monitoring") - check_cluster_context - deploy_nginx_monitoring - ;; - "status") - check_cluster_context - check_services - ;; - "test") - test_connectivity - ;; - *) - main "$@" - ;; -esac \ No newline at end of file diff --git a/k8s/dev/eveai-services.yaml b/k8s/dev/eveai-services.yaml index 96d827f..5df073a 100644 --- a/k8s/dev/eveai-services.yaml +++ b/k8s/dev/eveai-services.yaml @@ -34,6 +34,7 @@ spec: selector: matchLabels: app: eveai-app + tier: frontend template: metadata: labels: @@ -119,6 +120,7 @@ spec: selector: matchLabels: app: eveai-api + tier: frontend template: metadata: labels: @@ -204,6 +206,7 @@ spec: selector: matchLabels: app: eveai-chat-client + tier: frontend template: metadata: labels: @@ -440,6 +443,7 @@ spec: selector: matchLabels: app: eveai-entitlements + tier: backend template: metadata: labels: diff --git a/k8s/dev/ingress-nginx-resources-patch.yaml b/k8s/dev/ingress-nginx-resources-patch.yaml new file mode 100644 index 0000000..e26c868 --- /dev/null +++ b/k8s/dev/ingress-nginx-resources-patch.yaml @@ -0,0 +1,19 @@ +# Ingress-NGINX Controller Resource Patch +# File: ingress-nginx-resources-patch.yaml +# Purpose: Patch the ingress-nginx-controller deployment with higher resource limits +# to prevent pthread_create() failures and worker process crashes +# +# This is a strategic merge patch that will be applied using: +# kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file= 
+spec:
+  template:
+    spec:
+      containers:
+      - name: controller
+        resources:
+          requests:
+            cpu: 500m
+            memory: 512Mi
+          limits:
+            cpu: 2000m
+            memory: 2Gi
diff --git a/k8s/dev/metrics-server-patch.yaml b/k8s/dev/metrics-server-patch.yaml
new file mode 100644
index 0000000..6543dd6
--- /dev/null
+++ b/k8s/dev/metrics-server-patch.yaml
@@ -0,0 +1,19 @@
+# Metrics Server Patch for Kind Compatibility
+# File: metrics-server-patch.yaml
+# Purpose: Patch the metrics-server deployment with Kind-specific configuration
+#          (container args only; resource requests/limits are left unchanged)
+#
+# This is a strategic merge patch that will be applied using:
+# kubectl patch deployment metrics-server -n kube-system --patch-file=metrics-server-patch.yaml
+spec:
+  template:
+    spec:
+      containers:
+      - name: metrics-server
+        args:
+        - --cert-dir=/tmp
+        - --secure-port=10250
+        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
+        - --kubelet-use-node-status-port
+        - --metric-resolution=15s
+        - --kubelet-insecure-tls
\ No newline at end of file
diff --git a/k8s/dev/setup-dev-cluster.sh b/k8s/dev/setup-dev-cluster.sh
index 8c11bea..ecc7e25 100755
--- a/k8s/dev/setup-dev-cluster.sh
+++ b/k8s/dev/setup-dev-cluster.sh
@@ -233,6 +233,134 @@ install_ingress_controller() {
     kubectl get services -n ingress-nginx
 }
 
+# Patch Ingress Controller Resources
+#
+# Applies ingress-nginx-resources-patch.yaml (looked up relative to the current
+# working directory) to the ingress-nginx-controller deployment, retrying a few
+# times, then waits for the resulting rollout.
+# Returns: 0 on success; 1 if the patch file is missing, every patch attempt
+#          fails, or the rollout does not complete within the timeout.
+patch_ingress_resources() {
+    print_status "Patching Ingress Controller resources..."
+
+    # Wait a moment for the deployment to be fully created
+    sleep 5
+
+    # Check if patch file exists
+    local patch_file="ingress-nginx-resources-patch.yaml"
+    if [[ ! -f "$patch_file" ]]; then
+        print_error "Patch file not found: $patch_file"
+        return 1
+    fi
+
+    # Patch the ingress-nginx-controller deployment with higher resource limits
+    print_status "Updating resource limits for ingress-nginx-controller using manifest file..."
+
+    # Apply patch with retry logic
+    local max_attempts=5
+    local attempt
+    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
+        print_status "Attempt $attempt/$max_attempts - patching ingress controller resources..."
+
+        if kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file "$patch_file"; then
+            print_success "Successfully patched ingress-nginx-controller resources"
+            break
+        fi
+
+        if (( attempt == max_attempts )); then
+            print_error "Failed to patch ingress-nginx-controller resources after $max_attempts attempts"
+            return 1
+        fi
+
+        print_warning "Patch attempt $attempt failed, retrying in 5 seconds..."
+        sleep 5
+    done
+
+    # Wait for rollout to complete. The command is tested directly rather than
+    # through $? so the failure branch still runs if the script uses `set -e`.
+    print_status "Waiting for ingress controller rollout to complete..."
+    if ! kubectl rollout status deployment/ingress-nginx-controller -n ingress-nginx --timeout=300s; then
+        print_error "Ingress Controller rollout failed"
+        return 1
+    fi
+
+    print_success "Ingress Controller resource patch completed successfully"
+
+    # Verify the new resource settings (informational only, never fatal)
+    print_status "Verifying new resource settings..."
+    kubectl describe deployment ingress-nginx-controller -n ingress-nginx | grep -E -A 10 'Limits:|Requests:' || true
+}
+
+# Install Metrics Server
+#
+# Applies the upstream metrics-server manifest, patches the deployment with
+# metrics-server-patch.yaml (looked up relative to the current working
+# directory) and waits for it to become available.
+# Globals: METRICS_SERVER_MANIFEST_URL (read, optional) - manifest to apply;
+#          defaults to the latest upstream release. Set it to a versioned
+#          release URL for reproducible cluster setups.
+# Returns: 0 once installation was attempted (patch/readiness problems only
+#          produce warnings); 1 if the patch file is missing, the manifest
+#          cannot be applied, or the deployment never appears.
+install_metrics_server() {
+    print_status "Installing Metrics Server..."
+
+    # Check if patch file exists before changing anything in the cluster
+    local patch_file="metrics-server-patch.yaml"
+    if [[ ! -f "$patch_file" ]]; then
+        print_error "Patch file not found: $patch_file"
+        return 1
+    fi
+
+    # Apply metrics server manifest
+    local manifest_url="${METRICS_SERVER_MANIFEST_URL:-https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml}"
+    if ! kubectl apply -f "$manifest_url"; then
+        print_error "Failed to apply Metrics Server manifest: $manifest_url"
+        return 1
+    fi
+
+    # Patch metrics server for Kind (disable TLS verification)
+    print_status "Patching Metrics Server for Kind compatibility using manifest file..."
+
+    # Wait for metrics server deployment to exist (polled every 2 seconds)
+    local max_wait=30
+    local wait_count=0
+    while ! kubectl get deployment metrics-server -n kube-system &> /dev/null; do
+        if (( wait_count >= max_wait )); then
+            print_error "Metrics server deployment not found after waiting"
+            return 1
+        fi
+        sleep 2
+        wait_count=$((wait_count + 1))
+    done
+
+    # Apply the patch
+    if kubectl patch deployment metrics-server -n kube-system --patch-file "$patch_file"; then
+        print_success "Successfully patched metrics-server configuration"
+    else
+        print_warning "Failed to patch metrics-server, but continuing..."
+    fi
+
+    # Wait for metrics server to be ready. Tested directly (not via $?) so a
+    # failed readiness check stays a warning even if the script uses `set -e`.
+    print_status "Waiting for Metrics Server to be ready..."
+    if ! kubectl wait --for=condition=available deployment/metrics-server -n kube-system --timeout=300s; then
+        print_warning "Metrics Server installation completed but readiness check failed"
+        return 0
+    fi
+
+    print_success "Metrics Server installed and ready"
+
+    # Test metrics server
+    print_status "Testing metrics server..."
+    sleep 10  # Give metrics server time to collect initial metrics
+    if kubectl top nodes &> /dev/null; then
+        print_success "Metrics Server is working correctly"
+    else
+        print_warning "Metrics Server installed but may need more time to collect metrics"
+    fi
+}
+
 # Apply Kubernetes manifests
 apply_manifests() {
     print_status "Applying Kubernetes manifests..."
@@ -351,6 +479,8 @@ main() {
     create_cluster
     verify_cri_status
     install_ingress_controller
+    patch_ingress_resources
+    install_metrics_server
     apply_manifests
     configure_registry_certificates
     verify_cluster