- Metrics service toegevoegd

- Applicatie services starten op, behalve eveai_chat_client
- Connectiviteit naar admin / eveai_app niet functioneel
This commit is contained in:
Josako
2025-08-20 11:49:19 +02:00
parent d6a2635e50
commit 9c63ecb17f
5 changed files with 156 additions and 283 deletions

View File

@@ -1,283 +0,0 @@
#!/bin/bash
# Deploy All EveAI Dev Services Script
# File: deploy-all-services.sh
set -e

# ANSI color codes for terminal output (consumed via 'echo -e' / '%b' below).
# Marked readonly: these are constants and must never be reassigned.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color (reset attribute)
# Helpers for colored console output.
# Print an informational line (blue [INFO] prefix).
print_status() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a success line (green [SUCCESS] prefix).
print_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# Print a warning line (yellow [WARNING] prefix).
print_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
# Print an error line (red [ERROR] prefix).
# Fix: write to stderr so errors remain visible when stdout is redirected
# or captured (e.g. `./deploy.sh status > report.txt`).
print_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Ensure kubectl is pointed at the expected Kind dev cluster.
# Exits the script on a mismatch to avoid touching another cluster by accident.
check_cluster_context() {
  print_status "Checking cluster context..."
  CURRENT_CONTEXT=$(kubectl config current-context)
  # Happy path first; fall through to the error messages on mismatch.
  if [[ "$CURRENT_CONTEXT" == "kind-eveai-dev-cluster" ]]; then
    print_success "Using correct cluster context: $CURRENT_CONTEXT"
    return 0
  fi
  print_error "Wrong cluster context: $CURRENT_CONTEXT"
  print_error "Expected: kind-eveai-dev-cluster"
  echo "Switch context with: kubectl config use-context kind-eveai-dev-cluster"
  exit 1
}
# Wait until all pods carrying a given 'app' label are Ready.
# Arguments:
#   $1 - namespace
#   $2 - value of the 'app' label to wait for
#   $3 - timeout in seconds (default 300)
# Returns: 0 when pods become Ready, 1 on timeout/failure.
wait_for_pods() {
  local namespace=$1
  local app_label=$2
  local timeout=${3:-300}
  print_status "Waiting for $app_label pods to be ready..."
  # Fix: quote all expansions so label/namespace values can never word-split
  # or glob (previously -l $app_label / -n $namespace were unquoted).
  if kubectl wait --for=condition=Ready pods -l "app=${app_label}" -n "$namespace" --timeout="${timeout}s"; then
    print_success "$app_label pods are ready"
    return 0
  else
    print_error "$app_label pods failed to become ready within ${timeout}s"
    return 1
  fi
}
# Deploy services in correct order.
# Stage 1: infrastructure (Redis, MinIO) — must be up before the apps start.
deploy_infrastructure() {
  print_status "Deploying infrastructure services (Redis, MinIO)..."
  kubectl apply -f redis-minio-services.yaml || {
    print_error "Failed to deploy infrastructure services"
    exit 1
  }
  print_success "Infrastructure services deployed"
  # Block until the infrastructure pods report Ready.
  wait_for_pods "eveai-dev" "redis" 180
  wait_for_pods "eveai-dev" "minio" 300
}
# Stage 2: EveAI application services (app, api, chat-client).
deploy_application_services() {
  print_status "Deploying EveAI application services..."
  if ! kubectl apply -f eveai-services.yaml; then
    print_error "Failed to deploy application services"
    exit 1
  fi
  print_success "Application services deployed"
  # Block until the key application pods report Ready.
  wait_for_pods "eveai-dev" "eveai-app" 180
  wait_for_pods "eveai-dev" "eveai-api" 180
  wait_for_pods "eveai-dev" "eveai-chat-client" 180
}
# Stage 3: static-files service and the Ingress, then wait for readiness.
deploy_static_ingress() {
  print_status "Deploying static files service and Ingress..."
  # Static files first; the Ingress routes to it.
  if ! kubectl apply -f static-files-service.yaml; then
    print_error "Failed to deploy static files service"
    exit 1
  fi
  print_success "Static files service deployed"
  if ! kubectl apply -f eveai-ingress.yaml; then
    print_error "Failed to deploy Ingress"
    exit 1
  fi
  print_success "Ingress deployed"
  wait_for_pods "eveai-dev" "static-files" 60
  # Best effort: Ingress readiness is non-fatal, it may lag behind.
  print_status "Waiting for Ingress to be ready..."
  kubectl wait --namespace eveai-dev \
    --for=condition=ready ingress/eveai-ingress \
    --timeout=120s || print_warning "Ingress might still be starting up"
}
# Stage 4: monitoring stack (Flower, Prometheus, Grafana).
deploy_monitoring_only() {
  print_status "Deploying monitoring services..."
  kubectl apply -f monitoring-services.yaml || {
    print_error "Failed to deploy monitoring services"
    exit 1
  }
  print_success "Monitoring services deployed"
  # Block until each monitoring pod reports Ready.
  wait_for_pods "eveai-dev" "flower" 120
  wait_for_pods "eveai-dev" "prometheus" 180
  wait_for_pods "eveai-dev" "grafana" 180
}
# Check service status: dump pods, services and PVCs in the dev namespace.
check_services() {
  print_status "Checking service status..."
  local headers=("Pods status:" "Services status:" "Persistent Volume Claims:")
  local resources=(pods services pvc)
  local idx
  for idx in 0 1 2; do
    echo ""
    print_status "${headers[$idx]}"
    kubectl get "${resources[$idx]}" -n eveai-dev
  done
}
# Test service connectivity via Ingress: probe each public endpoint once
# and report reachability (non-fatal — services may still be starting).
test_connectivity_ingress() {
  print_status "Testing Ingress connectivity..."
  local urls=(
    "http://minty.ask-eve-ai-local.com:3080/admin/"
    "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready"
    "http://minty.ask-eve-ai-local.com:3080/chat-client/"
    "http://minty.ask-eve-ai-local.com:3080/static/"
    "http://localhost:3009" # MinIO Console (direct)
    "http://localhost:3010" # Prometheus (direct)
    "http://localhost:3012" # Grafana (direct)
  )
  local url
  for url in "${urls[@]}"; do
    print_status "Testing $url..."
    # -f: treat HTTP errors as failure; --max-time caps each probe at 10s.
    if curl -f -s --max-time 10 "$url" > /dev/null; then
      print_success "$url is responding via Ingress"
    else
      print_warning "$url is not responding (may still be starting up)"
    fi
  done
}
# Legacy alias kept for backward compatibility with older callers/docs;
# delegates to the Ingress-based connectivity test.
test_connectivity() {
  test_connectivity_ingress
}
# Show connection information for the Ingress-based setup:
# service URLs, default credentials, management commands and data paths.
show_connection_info_ingress() {
  echo ""
  echo "=================================================="
  print_success "EveAI Dev Cluster deployed successfully!"
  echo "=================================================="
  # Single expanded here-doc instead of a long run of echo calls;
  # $HOME is interpolated, everything else is literal text.
  cat <<EOF

🌐 Service URLs (via Ingress):
 Main Application:
 • Main App: http://minty.ask-eve-ai-local.com:3080/admin/
 • API: http://minty.ask-eve-ai-local.com:3080/api/
 • Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/
 • Static Files: http://minty.ask-eve-ai-local.com:3080/static/

 Infrastructure:
 • Redis: redis://minty.ask-eve-ai-local.com:3006
 • MinIO S3: http://minty.ask-eve-ai-local.com:3008
 • MinIO Console: http://minty.ask-eve-ai-local.com:3009

 Monitoring:
 • Flower (Celery): http://minty.ask-eve-ai-local.com:3007
 • Prometheus: http://minty.ask-eve-ai-local.com:3010
 • Grafana: http://minty.ask-eve-ai-local.com:3012

🔑 Default Credentials:
 • MinIO: minioadmin / minioadmin
 • Grafana: admin / admin
 • Flower: Felucia / Jungles

🛠️ Management Commands:
 • kubectl get all -n eveai-dev
 • kubectl get ingress -n eveai-dev
 • kubectl logs -f deployment/eveai-app -n eveai-dev
 • kubectl describe ingress eveai-ingress -n eveai-dev

🗂️ Data Persistence:
 • Host data path: $HOME/k8s-data/dev/
 • Logs path: $HOME/k8s-data/dev/logs/
EOF
}
# Legacy alias kept for backward compatibility with older callers/docs;
# delegates to the Ingress-based summary.
show_connection_info() {
  show_connection_info_ingress
}
# Main execution.
# Orchestrates the full deployment in dependency order:
# infrastructure -> applications -> static/ingress -> monitoring,
# then dumps status, probes connectivity and prints the summary.
main() {
  echo "=================================================="
  echo "🚀 Deploying EveAI Dev Services to Kind Cluster"
  echo "=================================================="
  check_cluster_context
  # Deploy in stages; the sleeps pause between stages
  # (presumably to let the API server settle — TODO confirm necessity).
  deploy_infrastructure
  print_status "Infrastructure deployment completed, proceeding with applications..."
  sleep 5
  deploy_application_services
  print_status "Application deployment completed, proceeding with Nginx and monitoring..."
  sleep 5
  deploy_static_ingress
  deploy_monitoring_only
  print_status "All services deployed, running final checks..."
  sleep 10
  check_services
  test_connectivity_ingress
  show_connection_info_ingress
}
# Check for command line options: allow deploying individual stages
# (infrastructure | apps | monitoring | status | test) or, with no/unknown
# argument, run the full deployment via main.
case "${1:-}" in
  "infrastructure")
    check_cluster_context
    deploy_infrastructure
    ;;
  "apps")
    check_cluster_context
    deploy_application_services
    ;;
  "monitoring")
    check_cluster_context
    # Bug fix: previously called deploy_nginx_monitoring, which is not
    # defined anywhere in this script (command not found at runtime).
    deploy_monitoring_only
    ;;
  "status")
    check_cluster_context
    check_services
    ;;
  "test")
    test_connectivity
    ;;
  *)
    main "$@"
    ;;
esac

View File

@@ -34,6 +34,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-app app: eveai-app
tier: frontend
template: template:
metadata: metadata:
labels: labels:
@@ -119,6 +120,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-api app: eveai-api
tier: frontend
template: template:
metadata: metadata:
labels: labels:
@@ -204,6 +206,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-chat-client app: eveai-chat-client
tier: frontend
template: template:
metadata: metadata:
labels: labels:
@@ -440,6 +443,7 @@ spec:
selector: selector:
matchLabels: matchLabels:
app: eveai-entitlements app: eveai-entitlements
tier: backend
template: template:
metadata: metadata:
labels: labels:

View File

@@ -0,0 +1,19 @@
# Ingress-NGINX Controller Resource Patch
# File: ingress-nginx-resources-patch.yaml
# Purpose: Patch the ingress-nginx-controller deployment with higher resource limits
# to prevent pthread_create() failures and worker process crashes
#
# This is a strategic merge patch that will be applied using:
# kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file=<this-file>
#
# NOTE(review): containers are merged by 'name', so only the 'resources'
# stanza of the 'controller' container should be replaced — confirm against
# the installed controller manifest when upgrading.
spec:
  template:
    spec:
      containers:
        - name: controller
          resources:
            requests:            # guaranteed share used for scheduling
              cpu: 500m
              memory: 512Mi
            limits:              # hard caps; exceeding memory triggers OOM-kill
              cpu: 2000m
              memory: 2Gi

View File

@@ -0,0 +1,19 @@
# Metrics Server Patch for Kind Compatibility
# File: metrics-server-patch.yaml
# Purpose: Patch the metrics-server deployment with Kind-specific configuration
# and appropriate resource limits for development environment
#
# This is a strategic merge patch that will be applied using:
# kubectl patch deployment metrics-server -n kube-system --patch-file=<this-file>
spec:
template:
spec:
containers:
- name: metrics-server
args:
- --cert-dir=/tmp
- --secure-port=10250
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls

View File

@@ -233,6 +233,118 @@ install_ingress_controller() {
kubectl get services -n ingress-nginx kubectl get services -n ingress-nginx
} }
# Patch Ingress Controller Resources.
# Raises the ingress-nginx-controller requests/limits via the strategic-merge
# patch file, retrying because the deployment may not be fully registered
# right after install, then waits for the rollout to finish.
# Returns: 0 on success, 1 when the patch file is missing, all patch attempts
# fail, or the rollout does not complete.
patch_ingress_resources() {
  print_status "Patching Ingress Controller resources..."
  # Give the freshly installed deployment a moment to be fully created.
  sleep 5
  local patch_file="ingress-nginx-resources-patch.yaml"
  if [[ ! -f "$patch_file" ]]; then
    print_error "Patch file not found: $patch_file"
    return 1
  fi
  print_status "Updating resource limits for ingress-nginx-controller using manifest file..."
  # Retry loop replaces the original while/success-flag construct.
  local max_attempts=5
  local attempt
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    print_status "Attempt $attempt/$max_attempts - patching ingress controller resources..."
    if kubectl patch deployment ingress-nginx-controller -n ingress-nginx --patch-file "$patch_file"; then
      print_success "Successfully patched ingress-nginx-controller resources"
      break
    fi
    if (( attempt == max_attempts )); then
      print_error "Failed to patch ingress-nginx-controller resources after $max_attempts attempts"
      return 1
    fi
    print_warning "Patch attempt $attempt failed, retrying in 5 seconds..."
    sleep 5
  done
  print_status "Waiting for ingress controller rollout to complete..."
  # Fix: test the command directly. The old 'cmd; if [ $? -eq 0 ]' pattern is
  # fragile — under 'set -e' the failure branch was unreachable because the
  # script would exit on the rollout failure before the check.
  if kubectl rollout status deployment/ingress-nginx-controller -n ingress-nginx --timeout=300s; then
    print_success "Ingress Controller resource patch completed successfully"
    print_status "Verifying new resource settings..."
    kubectl describe deployment ingress-nginx-controller -n ingress-nginx | grep -A 10 "Limits:\|Requests:" || true
  else
    print_error "Ingress Controller rollout failed"
    return 1
  fi
}
# Install Metrics Server.
# Applies the upstream release manifest, patches it for Kind compatibility
# (self-signed kubelet certs), waits for availability, and smoke-tests it
# with 'kubectl top nodes'. Patch and readiness failures are non-fatal
# warnings; a missing patch file or absent deployment returns 1.
install_metrics_server() {
  print_status "Installing Metrics Server..."
  # Apply metrics server with Kind-specific configuration.
  kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
  local patch_file="metrics-server-patch.yaml"
  if [[ ! -f "$patch_file" ]]; then
    print_error "Patch file not found: $patch_file"
    return 1
  fi
  print_status "Patching Metrics Server for Kind compatibility using manifest file..."
  # Wait (up to 30 * 2s) for the deployment object to exist before patching.
  local max_wait=30
  local wait_count=0
  while ! kubectl get deployment metrics-server -n kube-system &> /dev/null; do
    if (( wait_count >= max_wait )); then
      print_error "Metrics server deployment not found after waiting"
      return 1
    fi
    sleep 2
    wait_count=$((wait_count + 1))
  done
  # Apply the patch; failure here is tolerated so the install can proceed.
  if kubectl patch deployment metrics-server -n kube-system --patch-file "$patch_file"; then
    print_success "Successfully patched metrics-server configuration"
  else
    print_warning "Failed to patch metrics-server, but continuing..."
  fi
  print_status "Waiting for Metrics Server to be ready..."
  # Fix: test the command directly. The old 'cmd; if [ $? -eq 0 ]' pattern is
  # fragile — under 'set -e' the failure branch was unreachable.
  if kubectl wait --for=condition=available deployment/metrics-server -n kube-system --timeout=300s; then
    print_success "Metrics Server installed and ready"
    print_status "Testing metrics server..."
    sleep 10 # give metrics server time to collect initial metrics
    if kubectl top nodes &> /dev/null; then
      print_success "Metrics Server is working correctly"
    else
      print_warning "Metrics Server installed but may need more time to collect metrics"
    fi
  else
    print_warning "Metrics Server installation completed but readiness check failed"
  fi
}
# Apply Kubernetes manifests # Apply Kubernetes manifests
apply_manifests() { apply_manifests() {
print_status "Applying Kubernetes manifests..." print_status "Applying Kubernetes manifests..."
@@ -351,6 +463,8 @@ main() {
create_cluster create_cluster
verify_cri_status verify_cri_status
install_ingress_controller install_ingress_controller
patch_ingress_resources
install_metrics_server
apply_manifests apply_manifests
configure_registry_certificates configure_registry_certificates
verify_cluster verify_cluster