Files
eveAI/k8s/dev/setup-dev-cluster.sh
2025-08-18 11:44:23 +02:00

332 lines
10 KiB
Bash
Executable File

#!/bin/bash
# Setup script for the EveAI Dev Kind Cluster
# File: setup-dev-cluster.sh
# Strict mode: -e exit on error, -u error on unset variables,
# pipefail so a failing stage in a pipeline fails the pipeline.
set -euo pipefail
echo "🚀 Setting up EveAI Dev Kind Cluster..."
# Name of the Kind cluster; also used to derive container names
# (e.g. "${CLUSTER_NAME}-control-plane").
readonly CLUSTER_NAME="eveai-dev-cluster"
# ANSI color codes for terminal output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
# Helper functions for colored console output
print_status() {
    # %b expands backslash escapes (the color codes) like `echo -e` does
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a green [SUCCESS] line to stdout.
print_success() {
    local msg="$1"
    echo -e "${GREEN}[SUCCESS]${NC} ${msg}"
}
# Print a yellow [WARNING] line to stdout.
print_warning() {
    # %b interprets the escape sequences stored in the color variables
    printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
# Print a red [ERROR] line.
# Errors go to stderr so they are not lost when stdout is captured or piped.
print_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Check that all required tools (kind, kubectl, podman, envsubst) are on PATH.
# Exits 1 on the first missing tool.
check_prerequisites() {
    print_status "Checking prerequisites..."
    if ! command -v kind &> /dev/null; then
        print_error "kind is not installed. Please install kind first."
        echo "Install via: go install sigs.k8s.io/kind@latest"
        exit 1
    fi
    if ! command -v kubectl &> /dev/null; then
        print_error "kubectl is not installed. Please install kubectl first."
        exit 1
    fi
    if ! command -v podman &> /dev/null; then
        print_error "podman is not installed. Please install podman first."
        exit 1
    fi
    if ! command -v envsubst &> /dev/null; then
        print_error "envsubst is not installed. Please install envsubst first"
        # BUG FIX: previously fell through without exiting, so setup continued
        # and only failed much later when expanding the Kind config.
        exit 1
    fi
    print_success "All prerequisites are installed"
}
# Create the host directories that back the cluster's persistent volumes
# under $HOME/k8s-data/dev. Idempotent: existing directories are kept.
create_host_directories() {
    print_status "Creating host directories for persistent storage..."
    # All loop/working variables are local so nothing leaks into global scope
    local base_dir="$HOME/k8s-data/dev"
    local -a directories=(
        "$base_dir/minio"
        "$base_dir/redis"
        "$base_dir/logs"
        "$base_dir/prometheus"
        "$base_dir/grafana"
        "$base_dir/certs"
    )
    local dir
    for dir in "${directories[@]}"; do
        if [ ! -d "$dir" ]; then
            mkdir -p "$dir"
            print_status "Created directory: $dir"
        else
            print_status "Directory already exists: $dir"
        fi
    done
    # Set proper permissions
    # chmod -R 755 "$base_dir"
    print_success "Host directories created and configured"
}
# Create (or interactively recreate) the Kind cluster from kind-dev-cluster.yaml.
# Requires: kind, envsubst, kubectl; podman (preferred) or docker for node exec.
create_cluster() {
    print_status "Creating Kind cluster..."
    # Use ${CLUSTER_NAME} consistently instead of repeating the literal name.
    # -x matches the whole line, so a cluster like "eveai-dev-cluster-2"
    # does not count as an existing match.
    if kind get clusters | grep -qx "${CLUSTER_NAME}"; then
        print_warning "Cluster '${CLUSTER_NAME}' already exists"
        echo -n "Do you want to delete and recreate it? (y/N): "
        read -r response
        if [[ "$response" =~ ^[Yy]$ ]]; then
            print_status "Deleting existing cluster..."
            kind delete cluster --name "${CLUSTER_NAME}"
        else
            print_status "Using existing cluster"
            return 0
        fi
    fi
    KIND_CONFIG="kind-dev-cluster.yaml"
    if [ ! -f "${KIND_CONFIG}" ]; then
        print_error "Config '${KIND_CONFIG}' niet gevonden in $(pwd)"
        exit 1
    fi
    print_status "Creating new Kind cluster with configuration..."
    # Expand environment variables in the Kind config via envsubst.
    # NOTE: mktemp --suffix is GNU-specific (fine on Linux dev hosts).
    EXPANDED_CONFIG="$(mktemp --suffix=.yaml)"
    envsubst < "${KIND_CONFIG}" > "${EXPANDED_CONFIG}"
    # Preferred: run kind in a user-scope systemd unit with cgroup delegation
    # so rootless podman gets the cgroup permissions it needs.
    if command -v systemd-run >/dev/null 2>&1; then
        systemd-run --scope --user -p "Delegate=yes" \
            env KIND_EXPERIMENTAL_PROVIDER=podman \
            kind create cluster --name "${CLUSTER_NAME}" --config "${EXPANDED_CONFIG}"
    else
        # Fallback without delegation; may fail on cgroup-restricted systems
        print_warning "Start zonder systemd-run scope; kan mislukken bij ontbrekende delegatie."
        kind create cluster --name "${CLUSTER_NAME}" --config "${EXPANDED_CONFIG}"
    fi
    # Cleanup temporary expanded config
    rm -f "${EXPANDED_CONFIG}"
    # Wait for the node(s) to report Ready before touching the node container
    print_status "Waiting for cluster to be ready..."
    kubectl wait --for=condition=Ready nodes --all --timeout=300s
    # Refresh CA certificates inside the node and restart containerd so the
    # local registry's certificate is trusted for image pulls.
    local node="${CLUSTER_NAME}-control-plane"
    if command -v podman &> /dev/null; then
        podman exec "${node}" update-ca-certificates
        podman exec "${node}" systemctl restart containerd
    else
        docker exec "${node}" update-ca-certificates
        docker exec "${node}" systemctl restart containerd
    fi
    print_success "Kind cluster created successfully"
}
# Raise file-descriptor and inotify limits inside the control-plane node so
# the containerd CRI plugin does not fail under load, then restart
# containerd and kubelet to pick up the new limits.
configure_container_limits() {
    print_status "Configuring container resource limits..."
    local node="${CLUSTER_NAME}-control-plane"
    # Persist the sysctl values in the node and apply them immediately
    podman exec "${node}" sh -c '
echo "fs.inotify.max_user_instances = 1024" >> /etc/sysctl.conf
echo "fs.inotify.max_user_watches = 524288" >> /etc/sysctl.conf
echo "fs.file-max = 2097152" >> /etc/sysctl.conf
sysctl -p
'
    print_status "Restarting containerd with new limits..."
    podman exec "${node}" systemctl restart containerd
    # Give containerd a moment to stabilize before restarting kubelet
    sleep 10
    # Restart kubelet to re-establish CRI communication
    podman exec "${node}" systemctl restart kubelet
    print_success "Container limits configured and services restarted"
}
# Verify CRI status and functionality.
# Probes crictl inside the node, dumps containerd/kubelet logs on failure,
# then polls until the node reports Ready (max ~5 minutes).
# Returns: 0 when CRI works and the node is Ready, 1 otherwise.
verify_cri_status() {
    print_status "Verifying CRI status..."
    local node="${CLUSTER_NAME}-control-plane"
    # Wait for the services restarted by configure_container_limits to settle
    sleep 15
    # Test CRI connectivity via crictl inside the node
    if podman exec "${node}" crictl version &>/dev/null; then
        print_success "CRI is functional"
        print_status "CRI version information:"
        podman exec "${node}" crictl version
    else
        print_error "CRI is not responding - checking containerd logs"
        podman exec "${node}" journalctl -u containerd --no-pager -n 20
        print_error "Checking kubelet logs"
        podman exec "${node}" journalctl -u kubelet --no-pager -n 10
        return 1
    fi
    # Poll for node readiness: 30 attempts x 10s = 5 minutes max
    print_status "Waiting for node to become Ready..."
    local max_attempts=30
    local attempt=0
    while [ "$attempt" -lt "$max_attempts" ]; do
        # BUG FIX: -w matches "Ready" as a whole word; a plain substring
        # match also matched "NotReady" and reported success prematurely.
        if kubectl get nodes | grep -qw "Ready"; then
            print_success "Node is Ready"
            return 0
        fi
        attempt=$((attempt + 1))
        print_status "Attempt $attempt/$max_attempts - waiting for node readiness..."
        sleep 10
    done
    print_error "Node failed to become Ready within timeout"
    kubectl get nodes -o wide
    return 1
}
# Install the NGINX Ingress Controller (Kind-specific manifest) and wait
# for the controller pod to become ready. Exits 1 on failure.
install_ingress_controller() {
    print_status "Installing NGINX Ingress Controller..."
    # Kind-provider deploy manifest, pinned to controller v1.8.1
    kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.1/deploy/static/provider/kind/deploy.yaml
    print_status "Waiting for Ingress Controller to be ready..."
    # BUG FIX: test the wait directly in `if`. The old `[ $? -eq 0 ]` check
    # was unreachable on failure because `set -e` aborted the script first.
    if kubectl wait --namespace ingress-nginx \
        --for=condition=ready pod \
        --selector=app.kubernetes.io/component=controller \
        --timeout=300s; then
        print_success "NGINX Ingress Controller installed and ready"
    else
        print_error "Failed to install or start Ingress Controller"
        exit 1
    fi
    # Show the resulting pods/services for operator visibility
    print_status "Ingress Controller status:"
    kubectl get pods -n ingress-nginx
    kubectl get services -n ingress-nginx
}
# Apply the base Kubernetes manifests from the current directory.
# Missing manifests are skipped with a warning instead of failing.
apply_manifests() {
    print_status "Applying Kubernetes manifests..."
    # Apply in dependency order: volumes before config/secrets.
    # `local` keeps the list and loop variable out of global scope.
    local -a manifests=(
        "persistent-volumes.yaml"
        "config-secrets.yaml"
    )
    local manifest
    for manifest in "${manifests[@]}"; do
        if [ -f "$manifest" ]; then
            print_status "Applying $manifest..."
            kubectl apply -f "$manifest"
        else
            print_warning "Manifest $manifest not found, skipping..."
        fi
    done
    print_success "Base manifests applied successfully"
}
# Verify cluster status
# Prints a human-readable snapshot of the cluster (nodes, namespaces, PVs)
# and smoke-tests that the private registry is reachable from inside it.
verify_cluster() {
print_status "Verifying cluster status..."
# Check nodes
print_status "Cluster nodes:"
kubectl get nodes
# Check namespaces
print_status "Namespaces:"
kubectl get namespaces
# Check persistent volumes
print_status "Persistent volumes:"
kubectl get pv
# Check if registry is accessible from cluster.
# --dry-run=server makes the API server validate the pod spec without
# persisting it, so a success means the image reference is acceptable.
print_status "Testing registry connectivity..."
if kubectl run test-registry --image=registry.ask-eve-ai-local.com/josakola/nginx:latest --dry-run=server &> /dev/null; then
print_success "Registry is accessible from cluster"
# NOTE(review): with --dry-run=server no pod is actually created, so this
# delete is presumably a defensive no-op — confirm and consider removing.
kubectl delete pod test-registry --ignore-not-found=true &> /dev/null || true
else
# Non-fatal: the image may simply not have been pushed yet
print_warning "Registry connectivity test failed - this might be expected if images aren't pushed yet"
fi
}
# Main execution
# Runs the full setup pipeline in order: prerequisites -> host dirs ->
# cluster -> node limits -> CRI verification -> ingress -> manifests ->
# final verification, then prints usage hints for the operator.
main() {
echo "=================================================="
echo "🏗️ EveAI Dev Kind Cluster Setup"
echo "=================================================="
check_prerequisites
create_host_directories
create_cluster
configure_container_limits
verify_cri_status
install_ingress_controller
apply_manifests
verify_cluster
# Post-setup guidance for the operator
echo ""
echo "=================================================="
print_success "EveAI Dev Kind Cluster setup completed!"
echo "=================================================="
echo ""
echo "📋 Next steps:"
echo "1. Deploy your application services using: ./deploy-all-services.sh"
echo "2. Access services via Ingress: http://minty.ask-eve-ai-local.com:3080"
echo ""
echo "🔧 Useful commands:"
echo " kubectl config current-context # Verify you're using the right cluster"
echo " kubectl get all -n eveai-dev # Check all resources in dev namespace"
echo " kubectl get ingress -n eveai-dev # Check Ingress resources"
echo " kind delete cluster --name eveai-dev-cluster # Delete cluster when done"
echo ""
echo "📊 Service Access (via Ingress):"
echo " - Main App: http://minty.ask-eve-ai-local.com:3080/admin/"
echo " - API: http://minty.ask-eve-ai-local.com:3080/api/"
echo " - Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/"
echo " - Static Files: http://minty.ask-eve-ai-local.com:3080/static/"
}
# Run main function, forwarding all CLI arguments
main "$@"