Files
eveAI/k8s/scripts/dependency-checks.sh
2025-08-18 11:44:23 +02:00

309 lines
10 KiB
Bash

#!/bin/bash
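# Kubernetes Dependency Checking
# File: dependency-checks.sh
#
# Readiness and dependency checks for Kubernetes deployments. The functions
# defined here are exported at the end of the file for use in other scripts.
# The functions rely on names that are not defined in this file:
#   - K8S_NAMESPACE: default namespace for the checks
#   - log_operation, log_dependency_check: logging helpers
#   - get_services_in_group, get_service_dependencies,
#     get_service_health_endpoint, sort_services_by_deploy_order
#   - the kubectl command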
# Check whether a deployment exists and has all of its desired replicas ready.
# Globals:
#   K8S_NAMESPACE (read) - namespace used when $2 is omitted
# Arguments:
#   $1 - deployment name
#   $2 - namespace (optional)
#   $3 - accepted for backward compatibility and ignored; this check is a
#        single snapshot and never waits
# Outputs:
#   Reports through log_operation and log_dependency_check
# Returns:
#   0 when ready replicas equal desired replicas and at least one is ready,
#   1 when the deployment is missing or not fully ready
check_service_ready() {
  local service=$1
  local namespace=${2:-$K8S_NAMESPACE}

  log_operation "INFO" "Checking if service '$service' is ready in namespace '$namespace'"

  # Check if deployment exists
  if ! kubectl get deployment "$service" -n "$namespace" &>/dev/null; then
    log_dependency_check "$service" "NOT_FOUND" "Deployment does not exist"
    return 1
  fi

  # Read the ready and desired replica counts
  local ready_replicas desired_replicas
  ready_replicas=$(kubectl get deployment "$service" -n "$namespace" \
    -o jsonpath='{.status.readyReplicas}' 2>/dev/null)
  desired_replicas=$(kubectl get deployment "$service" -n "$namespace" \
    -o jsonpath='{.spec.replicas}' 2>/dev/null)

  # An empty value means "0 ready" / "1 desired". Anything that is not a plain
  # integer gets the same defaults, so the arithmetic comparison below never
  # evaluates arbitrary text.
  [[ "$ready_replicas" =~ ^[0-9]+$ ]] || ready_replicas=0
  [[ "$desired_replicas" =~ ^[0-9]+$ ]] || desired_replicas=1

  # 10# forces base 10 so a leading zero is not read as octal
  if (( 10#$ready_replicas == 10#$desired_replicas && 10#$ready_replicas > 0 )); then
    log_dependency_check "$service" "READY" "All $ready_replicas/$desired_replicas replicas are ready"
    return 0
  else
    log_dependency_check "$service" "NOT_READY" "Only $ready_replicas/$desired_replicas replicas are ready"
    return 1
  fi
}
# Poll check_service_ready until the service is ready or the timeout elapses.
# Globals:
#   K8S_NAMESPACE (read) - namespace used when $2 is omitted
# Arguments:
#   $1 - deployment name
#   $2 - namespace (optional)
#   $3 - timeout in whole seconds (optional, default 300)
#   $4 - seconds between checks (optional, default 10; must be >= 1)
# Outputs:
#   Progress and result via log_operation
# Returns:
#   0 as soon as a check succeeds, 1 if the service is still not ready once
#   the timeout has elapsed
# Notes:
#   Elapsed time counts only the sleep intervals, not time spent in the checks.
wait_for_service_ready() {
  local service=$1
  local namespace=${2:-$K8S_NAMESPACE}
  local timeout=${3:-300}
  local check_interval=${4:-10}

  # Both values feed shell arithmetic. A malformed timeout or an interval of 0
  # would otherwise keep the loop below from ever terminating.
  if [[ ! "$timeout" =~ ^[0-9]+$ ]]; then
    log_operation "WARNING" "Invalid timeout '$timeout', using 300s"
    timeout=300
  fi
  if [[ ! "$check_interval" =~ ^[0-9]+$ ]] || (( 10#$check_interval == 0 )); then
    log_operation "WARNING" "Invalid check interval '$check_interval', using 10s"
    check_interval=10
  fi
  # Normalise to base 10 so a leading zero is not read as octal
  timeout=$((10#$timeout))
  check_interval=$((10#$check_interval))

  log_operation "INFO" "Waiting for service '$service' to become ready (timeout: ${timeout}s)"

  local elapsed=0
  while true; do
    if check_service_ready "$service" "$namespace" 0; then
      log_operation "SUCCESS" "Service '$service' is ready after ${elapsed}s"
      return 0
    fi

    # Test the deadline after the check, so the final sleep is always followed
    # by one last readiness check instead of being wasted.
    if (( elapsed >= timeout )); then
      break
    fi

    log_operation "DEBUG" "Service '$service' not ready yet, waiting ${check_interval}s... (${elapsed}/${timeout}s)"
    sleep "$check_interval"
    elapsed=$((elapsed + check_interval))
  done

  log_operation "ERROR" "Service '$service' failed to become ready within ${timeout}s"
  return 1
}
# Check that every service in the "infrastructure" group is ready.
# Globals:
#   K8S_NAMESPACE (read) - namespace passed to check_service_ready
# Arguments:
#   none
# Outputs:
#   Progress and result via log_operation
# Returns:
#   0 when all infrastructure services are ready,
#   1 when the group lookup fails or at least one service is not ready
check_infrastructure_ready() {
  log_operation "INFO" "Checking infrastructure readiness"

  local infrastructure_services
  if ! infrastructure_services=$(get_services_in_group "infrastructure"); then
    log_operation "ERROR" "Failed to get infrastructure services"
    return 1
  fi

  local all_ready=true
  # The loop variable must be local: callers keep their own `service` variable
  # and a non-local assignment here would overwrite it (dynamic scoping).
  local service
  # Intentionally unquoted: the group is a whitespace-separated list of names
  for service in $infrastructure_services; do
    if ! check_service_ready "$service" "$K8S_NAMESPACE" 0; then
      all_ready=false
      log_operation "WARNING" "Infrastructure service '$service' is not ready"
    fi
  done

  if [[ "$all_ready" == "true" ]]; then
    log_operation "SUCCESS" "All infrastructure services are ready"
    return 0
  fi

  log_operation "ERROR" "Some infrastructure services are not ready"
  log_operation "INFO" "You may need to start infrastructure first: kup infrastructure"
  return 1
}
# Check the hard-coded, service-specific prerequisites of one service.
# Globals:
#   K8S_NAMESPACE (read) - namespace passed to check_service_ready
# Arguments:
#   $1 - name of the service about to be started
# Outputs:
#   Progress, errors and start-up hints via log_operation
# Returns:
#   0 when the prerequisites are satisfied or none are defined for the service,
#   1 when a prerequisite is not ready
check_app_dependencies() {
  local service=$1

  log_operation "INFO" "Checking dependencies for service '$service'"

  case "$service" in
    "eveai-workers" | "eveai-chat-workers")
      # Workers need API to be running
      if ! check_service_ready "eveai-api" "$K8S_NAMESPACE" 0; then
        log_operation "ERROR" "Service '$service' requires eveai-api to be running"
        log_operation "INFO" "Start API first: kup-api"
        return 1
      fi
      ;;
    "eveai-beat")
      # Beat needs Redis to be running
      if ! check_service_ready "redis" "$K8S_NAMESPACE" 0; then
        log_operation "ERROR" "Service '$service' requires redis to be running"
        log_operation "INFO" "Start infrastructure first: kup infrastructure"
        return 1
      fi
      ;;
    "eveai-app" | "eveai-api" | "eveai-chat-client" | "eveai-entitlements")
      # Core apps need infrastructure
      local infra_ready=true
      check_infrastructure_ready || infra_ready=false
      # Shell variables are dynamically scoped: a helper that assigns a
      # non-local variable named `service` writes into ours. Re-read the
      # argument so the messages below always name the requested service.
      service=$1
      if [[ "$infra_ready" != "true" ]]; then
        log_operation "ERROR" "Service '$service' requires infrastructure to be running"
        return 1
      fi
      ;;
    *)
      log_operation "DEBUG" "No specific dependencies defined for service '$service'"
      ;;
  esac

  log_operation "SUCCESS" "All dependencies satisfied for service '$service'"
  return 0
}
# Check whether at least one pod matching a label selector is Running with
# all of its containers ready.
# Globals:
#   K8S_NAMESPACE (read) - namespace used when $2 is omitted
# Arguments:
#   $1 - label selector passed to `kubectl get pods -l`
#   $2 - namespace (optional)
# Returns:
#   0 when some pod has STATUS "Running" and READY "n/n" with n >= 1,
#   1 when no pod matches the selector or none is fully ready
check_pod_ready() {
  local pod_selector=$1
  local namespace=${2:-$K8S_NAMESPACE}

  local pods
  pods=$(kubectl get pods -l "$pod_selector" -n "$namespace" --no-headers 2>/dev/null)
  if [[ -z "$pods" ]]; then
    return 1
  fi

  # Columns of `kubectl get pods --no-headers`: NAME READY STATUS ...
  # `read` splits them directly, so no awk/cut process is spawned per line.
  local pod_name ready_col phase remainder ready_count total_count
  while read -r pod_name ready_col phase remainder; do
    [[ "$phase" == "Running" ]] || continue
    # Accept counts of any width ("10/10"), not just single digits
    [[ "$ready_col" =~ ^[0-9]+/[0-9]+$ ]] || continue

    ready_count=${ready_col%%/*}
    total_count=${ready_col##*/}
    # 10# forces base 10 so a leading zero is not read as octal
    if (( 10#$ready_count > 0 && 10#$ready_count == 10#$total_count )); then
      return 0
    fi
  done <<< "$pods"

  return 1
}
# Run a health probe for a service, if one is configured.
# Globals:
#   K8S_NAMESPACE (read) - namespace used when $2 is omitted
# Arguments:
#   $1 - service name
#   $2 - namespace (optional)
# Outputs:
#   Result via log_operation
# Returns:
#   0 when the probe passes, and also when no probe is defined or it cannot
#   be performed (unexpected endpoint format, no pod found),
#   1 when a probe ran and failed
# Notes:
#   HTTP endpoints are expected in the form "/path:port" and are probed with
#   curl executed inside the first pod labelled app=<service>.
check_service_health() {
  local service=$1
  local namespace=${2:-$K8S_NAMESPACE}

  local health_endpoint
  health_endpoint=$(get_service_health_endpoint "$service")
  if [[ -z "$health_endpoint" ]]; then
    log_operation "DEBUG" "No health endpoint defined for service '$service'"
    return 0
  fi

  case "$service" in
    "redis")
      # Check Redis with ping
      if kubectl exec -n "$namespace" deployment/redis -- redis-cli ping &>/dev/null; then
        log_operation "SUCCESS" "Redis health check passed"
        return 0
      else
        log_operation "WARNING" "Redis health check failed"
        return 1
      fi
      ;;
    "minio")
      # Check MinIO readiness
      if kubectl exec -n "$namespace" deployment/minio -- mc ready local &>/dev/null; then
        log_operation "SUCCESS" "MinIO health check passed"
        return 0
      else
        log_operation "WARNING" "MinIO health check failed"
        return 1
      fi
      ;;
    *)
      # For other services, try HTTP health check
      if [[ "$health_endpoint" =~ ^/.*:[0-9]+$ ]]; then
        # Split on the LAST colon so a path that itself contains ':' stays
        # intact. The names avoid `path`, which zsh ties to PATH.
        local endpoint_path=${health_endpoint%:*}
        local endpoint_port=${health_endpoint##*:}

        # Probe from inside the first matching pod (kubectl exec + curl)
        local pod
        pod=$(kubectl get pods -l "app=$service" -n "$namespace" --no-headers -o custom-columns=":metadata.name" | head -n1)
        if [[ -n "$pod" ]]; then
          if timeout 10 kubectl exec -n "$namespace" "$pod" -- curl -f -s "http://localhost:${endpoint_port}${endpoint_path}" &>/dev/null; then
            log_operation "SUCCESS" "Health check passed for service '$service'"
            return 0
          else
            log_operation "WARNING" "Health check failed for service '$service'"
            return 1
          fi
        fi
      fi
      ;;
  esac

  # Best effort: not being able to probe is not treated as unhealthy
  log_operation "DEBUG" "Could not perform health check for service '$service'"
  return 0
}
# Check the dependencies of every service in a group, in deployment order.
# Globals:
#   K8S_NAMESPACE (read) - namespace passed to check_service_ready
# Arguments:
#   $1 - service group name
# Outputs:
#   Progress and result via log_operation
# Returns:
#   0 when every declared and app-specific dependency is satisfied,
#   1 when the group lookup fails or any dependency is not satisfied
# Notes:
#   All services are checked even after a failure, so every unmet dependency
#   is reported in one run.
check_group_dependencies() {
  local group=$1

  log_operation "INFO" "Checking dependencies for service group '$group'"

  local services
  if ! services=$(get_services_in_group "$group"); then
    return 1
  fi

  # Split on any whitespace, newlines included, the same way the other
  # functions in this file iterate service lists. Everything stays local so
  # the caller's variables are not touched.
  local -a service_array=()
  local service
  for service in $services; do
    service_array+=("$service")
  done

  # Sort services by deployment order
  local sorted_services
  sorted_services=$(sort_services_by_deploy_order "${service_array[@]}")

  local all_dependencies_met=true
  local dependencies dep
  for service in $sorted_services; do
    dependencies=$(get_service_dependencies "$service")
    for dep in $dependencies; do
      if ! check_service_ready "$dep" "$K8S_NAMESPACE" 0; then
        log_operation "ERROR" "Dependency '$dep' not ready for service '$service'"
        all_dependencies_met=false
      fi
    done

    # Check app-specific dependencies
    if ! check_app_dependencies "$service"; then
      all_dependencies_met=false
    fi
  done

  if [[ "$all_dependencies_met" == "true" ]]; then
    log_operation "SUCCESS" "All dependencies satisfied for group '$group'"
    return 0
  fi

  log_operation "ERROR" "Some dependencies not satisfied for group '$group'"
  return 1
}
# Print a readiness/health overview for every service in the "all" group.
# Globals:
#   K8S_NAMESPACE (read) - namespace passed to the readiness and health checks
# Arguments:
#   none
# Outputs:
#   One line per service on stdout; health is only probed for ready services
show_dependency_status() {
  echo "🔍 Dependency Status Overview:"
  echo "=============================="

  local all_services
  all_services=$(get_services_in_group "all")

  # All per-service state is local. `readiness` is used instead of `status`,
  # which is a read-only special parameter in zsh.
  local service readiness health_status
  for service in $all_services; do
    readiness="❌ NOT READY"
    health_status=""

    if check_service_ready "$service" "$K8S_NAMESPACE" 0; then
      readiness="✅ READY"

      # Check health if available
      if check_service_health "$service" "$K8S_NAMESPACE"; then
        health_status=" (healthy)"
      else
        health_status=" (unhealthy)"
      fi
    fi

    echo " $service: $readiness$health_status"
  done
}
# Export functions for use in other scripts.
# ZSH_VERSION is expanded with an empty default so this test does not abort
# when the sourcing bash shell runs with `set -u` (nounset).
if [[ -n "${ZSH_VERSION:-}" ]]; then
  # `export -f` is a bash feature. Under zsh this branch only prints the
  # definitions with output discarded.
  typeset -f check_service_ready wait_for_service_ready check_infrastructure_ready > /dev/null
  typeset -f check_app_dependencies check_pod_ready check_service_health > /dev/null
  typeset -f check_group_dependencies show_dependency_status > /dev/null
else
  # bash: put the functions into the environment so child bash processes
  # inherit them
  export -f check_service_ready wait_for_service_ready check_infrastructure_ready
  export -f check_app_dependencies check_pod_ready check_service_health
  export -f check_group_dependencies show_dependency_status
fi