- Functional control plan
This commit is contained in:
309
k8s/scripts/dependency-checks.sh
Normal file
309
k8s/scripts/dependency-checks.sh
Normal file
@@ -0,0 +1,309 @@
#!/bin/bash
# Kubernetes Dependency Checking
# File: dependency-checks.sh

# Check if a service is ready
#
# Looks up the Deployment named after the service and compares its ready
# replica count with its desired replica count.
#
# Globals:   K8S_NAMESPACE (read) - namespace used when $2 is not given
# Arguments: $1 - service (Deployment) name
#            $2 - namespace (optional)
#            $3 - accepted so existing callers keep working; this check is a
#                 single snapshot and does not use a timeout
# Outputs:   reports through log_operation and log_dependency_check
# Returns:   0 when at least one replica is ready and ready == desired,
#            1 when the Deployment is missing or not fully ready
check_service_ready() {
    local service=$1
    local namespace=${2:-$K8S_NAMESPACE}

    log_operation "INFO" "Checking if service '$service' is ready in namespace '$namespace'"

    # Check if deployment exists
    if ! kubectl get deployment "$service" -n "$namespace" &>/dev/null; then
        log_dependency_check "$service" "NOT_FOUND" "Deployment does not exist"
        return 1
    fi

    # Check if deployment is ready
    local ready_replicas desired_replicas
    ready_replicas=$(kubectl get deployment "$service" -n "$namespace" \
        -o jsonpath='{.status.readyReplicas}' 2>/dev/null)
    desired_replicas=$(kubectl get deployment "$service" -n "$namespace" \
        -o jsonpath='{.spec.replicas}' 2>/dev/null)

    # Empty output falls back to 0 ready / 1 desired. Anything that is not a
    # plain non-negative integer gets the same fallback so the numeric
    # comparison below can never be fed garbage.
    [[ "$ready_replicas" =~ ^[0-9]+$ ]] || ready_replicas=0
    [[ "$desired_replicas" =~ ^[0-9]+$ ]] || desired_replicas=1

    if (( ready_replicas == desired_replicas && ready_replicas > 0 )); then
        log_dependency_check "$service" "READY" "All $ready_replicas/$desired_replicas replicas are ready"
        return 0
    else
        log_dependency_check "$service" "NOT_READY" "Only $ready_replicas/$desired_replicas replicas are ready"
        return 1
    fi
}

# Wait for a service to become ready
#
# Polls check_service_ready until it succeeds or the timeout is used up.
# A check always runs before any sleeping and again after the final sleep,
# so a service that becomes ready during the last interval is still seen.
#
# Globals:   K8S_NAMESPACE (read) - namespace used when $2 is not given
# Arguments: $1 - service name
#            $2 - namespace (optional)
#            $3 - timeout in whole seconds (optional, default 300)
#            $4 - seconds between checks (optional, default 10)
# Outputs:   progress and result through log_operation
# Returns:   0 once the service is ready, 1 if the timeout expires first
wait_for_service_ready() {
    local service=$1
    local namespace=${2:-$K8S_NAMESPACE}
    local timeout=${3:-300}
    local check_interval=${4:-10}

    # Both values feed shell arithmetic; a non-numeric or zero interval would
    # otherwise keep `elapsed` from ever advancing and spin forever.
    if ! [[ "$timeout" =~ ^[0-9]+$ ]]; then
        log_operation "WARNING" "Invalid timeout '$timeout', using 300s"
        timeout=300
    fi
    if ! [[ "$check_interval" =~ ^[0-9]+$ ]] || (( 10#$check_interval == 0 )); then
        log_operation "WARNING" "Invalid check interval '$check_interval', using 10s"
        check_interval=10
    fi
    # Force base 10 so values with leading zeros are not read as octal.
    timeout=$((10#$timeout))
    check_interval=$((10#$check_interval))

    log_operation "INFO" "Waiting for service '$service' to become ready (timeout: ${timeout}s)"

    local elapsed=0
    local remaining pause
    while true; do
        if check_service_ready "$service" "$namespace" 0; then
            log_operation "SUCCESS" "Service '$service' is ready after ${elapsed}s"
            return 0
        fi

        remaining=$((timeout - elapsed))
        if (( remaining <= 0 )); then
            break
        fi

        # Never sleep past the deadline.
        pause=$(( check_interval < remaining ? check_interval : remaining ))
        log_operation "DEBUG" "Service '$service' not ready yet, waiting ${pause}s... (${elapsed}/${timeout}s)"
        sleep "$pause"
        elapsed=$((elapsed + pause))
    done

    log_operation "ERROR" "Service '$service' failed to become ready within ${timeout}s"
    return 1
}

# Check if infrastructure services are ready
#
# Asks get_services_in_group for the "infrastructure" group and runs
# check_service_ready on every entry. All entries are checked even after a
# failure so each non-ready service gets its own warning.
#
# Globals:   K8S_NAMESPACE (read)
# Arguments: none
# Outputs:   progress and result through log_operation
# Returns:   0 when every infrastructure service is ready,
#            1 when the group lookup fails or any service is not ready
check_infrastructure_ready() {
    log_operation "INFO" "Checking infrastructure readiness"

    local infrastructure_services
    if ! infrastructure_services=$(get_services_in_group "infrastructure"); then
        log_operation "ERROR" "Failed to get infrastructure services"
        return 1
    fi

    # The loop variable must be local: bash scoping is dynamic, so an
    # undeclared `service` here would overwrite a caller's variable of the
    # same name.
    local service
    local all_ready=true
    # Unquoted on purpose: the list is whitespace-separated service names.
    for service in $infrastructure_services; do
        if ! check_service_ready "$service" "$K8S_NAMESPACE" 0; then
            all_ready=false
            log_operation "WARNING" "Infrastructure service '$service' is not ready"
        fi
    done

    if [[ "$all_ready" == "true" ]]; then
        log_operation "SUCCESS" "All infrastructure services are ready"
        return 0
    else
        log_operation "ERROR" "Some infrastructure services are not ready"
        log_operation "INFO" "You may need to start infrastructure first: kup infrastructure"
        return 1
    fi
}

# Check app-specific dependencies
#
# Applies hard-coded rules per service name:
#   eveai-workers, eveai-chat-workers     -> eveai-api must be ready
#   eveai-beat                            -> redis must be ready
#   eveai-app, eveai-api,
#   eveai-chat-client, eveai-entitlements -> infrastructure must be ready
# Any other name has no rule and passes.
#
# Globals:   K8S_NAMESPACE (read)
# Arguments: $1 - service name
# Outputs:   progress and result through log_operation
# Returns:   0 when the rule for the service is satisfied (or none exists),
#            1 otherwise
check_app_dependencies() {
    # Keep a local named `service`: a helper that loops with an undeclared
    # `service` variable then writes to this local and not to the caller's.
    local service=$1
    # Messages use a separate copy so they still name the requested service
    # even if a helper overwrites `service`.
    local service_name=$1

    log_operation "INFO" "Checking dependencies for service '$service_name'"

    case "$service_name" in
        "eveai-workers"|"eveai-chat-workers")
            # Workers need API to be running
            if ! check_service_ready "eveai-api" "$K8S_NAMESPACE" 0; then
                log_operation "ERROR" "Service '$service_name' requires eveai-api to be running"
                log_operation "INFO" "Start API first: kup-api"
                return 1
            fi
            ;;
        "eveai-beat")
            # Beat needs Redis to be running
            if ! check_service_ready "redis" "$K8S_NAMESPACE" 0; then
                log_operation "ERROR" "Service '$service_name' requires redis to be running"
                log_operation "INFO" "Start infrastructure first: kup infrastructure"
                return 1
            fi
            ;;
        "eveai-app"|"eveai-api"|"eveai-chat-client"|"eveai-entitlements")
            # Core apps need infrastructure
            if ! check_infrastructure_ready; then
                log_operation "ERROR" "Service '$service_name' requires infrastructure to be running"
                return 1
            fi
            ;;
        *)
            log_operation "DEBUG" "No specific dependencies defined for service '$service_name'"
            ;;
    esac

    log_operation "SUCCESS" "All dependencies satisfied for service '$service_name'"
    return 0
}

# Check if a pod is running and ready
#
# Lists pods matching a label selector and succeeds as soon as one of them
# has status "Running" with every container ready (READY column "n/n",
# n > 0).
#
# Globals:   K8S_NAMESPACE (read) - namespace used when $2 is not given
# Arguments: $1 - label selector passed to `kubectl get pods -l`
#            $2 - namespace (optional)
# Returns:   0 if at least one matching pod is running and fully ready,
#            1 if there are no matching pods or none is fully ready
check_pod_ready() {
    local pod_selector=$1
    local namespace=${2:-$K8S_NAMESPACE}

    local pods
    pods=$(kubectl get pods -l "$pod_selector" -n "$namespace" --no-headers 2>/dev/null)

    if [[ -z "$pods" ]]; then
        return 1
    fi

    # Columns of the --no-headers table as consumed here: name, ready, status.
    local pod_name ready pod_status rest
    local ready_count total_count
    while read -r pod_name ready pod_status rest; do
        [[ "$pod_status" == "Running" ]] || continue

        # Accept any number of digits so counts such as 10/10 are recognised.
        if [[ "$ready" =~ ^([0-9]+)/([0-9]+)$ ]]; then
            ready_count=$((10#${BASH_REMATCH[1]}))
            total_count=$((10#${BASH_REMATCH[2]}))
            if (( ready_count > 0 && ready_count == total_count )); then
                return 0
            fi
        fi
    done <<< "$pods"

    return 1
}

# Check service health endpoint
#
# Looks up the service's health endpoint with get_service_health_endpoint and
# probes it:
#   redis - `redis-cli ping` inside deployment/redis
#   minio - `mc ready local` inside deployment/minio
#   other - endpoint of the form "/path:port" is fetched with curl from
#           inside the first pod labelled app=<service>
#
# Globals:   K8S_NAMESPACE (read) - namespace used when $2 is not given
# Arguments: $1 - service name
#            $2 - namespace (optional)
# Outputs:   result through log_operation
# Returns:   0 when the probe passes, and also when no probe could be run
#            (no endpoint defined, unrecognised endpoint format, no pod found);
#            1 when a probe ran and failed
check_service_health() {
    local service=$1
    local namespace=${2:-$K8S_NAMESPACE}

    local health_endpoint
    health_endpoint=$(get_service_health_endpoint "$service")

    if [[ -z "$health_endpoint" ]]; then
        log_operation "DEBUG" "No health endpoint defined for service '$service'"
        return 0
    fi

    case "$service" in
        "redis")
            # Check Redis with ping
            if kubectl exec -n "$namespace" deployment/redis -- redis-cli ping &>/dev/null; then
                log_operation "SUCCESS" "Redis health check passed"
                return 0
            else
                log_operation "WARNING" "Redis health check failed"
                return 1
            fi
            ;;
        "minio")
            # Check MinIO readiness
            if kubectl exec -n "$namespace" deployment/minio -- mc ready local &>/dev/null; then
                log_operation "SUCCESS" "MinIO health check passed"
                return 0
            else
                log_operation "WARNING" "MinIO health check failed"
                return 1
            fi
            ;;
        *)
            # For other services, try HTTP health check
            if [[ "$health_endpoint" =~ ^/.*:[0-9]+$ ]]; then
                # Split on the LAST colon, matching what the regex validated,
                # so a path that itself contains ':' stays intact.
                # The local is deliberately not called `path`: this file also
                # targets zsh (see the ZSH_VERSION check at the end), where
                # `path` is tied to PATH.
                local url_path=${health_endpoint%:*}
                local port=${health_endpoint##*:}

                # Run curl inside the first matching pod (kubectl exec).
                local pod
                pod=$(kubectl get pods -l "app=$service" -n "$namespace" --no-headers -o custom-columns=":metadata.name" | head -n1)

                if [[ -n "$pod" ]]; then
                    if timeout 10 kubectl exec -n "$namespace" "$pod" -- curl -f -s "http://localhost:$port$url_path" &>/dev/null; then
                        log_operation "SUCCESS" "Health check passed for service '$service'"
                        return 0
                    else
                        log_operation "WARNING" "Health check failed for service '$service'"
                        return 1
                    fi
                fi
            fi
            ;;
    esac

    # Reached only when no probe was attempted; treated as "not unhealthy".
    log_operation "DEBUG" "Could not perform health check for service '$service'"
    return 0
}

# Comprehensive dependency check for a service group
#
# For every service in the group, in the order produced by
# sort_services_by_deploy_order, checks that each dependency reported by
# get_service_dependencies is ready and that check_app_dependencies passes.
# All services are examined even after a failure so every problem is logged.
#
# Globals:   K8S_NAMESPACE (read)
# Arguments: $1 - service group name
# Outputs:   progress and result through log_operation
# Returns:   0 when every dependency of every service is satisfied,
#            1 when the group lookup fails or any dependency is not met
check_group_dependencies() {
    local group=$1

    log_operation "INFO" "Checking dependencies for service group '$group'"

    local services
    if ! services=$(get_services_in_group "$group"); then
        log_operation "ERROR" "Failed to get services for group '$group'"
        return 1
    fi

    # Sort services by deployment order
    # Split on any whitespace, newlines included: -d '' makes read consume the
    # whole list and not just its first line. read returns non-zero at
    # end of input even though the array is filled, hence `|| true`.
    local -a service_array=()
    read -r -d '' -a service_array <<< "$services" || true

    local sorted_services
    sorted_services=$(sort_services_by_deploy_order "${service_array[@]}")

    local all_dependencies_met=true
    # Loop variables are declared local so they cannot overwrite a caller's
    # variables of the same name.
    local service dep dependencies
    for service in $sorted_services; do
        dependencies=$(get_service_dependencies "$service")

        for dep in $dependencies; do
            if ! check_service_ready "$dep" "$K8S_NAMESPACE" 0; then
                log_operation "ERROR" "Dependency '$dep' not ready for service '$service'"
                all_dependencies_met=false
            fi
        done

        # Check app-specific dependencies
        if ! check_app_dependencies "$service"; then
            all_dependencies_met=false
        fi
    done

    if [[ "$all_dependencies_met" == "true" ]]; then
        log_operation "SUCCESS" "All dependencies satisfied for group '$group'"
        return 0
    else
        log_operation "ERROR" "Some dependencies not satisfied for group '$group'"
        return 1
    fi
}

# Show dependency status for all services
#
# Prints one line per service in the "all" group: its readiness according to
# check_service_ready and, for ready services only, the result of
# check_service_health.
#
# Globals:   K8S_NAMESPACE (read)
# Arguments: none
# Outputs:   the overview on stdout
show_dependency_status() {
    echo "🔍 Dependency Status Overview:"
    echo "=============================="

    local all_services
    all_services=$(get_services_in_group "all")

    # `service` is local so the loop cannot overwrite a caller's variable.
    # The readiness text is not kept in a variable called `status`: this file
    # also targets zsh (see the ZSH_VERSION check at the end), where `status`
    # is a read-only special parameter.
    local service readiness health_status
    for service in $all_services; do
        readiness="❌ NOT READY"
        health_status=""

        if check_service_ready "$service" "$K8S_NAMESPACE" 0; then
            readiness="✅ READY"

            # Check health if available
            if check_service_health "$service" "$K8S_NAMESPACE"; then
                health_status=" (healthy)"
            else
                health_status=" (unhealthy)"
            fi
        fi

        printf ' %s: %s%s\n' "$service" "$readiness" "$health_status"
    done
}

# Export functions for use in other scripts
#
# `export -f` is used when not running under zsh so child bash processes
# inherit the functions. Under zsh the functions are only looked up with
# `typeset -f` and the output is discarded, so nothing is exported there.
# ${ZSH_VERSION:-} keeps the test safe when the file is sourced with `set -u`
# in a shell where ZSH_VERSION is unset.
if [[ -n "${ZSH_VERSION:-}" ]]; then
    typeset -f check_service_ready wait_for_service_ready check_infrastructure_ready > /dev/null
    typeset -f check_app_dependencies check_pod_ready check_service_health > /dev/null
    typeset -f check_group_dependencies show_dependency_status > /dev/null
else
    export -f check_service_ready wait_for_service_ready check_infrastructure_ready
    export -f check_app_dependencies check_pod_ready check_service_health
    export -f check_group_dependencies show_dependency_status
fi

Reference in New Issue
Block a user