#!/bin/bash
# Kubernetes Core Functions
# File: k8s-functions.sh
#
# Helper functions for deploying, stopping, starting and inspecting
# Kubernetes deployments, individually or as named service groups.
# The functions are exported at the bottom of the file so other scripts can
# use them.
#
# Helpers called here but NOT defined in this file (must be provided by the
# sourcing environment):
#   log_operation, log_kubectl_command, get_yaml_files_for_group,
#   get_yaml_file_for_service, get_services_in_group,
#   sort_services_by_deploy_order, check_group_dependencies,
#   check_app_dependencies, wait_for_service_ready
#
# Environment variables read:
#   K8S_CONFIG_DIR, K8S_NAMESPACE, K8S_ENVIRONMENT, K8S_VERSION, K8S_CLUSTER

#######################################
# Deploy a service group: apply every YAML file that belongs to the group.
# Each file is applied up to 3 times (3 second pause between attempts) to
# ride out namespace race conditions.
# Globals:   K8S_CONFIG_DIR (read)
# Arguments: $1 - group name
# Returns:   0 if every file was applied, 1 otherwise. The result of the
#            follow-up readiness wait does not affect the return value.
#######################################
deploy_service_group() {
  local group=$1

  log_operation "INFO" "Deploying service group: $group"

  # ${var:-} keeps this check usable when the caller runs with `set -u`.
  if [[ -z "${K8S_CONFIG_DIR:-}" ]]; then
    log_operation "ERROR" "K8S_CONFIG_DIR not set"
    return 1
  fi

  # Get YAML files for the group
  local yaml_files
  if ! yaml_files=$(get_yaml_files_for_group "$group"); then
    log_operation "ERROR" "Failed to get YAML files for group: $group"
    return 1
  fi

  # Check dependencies first (unsatisfied dependencies only produce a warning)
  if ! check_group_dependencies "$group"; then
    log_operation "WARNING" "Some dependencies not satisfied, but proceeding with deployment"
  fi

  # Deploy each YAML file
  local max_attempts=3
  local success=true
  local yaml_file full_path attempt file_success

  # Intentionally unquoted: the helper's output is split on whitespace.
  for yaml_file in $yaml_files; do
    full_path="$K8S_CONFIG_DIR/$yaml_file"

    if [[ ! -f "$full_path" ]]; then
      log_operation "ERROR" "YAML file not found: $full_path"
      success=false
      continue
    fi

    log_operation "INFO" "Applying YAML file: $yaml_file"
    log_kubectl_command "kubectl apply -f $full_path"

    # Apply with retry logic for namespace race condition handling.
    # The counter advances on every iteration so the loop always terminates,
    # including after the last failed attempt.
    file_success=false
    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
      if kubectl apply -f "$full_path"; then
        log_operation "SUCCESS" "Successfully applied: $yaml_file"
        file_success=true
        break
      fi

      if ((attempt < max_attempts)); then
        log_operation "WARNING" "Attempt $attempt failed for $yaml_file, retrying after namespace sync..."
        sleep 3
      else
        log_operation "ERROR" "Failed to apply $yaml_file after $max_attempts attempts"
      fi
    done

    if [[ "$file_success" == "false" ]]; then
      success=false
    fi
  done

  if [[ "$success" == "true" ]]; then
    log_operation "SUCCESS" "Service group '$group' deployed successfully"
    # Wait for services to be ready
    wait_for_group_ready "$group"
    return 0
  else
    log_operation "ERROR" "Failed to deploy service group '$group'"
    return 1
  fi
}

#######################################
# Stop a service group, stopping its services in reverse deployment order.
# Arguments: $1 - group name
#            $2 - stop mode: --keep-data (default), --stop-only, --delete-all
# Returns:   0 if every service was stopped, 1 if the service list could not
#            be obtained or any stop_individual_service call failed.
#######################################
stop_service_group() {
  local group=$1
  local mode=${2:-"--keep-data"} # --keep-data, --stop-only, --delete-all

  log_operation "INFO" "Stopping service group: $group (mode: $mode)"

  local services
  if ! services=$(get_services_in_group "$group"); then
    return 1
  fi

  # Sort services in reverse deployment order for graceful shutdown
  local service_array
  read -ra service_array <<< "$services"
  local sorted_services
  sorted_services=$(sort_services_by_deploy_order "${service_array[@]}")

  # Reverse the order.
  # Intentionally unquoted: the sorted list is split on whitespace.
  # shellcheck disable=SC2206
  local service_list=($sorted_services)
  local reversed_services=()
  local i
  for ((i = ${#service_list[@]} - 1; i >= 0; i--)); do
    reversed_services+=("${service_list[i]}")
  done

  local success=true
  local service
  for service in "${reversed_services[@]}"; do
    if ! stop_individual_service "$service" "$mode"; then
      success=false
    fi
  done

  if [[ "$success" == "true" ]]; then
    log_operation "SUCCESS" "Service group '$group' stopped successfully"
    return 0
  else
    log_operation "ERROR" "Failed to stop some services in group '$group'"
    return 1
  fi
}

#######################################
# Start a service group (for stopped services), in deployment order.
# Arguments: $1 - group name
# Returns:   0 if every service started, 1 if the service list could not be
#            obtained or any start_individual_service call failed.
#######################################
start_service_group() {
  local group=$1

  log_operation "INFO" "Starting service group: $group"

  local services
  if ! services=$(get_services_in_group "$group"); then
    return 1
  fi

  # Sort services by deployment order
  local service_array
  read -ra service_array <<< "$services"
  local sorted_services
  sorted_services=$(sort_services_by_deploy_order "${service_array[@]}")

  local success=true
  local service
  # Intentionally unquoted: the sorted list is split on whitespace.
  for service in $sorted_services; do
    if ! start_individual_service "$service"; then
      success=false
    fi
  done

  if [[ "$success" == "true" ]]; then
    log_operation "SUCCESS" "Service group '$group' started successfully"
    return 0
  else
    log_operation "ERROR" "Failed to start some services in group '$group'"
    return 1
  fi
}

#######################################
# Deploy an individual service by applying its YAML file once (no retry).
# Globals:   K8S_CONFIG_DIR, K8S_NAMESPACE (read)
# Arguments: $1 - service name
#            $2 - group name (optional; accepted but currently unused)
# Returns:   0 if kubectl apply succeeded, 1 otherwise. The result of the
#            follow-up readiness wait does not affect the return value.
#######################################
deploy_individual_service() {
  local service=$1
  # shellcheck disable=SC2034  # kept so existing two-argument callers still work
  local group=${2:-""}

  log_operation "INFO" "Deploying individual service: $service"

  # Same precondition as deploy_service_group: without it the path below
  # would silently resolve relative to the filesystem root.
  if [[ -z "${K8S_CONFIG_DIR:-}" ]]; then
    log_operation "ERROR" "K8S_CONFIG_DIR not set"
    return 1
  fi

  # Get YAML file for the service
  local yaml_file
  if ! yaml_file=$(get_yaml_file_for_service "$service"); then
    return 1
  fi

  local full_path="$K8S_CONFIG_DIR/$yaml_file"
  if [[ ! -f "$full_path" ]]; then
    log_operation "ERROR" "YAML file not found: $full_path"
    return 1
  fi

  # Check dependencies (unsatisfied dependencies only produce a warning)
  if ! check_app_dependencies "$service"; then
    log_operation "WARNING" "Dependencies not satisfied, but proceeding with deployment"
  fi

  log_operation "INFO" "Applying YAML file: $yaml_file for service: $service"
  log_kubectl_command "kubectl apply -f $full_path"

  if kubectl apply -f "$full_path"; then
    log_operation "SUCCESS" "Successfully deployed service: $service"
    # Wait for service to be ready
    wait_for_service_ready "$service" "$K8S_NAMESPACE" 180
    return 0
  else
    log_operation "ERROR" "Failed to deploy service: $service"
    return 1
  fi
}

#######################################
# Stop an individual service.
# Globals:   K8S_NAMESPACE (read)
# Arguments: $1 - service (deployment) name
#            $2 - stop mode (default --keep-data):
#                   --keep-data / --stop-only : scale the deployment to 0
#                   --delete-all              : delete the deployment and the
#                                               "<service>-service" Service
# Returns:   1 for an unknown mode, 0 otherwise. kubectl failures are
#            deliberately downgraded to warnings (the resource may not exist).
#######################################
stop_individual_service() {
  local service=$1
  local mode=${2:-"--keep-data"}

  log_operation "INFO" "Stopping individual service: $service (mode: $mode)"

  case "$mode" in
    "--keep-data")
      # Scale deployment to 0 but keep everything else
      log_kubectl_command "kubectl scale deployment $service --replicas=0 -n $K8S_NAMESPACE"
      # stderr is suppressed on purpose: a missing deployment is only a warning.
      if kubectl scale deployment "$service" --replicas=0 -n "$K8S_NAMESPACE" 2>/dev/null; then
        log_operation "SUCCESS" "Scaled down service: $service"
      else
        log_operation "WARNING" "Failed to scale down service: $service (may not exist)"
      fi
      ;;
    "--stop-only")
      # Same as keep-data for Kubernetes
      log_kubectl_command "kubectl scale deployment $service --replicas=0 -n $K8S_NAMESPACE"
      if kubectl scale deployment "$service" --replicas=0 -n "$K8S_NAMESPACE" 2>/dev/null; then
        log_operation "SUCCESS" "Stopped service: $service"
      else
        log_operation "WARNING" "Failed to stop service: $service (may not exist)"
      fi
      ;;
    "--delete-all")
      # Delete the deployment and associated resources
      log_kubectl_command "kubectl delete deployment $service -n $K8S_NAMESPACE"
      if kubectl delete deployment "$service" -n "$K8S_NAMESPACE" 2>/dev/null; then
        log_operation "SUCCESS" "Deleted deployment: $service"
      else
        log_operation "WARNING" "Failed to delete deployment: $service (may not exist)"
      fi

      # Also delete service if it exists (best effort, failure is ignored)
      log_kubectl_command "kubectl delete service ${service}-service -n $K8S_NAMESPACE"
      kubectl delete service "${service}-service" -n "$K8S_NAMESPACE" 2>/dev/null || true
      ;;
    *)
      log_operation "ERROR" "Unknown stop mode: $mode"
      return 1
      ;;
  esac

  return 0
}

#######################################
# Start an individual service by scaling its existing deployment back up.
# The replica count is hard-coded: 2 for eveai-workers and
# eveai-chat-workers, 1 for everything else.
# Globals:   K8S_NAMESPACE (read)
# Arguments: $1 - service (deployment) name
# Returns:   0 if the scale command succeeded, 1 if the deployment does not
#            exist or scaling failed. The result of the follow-up readiness
#            wait does not affect the return value.
#######################################
start_individual_service() {
  local service=$1

  log_operation "INFO" "Starting individual service: $service"

  # Check if deployment exists
  if ! kubectl get deployment "$service" -n "$K8S_NAMESPACE" &>/dev/null; then
    log_operation "ERROR" "Deployment '$service' does not exist. Use deploy function instead."
    return 1
  fi

  # Get the original replica count (assuming 1 if not specified)
  local desired_replicas=1

  # For services that typically have multiple replicas
  case "$service" in
    "eveai-workers" | "eveai-chat-workers")
      desired_replicas=2
      ;;
  esac

  log_kubectl_command "kubectl scale deployment $service --replicas=$desired_replicas -n $K8S_NAMESPACE"
  if kubectl scale deployment "$service" --replicas="$desired_replicas" -n "$K8S_NAMESPACE"; then
    log_operation "SUCCESS" "Started service: $service with $desired_replicas replicas"
    # Wait for service to be ready
    wait_for_service_ready "$service" "$K8S_NAMESPACE" 180
    return 0
  else
    log_operation "ERROR" "Failed to start service: $service"
    return 1
  fi
}

#######################################
# Wait for every service in a group to be ready.
# Globals:   K8S_NAMESPACE (read)
# Arguments: $1 - group name
#            $2 - timeout passed to wait_for_service_ready for each service
#                 (default 300)
# Returns:   0 if all services became ready, 1 if the service list could not
#            be obtained or any service failed to become ready.
#######################################
wait_for_group_ready() {
  local group=$1
  local timeout=${2:-300}

  log_operation "INFO" "Waiting for service group '$group' to be ready"

  local services
  if ! services=$(get_services_in_group "$group"); then
    return 1
  fi

  local all_ready=true
  local service
  # Intentionally unquoted: the service list is split on whitespace.
  for service in $services; do
    if ! wait_for_service_ready "$service" "$K8S_NAMESPACE" "$timeout"; then
      all_ready=false
      log_operation "WARNING" "Service '$service' in group '$group' failed to become ready"
    fi
  done

  if [[ "$all_ready" == "true" ]]; then
    log_operation "SUCCESS" "All services in group '$group' are ready"
    return 0
  else
    log_operation "ERROR" "Some services in group '$group' failed to become ready"
    return 1
  fi
}

#######################################
# Get service status.
# Arguments: $1 - service (deployment) name
#            $2 - namespace (default: $K8S_NAMESPACE)
# Outputs:   one of NOT_DEPLOYED, STOPPED, RUNNING, PARTIAL, STARTING on stdout
# Returns:   1 when the deployment cannot be fetched (NOT_DEPLOYED),
#            0 otherwise.
#######################################
get_service_status() {
  local service=$1
  local namespace=${2:-$K8S_NAMESPACE}

  if ! kubectl get deployment "$service" -n "$namespace" &>/dev/null; then
    echo "NOT_DEPLOYED"
    return 1
  fi

  local ready_replicas
  ready_replicas=$(kubectl get deployment "$service" -n "$namespace" \
    -o jsonpath='{.status.readyReplicas}' 2>/dev/null)
  local desired_replicas
  desired_replicas=$(kubectl get deployment "$service" -n "$namespace" \
    -o jsonpath='{.spec.replicas}' 2>/dev/null)

  # An empty jsonpath result is treated as zero.
  if [[ -z "$ready_replicas" ]]; then
    ready_replicas=0
  fi
  if [[ -z "$desired_replicas" ]]; then
    desired_replicas=0
  fi

  if [[ "$desired_replicas" -eq 0 ]]; then
    echo "STOPPED"
  elif [[ "$ready_replicas" -eq "$desired_replicas" && "$ready_replicas" -gt 0 ]]; then
    echo "RUNNING"
  elif [[ "$ready_replicas" -gt 0 ]]; then
    echo "PARTIAL"
  else
    echo "STARTING"
  fi
}

#######################################
# Show detailed service status.
# With an argument: prints status, deployment, pods (label app=<service>) and
# the last 5 events for that service. Without: prints a one-line status for
# every service returned by get_services_in_group "all".
# Globals:   K8S_NAMESPACE (read)
# Arguments: $1 - service name (optional)
#######################################
show_service_status() {
  local service=${1:-""}
  # Deliberately not named "status": that name is a read-only special
  # parameter in zsh, which this file explicitly tries to support.
  local svc_status

  if [[ -n "$service" ]]; then
    # Show status for specific service
    echo "🔍 Status for service: $service"
    echo "================================"

    svc_status=$(get_service_status "$service")
    echo "Status: $svc_status"

    if kubectl get deployment "$service" -n "$K8S_NAMESPACE" &>/dev/null; then
      echo ""
      echo "Deployment details:"
      kubectl get deployment "$service" -n "$K8S_NAMESPACE"

      echo ""
      echo "Pod details:"
      kubectl get pods -l "app=$service" -n "$K8S_NAMESPACE"

      echo ""
      echo "Recent events:"
      kubectl get events --field-selector involvedObject.name="$service" \
        -n "$K8S_NAMESPACE" --sort-by='.lastTimestamp' | tail -5
    else
      echo "Deployment not found"
    fi
  else
    # Show status for all services
    echo "🔍 Service Status Overview:"
    echo "=========================="

    local all_services
    all_services=$(get_services_in_group "all")

    local svc status_icon
    # Intentionally unquoted: the service list is split on whitespace.
    for svc in $all_services; do
      svc_status=$(get_service_status "$svc")

      case "$svc_status" in
        "RUNNING") status_icon="✅" ;;
        "PARTIAL") status_icon="⚠️" ;;
        "STARTING") status_icon="🔄" ;;
        "STOPPED") status_icon="⏹️" ;;
        "NOT_DEPLOYED") status_icon="❌" ;;
        *) status_icon="❓" ;;
      esac

      echo " $svc: $status_icon $svc_status"
    done
  fi
}

#######################################
# Restart a service (stop and start): scale to 0, pause 5 seconds, scale up.
# Arguments: $1 - service (deployment) name
# Returns:   1 if stopping or starting failed, otherwise the status of the
#            final log_operation call.
#######################################
restart_service() {
  local service=$1

  log_operation "INFO" "Restarting service: $service"

  if ! stop_individual_service "$service" "--stop-only"; then
    log_operation "ERROR" "Failed to stop service: $service"
    return 1
  fi

  sleep 5

  if ! start_individual_service "$service"; then
    log_operation "ERROR" "Failed to start service: $service"
    return 1
  fi

  log_operation "SUCCESS" "Successfully restarted service: $service"
}

#######################################
# Test service connectivity via Ingress: probe a fixed list of HTTP
# endpoints with curl (10 second limit each).
# Returns:   0 if all endpoints respond, 1 if only some respond,
#            2 if none respond.
#######################################
test_connectivity_ingress() {
  log_operation "INFO" "Testing Ingress connectivity..."

  # Test Ingress endpoints
  local endpoints=(
    "http://minty.ask-eve-ai-local.com:3080/admin/"
    "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready"
    "http://minty.ask-eve-ai-local.com:3080/chat-client/"
    "http://minty.ask-eve-ai-local.com:3080/static/"
    "http://localhost:3009" # MinIO Console (direct)
    "http://localhost:3010" # Prometheus (direct)
    "http://localhost:3012" # Grafana (direct)
  )

  local success_count=0
  local total_count=${#endpoints[@]}
  local endpoint

  for endpoint in "${endpoints[@]}"; do
    log_operation "INFO" "Testing $endpoint..."
    if curl -f -s --max-time 10 "$endpoint" > /dev/null; then
      log_operation "SUCCESS" "$endpoint is responding"
      # Plain assignment rather than ((success_count++)): the arithmetic
      # command returns status 1 when the old value is 0, which would abort
      # a caller running under `set -e`.
      success_count=$((success_count + 1))
    else
      log_operation "WARNING" "$endpoint is not responding (may still be starting up)"
    fi
  done

  echo ""
  log_operation "INFO" "Connectivity test completed: $success_count/$total_count endpoints responding"

  if [[ $success_count -eq $total_count ]]; then
    log_operation "SUCCESS" "All endpoints are responding"
    return 0
  elif [[ $success_count -gt 0 ]]; then
    log_operation "WARNING" "Some endpoints are not responding"
    return 1
  else
    log_operation "ERROR" "No endpoints are responding"
    return 2
  fi
}

#######################################
# Show connection information for Ingress setup: service URLs, default
# credentials, handy kubectl commands and environment details.
# Globals:   K8S_ENVIRONMENT, K8S_NAMESPACE, K8S_VERSION, K8S_CLUSTER,
#            K8S_CONFIG_DIR (read)
# Outputs:   human-readable text on stdout
#######################################
show_connection_info() {
  echo ""
  echo "=================================================="
  log_operation "SUCCESS" "EveAI $K8S_ENVIRONMENT Cluster Connection Info"
  echo "=================================================="
  echo ""
  echo "🌐 Service URLs:"
  echo " Main Application (via Ingress only):"
  echo " • Main App: http://minty.ask-eve-ai-local.com:3080/admin/"
  echo " • API: http://minty.ask-eve-ai-local.com:3080/api/"
  echo " • Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/"
  echo " • Static Files: http://minty.ask-eve-ai-local.com:3080/static/"
  echo ""
  echo " Infrastructure (direct NodePort access):"
  echo " • Redis: redis://minty.ask-eve-ai-local.com:3006"
  echo " • MinIO S3: http://minty.ask-eve-ai-local.com:3008"
  echo " • MinIO Console: http://minty.ask-eve-ai-local.com:3009"
  echo ""
  echo " Monitoring (direct NodePort access):"
  echo " • Prometheus: http://minty.ask-eve-ai-local.com:3010"
  echo " • Grafana: http://minty.ask-eve-ai-local.com:3012"
  echo ""
  echo "🔑 Default Credentials:"
  echo " • MinIO: minioadmin / minioadmin"
  echo " • Grafana: admin / admin"
  echo " • Flower: Felucia / Jungles"
  echo ""
  echo "🛠️ Management Commands:"
  echo " • kubectl get all -n $K8S_NAMESPACE"
  echo " • kubectl get ingress -n $K8S_NAMESPACE"
  echo " • kubectl logs -f deployment/eveai-app -n $K8S_NAMESPACE"
  echo " • kubectl describe ingress eveai-ingress -n $K8S_NAMESPACE"
  echo ""
  echo "🗂️ Data Persistence:"
  echo " • Host data path: \$HOME/k8s-data/$K8S_ENVIRONMENT/"
  echo " • Logs path: \$HOME/k8s-data/$K8S_ENVIRONMENT/logs/"
  echo ""
  echo "📊 Environment Details:"
  echo " • Environment: $K8S_ENVIRONMENT"
  echo " • Version: $K8S_VERSION"
  echo " • Cluster: $K8S_CLUSTER"
  echo " • Namespace: $K8S_NAMESPACE"
  echo " • Config Dir: $K8S_CONFIG_DIR"
}

#######################################
# Deploy all services in structured order (like deploy-all-services.sh):
# infrastructure -> apps -> static -> monitoring, then run the connectivity
# test and print connection info.
# Globals:   K8S_ENVIRONMENT (read)
# Returns:   1 if the infrastructure, apps or static stage fails to deploy or
#            infrastructure never becomes ready; 0 otherwise. Monitoring
#            failures, slow application readiness and connectivity test
#            results are not fatal.
#######################################
deploy_all_structured() {
  log_operation "INFO" "Starting structured deployment of all services"

  echo ""
  echo "=================================================="
  echo "🚀 Deploying EveAI $K8S_ENVIRONMENT Services"
  echo "=================================================="

  # Stage 1: Infrastructure
  log_operation "INFO" "Stage 1: Deploying infrastructure services..."
  if ! deploy_service_group "infrastructure"; then
    log_operation "ERROR" "Failed to deploy infrastructure services"
    return 1
  fi

  log_operation "INFO" "Waiting for infrastructure to be ready..."
  if ! wait_for_group_ready "infrastructure"; then
    log_operation "ERROR" "Infrastructure services failed to become ready"
    return 1
  fi

  sleep 5

  # Stage 2: Application services
  log_operation "INFO" "Stage 2: Deploying application services..."
  if ! deploy_service_group "apps"; then
    log_operation "ERROR" "Failed to deploy application services"
    return 1
  fi

  log_operation "INFO" "Waiting for application services to be ready..."
  if ! wait_for_group_ready "apps"; then
    log_operation "WARNING" "Some application services may still be starting"
  fi

  sleep 5

  # Stage 3: Static files and ingress
  log_operation "INFO" "Stage 3: Deploying static files and ingress..."
  if ! deploy_service_group "static"; then
    log_operation "ERROR" "Failed to deploy static services"
    return 1
  fi

  # Stage 4: Monitoring services
  log_operation "INFO" "Stage 4: Deploying monitoring services..."
  if ! deploy_service_group "monitoring"; then
    log_operation "WARNING" "Failed to deploy monitoring services (continuing anyway)"
  fi

  sleep 10

  # Final verification
  log_operation "INFO" "Running final connectivity tests..."
  test_connectivity_ingress

  show_connection_info

  log_operation "SUCCESS" "Structured deployment completed!"
  return 0
}

# Export functions for use in other scripts
if [[ -n "$ZSH_VERSION" ]]; then
  # zsh has no `export -f`; these calls only look the functions up.
  typeset -f deploy_service_group stop_service_group start_service_group > /dev/null
  typeset -f deploy_individual_service stop_individual_service start_individual_service > /dev/null
  typeset -f wait_for_group_ready get_service_status show_service_status restart_service > /dev/null
  typeset -f test_connectivity_ingress show_connection_info deploy_all_structured > /dev/null
else
  export -f deploy_service_group stop_service_group start_service_group
  export -f deploy_individual_service stop_individual_service start_individual_service
  export -f wait_for_group_ready get_service_status show_service_status restart_service
  export -f test_connectivity_ingress show_connection_info deploy_all_structured
fi