- Starting redis and minio works - working on getting the actual apps to start ... not working yet.
#!/bin/bash
# Kubernetes Core Functions
# File: k8s-functions.sh

# Deploy a service group
deploy_service_group() {
    local group=$1

    log_operation "INFO" "Deploying service group: $group"

    if [[ -z "$K8S_CONFIG_DIR" ]]; then
        log_operation "ERROR" "K8S_CONFIG_DIR not set"
        return 1
    fi

    # Get YAML files for the group
    local yaml_files
    yaml_files=$(get_yaml_files_for_group "$group")

    if [[ $? -ne 0 ]]; then
        log_operation "ERROR" "Failed to get YAML files for group: $group"
        return 1
    fi

    # Check dependencies first
    if ! check_group_dependencies "$group"; then
        log_operation "WARNING" "Some dependencies not satisfied, but proceeding with deployment"
    fi

    # Deploy each YAML file
    local success=true
    for yaml_file in $yaml_files; do
        local full_path="$K8S_CONFIG_DIR/$yaml_file"

        if [[ ! -f "$full_path" ]]; then
            log_operation "ERROR" "YAML file not found: $full_path"
            success=false
            continue
        fi

        log_operation "INFO" "Applying YAML file: $yaml_file"
        log_kubectl_command "kubectl apply -f $full_path"

        # Apply with retry logic for namespace race condition handling
        local max_attempts=3
        local attempt=1
        local file_success=false

        while [[ $attempt -le $max_attempts ]] && [[ "$file_success" == "false" ]]; do
            if kubectl apply -f "$full_path"; then
                log_operation "SUCCESS" "Successfully applied: $yaml_file"
                file_success=true
            else
                if [[ $attempt -lt $max_attempts ]]; then
                    log_operation "WARNING" "Attempt $attempt failed for $yaml_file, retrying after namespace sync..."
                    sleep 3
                    attempt=$((attempt + 1))
                else
                    log_operation "ERROR" "Failed to apply $yaml_file after $max_attempts attempts"
                    success=false
                    break  # leave the retry loop once the final attempt has failed
                fi
            fi
        done

        if [[ "$file_success" == "false" ]]; then
            success=false
        fi
    done

    if [[ "$success" == "true" ]]; then
        log_operation "SUCCESS" "Service group '$group' deployed successfully"

        # Wait for services to be ready
        wait_for_group_ready "$group"
        return 0
    else
        log_operation "ERROR" "Failed to deploy service group '$group'"
        return 1
    fi
}
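
# Example (illustrative, not in the original file): deploying one group and
# checking the result; the group name "infrastructure" and the functions used
# here are the ones defined or referenced elsewhere in this script.
#
#   if deploy_service_group "infrastructure"; then
#       show_service_status
#   fi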

# Stop a service group
stop_service_group() {
    local group=$1
    local mode=${2:-"--keep-data"}  # --keep-data, --stop-only, --delete-all

    log_operation "INFO" "Stopping service group: $group (mode: $mode)"

    local services
    services=$(get_services_in_group "$group")

    if [[ $? -ne 0 ]]; then
        return 1
    fi

    # Sort services in reverse deployment order for graceful shutdown
    local service_array
    read -ra service_array <<< "$services"
    local sorted_services
    sorted_services=$(sort_services_by_deploy_order "${service_array[@]}")

    # Reverse the order
    local reversed_services=()
    local service_list=($sorted_services)
    for ((i=${#service_list[@]}-1; i>=0; i--)); do
        reversed_services+=("${service_list[i]}")
    done

    local success=true
    for service in "${reversed_services[@]}"; do
        if ! stop_individual_service "$service" "$mode"; then
            success=false
        fi
    done

    if [[ "$success" == "true" ]]; then
        log_operation "SUCCESS" "Service group '$group' stopped successfully"
        return 0
    else
        log_operation "ERROR" "Failed to stop some services in group '$group'"
        return 1
    fi
}

# Start a service group (for stopped services)
start_service_group() {
    local group=$1

    log_operation "INFO" "Starting service group: $group"

    local services
    services=$(get_services_in_group "$group")

    if [[ $? -ne 0 ]]; then
        return 1
    fi

    # Sort services by deployment order
    local service_array
    read -ra service_array <<< "$services"
    local sorted_services
    sorted_services=$(sort_services_by_deploy_order "${service_array[@]}")

    local success=true
    for service in $sorted_services; do
        if ! start_individual_service "$service"; then
            success=false
        fi
    done

    if [[ "$success" == "true" ]]; then
        log_operation "SUCCESS" "Service group '$group' started successfully"
        return 0
    else
        log_operation "ERROR" "Failed to start some services in group '$group'"
        return 1
    fi
}

# Deploy an individual service
deploy_individual_service() {
    local service=$1
    local group=${2:-""}

    log_operation "INFO" "Deploying individual service: $service"

    # Get YAML file for the service
    local yaml_file
    yaml_file=$(get_yaml_file_for_service "$service")

    if [[ $? -ne 0 ]]; then
        return 1
    fi

    local full_path="$K8S_CONFIG_DIR/$yaml_file"

    if [[ ! -f "$full_path" ]]; then
        log_operation "ERROR" "YAML file not found: $full_path"
        return 1
    fi

    # Check dependencies
    if ! check_app_dependencies "$service"; then
        log_operation "WARNING" "Dependencies not satisfied, but proceeding with deployment"
    fi

    log_operation "INFO" "Applying YAML file: $yaml_file for service: $service"
    log_kubectl_command "kubectl apply -f $full_path"

    if kubectl apply -f "$full_path"; then
        log_operation "SUCCESS" "Successfully deployed service: $service"

        # Wait for service to be ready
        wait_for_service_ready "$service" "$K8S_NAMESPACE" 180
        return 0
    else
        log_operation "ERROR" "Failed to deploy service: $service"
        return 1
    fi
}

# Stop an individual service
stop_individual_service() {
    local service=$1
    local mode=${2:-"--keep-data"}

    log_operation "INFO" "Stopping individual service: $service (mode: $mode)"

    case "$mode" in
        "--keep-data")
            # Scale deployment to 0 but keep everything else
            log_kubectl_command "kubectl scale deployment $service --replicas=0 -n $K8S_NAMESPACE"
            if kubectl scale deployment "$service" --replicas=0 -n "$K8S_NAMESPACE" 2>/dev/null; then
                log_operation "SUCCESS" "Scaled down service: $service"
            else
                log_operation "WARNING" "Failed to scale down service: $service (may not exist)"
            fi
            ;;
        "--stop-only")
            # Same as keep-data for Kubernetes
            log_kubectl_command "kubectl scale deployment $service --replicas=0 -n $K8S_NAMESPACE"
            if kubectl scale deployment "$service" --replicas=0 -n "$K8S_NAMESPACE" 2>/dev/null; then
                log_operation "SUCCESS" "Stopped service: $service"
            else
                log_operation "WARNING" "Failed to stop service: $service (may not exist)"
            fi
            ;;
        "--delete-all")
            # Delete the deployment and associated resources
            log_kubectl_command "kubectl delete deployment $service -n $K8S_NAMESPACE"
            if kubectl delete deployment "$service" -n "$K8S_NAMESPACE" 2>/dev/null; then
                log_operation "SUCCESS" "Deleted deployment: $service"
            else
                log_operation "WARNING" "Failed to delete deployment: $service (may not exist)"
            fi

            # Also delete service if it exists
            log_kubectl_command "kubectl delete service ${service}-service -n $K8S_NAMESPACE"
            kubectl delete service "${service}-service" -n "$K8S_NAMESPACE" 2>/dev/null || true
            ;;
        *)
            log_operation "ERROR" "Unknown stop mode: $mode"
            return 1
            ;;
    esac

    return 0
}
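
# Example (illustrative, not in the original file): the three stop modes,
# shown against the eveai-app deployment referenced elsewhere in this script.
#
#   stop_individual_service "eveai-app"                 # default --keep-data: scale to 0
#   stop_individual_service "eveai-app" "--stop-only"   # identical behaviour on Kubernetes
#   stop_individual_service "eveai-app" "--delete-all"  # delete deployment + its service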

# Start an individual service (restore replicas)
start_individual_service() {
    local service=$1

    log_operation "INFO" "Starting individual service: $service"

    # Check if deployment exists
    if ! kubectl get deployment "$service" -n "$K8S_NAMESPACE" &>/dev/null; then
        log_operation "ERROR" "Deployment '$service' does not exist. Use deploy function instead."
        return 1
    fi

    # Get the original replica count (assuming 1 if not specified)
    local desired_replicas=1

    # For services that typically have multiple replicas
    case "$service" in
        "eveai-workers"|"eveai-chat-workers")
            desired_replicas=2
            ;;
    esac

    log_kubectl_command "kubectl scale deployment $service --replicas=$desired_replicas -n $K8S_NAMESPACE"
    if kubectl scale deployment "$service" --replicas="$desired_replicas" -n "$K8S_NAMESPACE"; then
        log_operation "SUCCESS" "Started service: $service with $desired_replicas replicas"

        # Wait for service to be ready
        wait_for_service_ready "$service" "$K8S_NAMESPACE" 180
        return 0
    else
        log_operation "ERROR" "Failed to start service: $service"
        return 1
    fi
}

# Wait for a service group to be ready
wait_for_group_ready() {
    local group=$1
    local timeout=${2:-300}

    log_operation "INFO" "Waiting for service group '$group' to be ready"

    local services
    services=$(get_services_in_group "$group")

    if [[ $? -ne 0 ]]; then
        return 1
    fi

    local all_ready=true
    for service in $services; do
        if ! wait_for_service_ready "$service" "$K8S_NAMESPACE" "$timeout"; then
            all_ready=false
            log_operation "WARNING" "Service '$service' in group '$group' failed to become ready"
        fi
    done

    if [[ "$all_ready" == "true" ]]; then
        log_operation "SUCCESS" "All services in group '$group' are ready"
        return 0
    else
        log_operation "ERROR" "Some services in group '$group' failed to become ready"
        return 1
    fi
}

# Get service status
get_service_status() {
    local service=$1
    local namespace=${2:-$K8S_NAMESPACE}

    if ! kubectl get deployment "$service" -n "$namespace" &>/dev/null; then
        echo "NOT_DEPLOYED"
        return 1
    fi

    local ready_replicas
    ready_replicas=$(kubectl get deployment "$service" -n "$namespace" -o jsonpath='{.status.readyReplicas}' 2>/dev/null)
    local desired_replicas
    desired_replicas=$(kubectl get deployment "$service" -n "$namespace" -o jsonpath='{.spec.replicas}' 2>/dev/null)

    if [[ -z "$ready_replicas" ]]; then
        ready_replicas=0
    fi

    if [[ -z "$desired_replicas" ]]; then
        desired_replicas=0
    fi

    if [[ "$desired_replicas" -eq 0 ]]; then
        echo "STOPPED"
    elif [[ "$ready_replicas" -eq "$desired_replicas" && "$ready_replicas" -gt 0 ]]; then
        echo "RUNNING"
    elif [[ "$ready_replicas" -gt 0 ]]; then
        echo "PARTIAL"
    else
        echo "STARTING"
    fi
}
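
# Example (illustrative, not in the original file): acting on the status
# string that get_service_status prints.
#
#   if [[ "$(get_service_status "eveai-app")" != "RUNNING" ]]; then
#       restart_service "eveai-app"
#   fi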

# Show detailed service status
show_service_status() {
    local service=${1:-""}

    if [[ -n "$service" ]]; then
        # Show status for specific service
        echo "🔍 Status for service: $service"
        echo "================================"

        local status
        status=$(get_service_status "$service")
        echo "Status: $status"

        if kubectl get deployment "$service" -n "$K8S_NAMESPACE" &>/dev/null; then
            echo ""
            echo "Deployment details:"
            kubectl get deployment "$service" -n "$K8S_NAMESPACE"

            echo ""
            echo "Pod details:"
            kubectl get pods -l "app=$service" -n "$K8S_NAMESPACE"

            echo ""
            echo "Recent events:"
            kubectl get events --field-selector involvedObject.name="$service" -n "$K8S_NAMESPACE" --sort-by='.lastTimestamp' | tail -5
        else
            echo "Deployment not found"
        fi
    else
        # Show status for all services
        echo "🔍 Service Status Overview:"
        echo "=========================="

        local all_services
        all_services=$(get_services_in_group "all")

        for svc in $all_services; do
            local status
            status=$(get_service_status "$svc")

            local status_icon
            case "$status" in
                "RUNNING") status_icon="✅" ;;
                "PARTIAL") status_icon="⚠️" ;;
                "STARTING") status_icon="🔄" ;;
                "STOPPED") status_icon="⏹️" ;;
                "NOT_DEPLOYED") status_icon="❌" ;;
                *) status_icon="❓" ;;
            esac

            echo " $svc: $status_icon $status"
        done
    fi
}

# Restart a service (stop and start)
restart_service() {
    local service=$1

    log_operation "INFO" "Restarting service: $service"

    if ! stop_individual_service "$service" "--stop-only"; then
        log_operation "ERROR" "Failed to stop service: $service"
        return 1
    fi

    sleep 5

    if ! start_individual_service "$service"; then
        log_operation "ERROR" "Failed to start service: $service"
        return 1
    fi

    log_operation "SUCCESS" "Successfully restarted service: $service"
}

# Test service connectivity via Ingress
test_connectivity_ingress() {
    log_operation "INFO" "Testing Ingress connectivity..."

    # Test Ingress endpoints
    local endpoints=(
        "http://minty.ask-eve-ai-local.com:3080/admin/"
        "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready"
        "http://minty.ask-eve-ai-local.com:3080/chat-client/"
        "http://minty.ask-eve-ai-local.com:3080/static/"
        "http://localhost:3009"   # MinIO Console (direct)
        "http://localhost:3010"   # Prometheus (direct)
        "http://localhost:3012"   # Grafana (direct)
    )

    local success_count=0
    local total_count=${#endpoints[@]}

    for endpoint in "${endpoints[@]}"; do
        log_operation "INFO" "Testing $endpoint..."
        if curl -f -s --max-time 10 "$endpoint" > /dev/null; then
            log_operation "SUCCESS" "$endpoint is responding"
            ((success_count++))
        else
            log_operation "WARNING" "$endpoint is not responding (may still be starting up)"
        fi
    done

    echo ""
    log_operation "INFO" "Connectivity test completed: $success_count/$total_count endpoints responding"

    if [[ $success_count -eq $total_count ]]; then
        log_operation "SUCCESS" "All endpoints are responding"
        return 0
    elif [[ $success_count -gt 0 ]]; then
        log_operation "WARNING" "Some endpoints are not responding"
        return 1
    else
        log_operation "ERROR" "No endpoints are responding"
        return 2
    fi
}
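
# Example (illustrative, not in the original file): probing a single endpoint
# by hand with the same curl flags the function uses.
#
#   curl -f -s --max-time 10 http://minty.ask-eve-ai-local.com:3080/api/healthz/ready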

# Show connection information for Ingress setup
show_connection_info() {
    echo ""
    echo "=================================================="
    log_operation "SUCCESS" "EveAI $K8S_ENVIRONMENT Cluster Connection Info"
    echo "=================================================="
    echo ""
    echo "🌐 Service URLs:"
    echo " Main Application (via Ingress only):"
    echo " • Main App: http://minty.ask-eve-ai-local.com:3080/admin/"
    echo " • API: http://minty.ask-eve-ai-local.com:3080/api/"
    echo " • Chat Client: http://minty.ask-eve-ai-local.com:3080/chat-client/"
    echo " • Static Files: http://minty.ask-eve-ai-local.com:3080/static/"
    echo ""
    echo " Infrastructure (direct NodePort access):"
    echo " • Redis: redis://minty.ask-eve-ai-local.com:3006"
    echo " • MinIO S3: http://minty.ask-eve-ai-local.com:3008"
    echo " • MinIO Console: http://minty.ask-eve-ai-local.com:3009"
    echo ""
    echo " Monitoring (direct NodePort access):"
    echo " • Prometheus: http://minty.ask-eve-ai-local.com:3010"
    echo " • Grafana: http://minty.ask-eve-ai-local.com:3012"
    echo ""
    echo "🔑 Default Credentials:"
    echo " • MinIO: minioadmin / minioadmin"
    echo " • Grafana: admin / admin"
    echo " • Flower: Felucia / Jungles"
    echo ""
    echo "🛠️ Management Commands:"
    echo " • kubectl get all -n $K8S_NAMESPACE"
    echo " • kubectl get ingress -n $K8S_NAMESPACE"
    echo " • kubectl logs -f deployment/eveai-app -n $K8S_NAMESPACE"
    echo " • kubectl describe ingress eveai-ingress -n $K8S_NAMESPACE"
    echo ""
    echo "🗂️ Data Persistence:"
    echo " • Host data path: \$HOME/k8s-data/$K8S_ENVIRONMENT/"
    echo " • Logs path: \$HOME/k8s-data/$K8S_ENVIRONMENT/logs/"
    echo ""
    echo "📊 Environment Details:"
    echo " • Environment: $K8S_ENVIRONMENT"
    echo " • Version: $K8S_VERSION"
    echo " • Cluster: $K8S_CLUSTER"
    echo " • Namespace: $K8S_NAMESPACE"
    echo " • Config Dir: $K8S_CONFIG_DIR"
}

# Deploy all services in structured order (like deploy-all-services.sh)
deploy_all_structured() {
    log_operation "INFO" "Starting structured deployment of all services"

    echo ""
    echo "=================================================="
    echo "🚀 Deploying EveAI $K8S_ENVIRONMENT Services"
    echo "=================================================="

    # Stage 1: Infrastructure
    log_operation "INFO" "Stage 1: Deploying infrastructure services..."
    if ! deploy_service_group "infrastructure"; then
        log_operation "ERROR" "Failed to deploy infrastructure services"
        return 1
    fi

    log_operation "INFO" "Waiting for infrastructure to be ready..."
    if ! wait_for_group_ready "infrastructure"; then
        log_operation "ERROR" "Infrastructure services failed to become ready"
        return 1
    fi

    sleep 5

    # Stage 2: Application services
    log_operation "INFO" "Stage 2: Deploying application services..."
    if ! deploy_service_group "apps"; then
        log_operation "ERROR" "Failed to deploy application services"
        return 1
    fi

    log_operation "INFO" "Waiting for application services to be ready..."
    if ! wait_for_group_ready "apps"; then
        log_operation "WARNING" "Some application services may still be starting"
    fi

    sleep 5

    # Stage 3: Static files and ingress
    log_operation "INFO" "Stage 3: Deploying static files and ingress..."
    if ! deploy_service_group "static"; then
        log_operation "ERROR" "Failed to deploy static services"
        return 1
    fi

    # Stage 4: Monitoring services
    log_operation "INFO" "Stage 4: Deploying monitoring services..."
    if ! deploy_service_group "monitoring"; then
        log_operation "WARNING" "Failed to deploy monitoring services (continuing anyway)"
    fi

    sleep 10

    # Final verification
    log_operation "INFO" "Running final connectivity tests..."
    test_connectivity_ingress

    show_connection_info

    log_operation "SUCCESS" "Structured deployment completed!"
    return 0
}

# Export functions for use in other scripts
if [[ -n "$ZSH_VERSION" ]]; then
    # zsh cannot export shell functions; just verify they are defined
    typeset -f deploy_service_group stop_service_group start_service_group > /dev/null
    typeset -f deploy_individual_service stop_individual_service start_individual_service > /dev/null
    typeset -f wait_for_group_ready get_service_status show_service_status restart_service > /dev/null
    typeset -f test_connectivity_ingress show_connection_info deploy_all_structured > /dev/null
else
    export -f deploy_service_group stop_service_group start_service_group
    export -f deploy_individual_service stop_individual_service start_individual_service
    export -f wait_for_group_ready get_service_status show_service_status restart_service
    export -f test_connectivity_ingress show_connection_info deploy_all_structured
fi
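
# Usage sketch (illustrative, not part of the original file): this file only
# defines functions, so it is meant to be sourced. The logging and lookup
# helpers (log_operation, log_kubectl_command, get_services_in_group,
# get_yaml_files_for_group, get_yaml_file_for_service,
# sort_services_by_deploy_order, wait_for_service_ready, check_*_dependencies)
# and the K8S_* variables are assumed to come from companion config/helper
# scripts; the file names below are placeholders, not confirmed names.
#
#   source ./k8s-config.sh       # assumed: sets K8S_NAMESPACE, K8S_CONFIG_DIR, K8S_ENVIRONMENT, ...
#   source ./k8s-helpers.sh      # assumed: logging and group/YAML lookup helpers
#   source ./k8s-functions.sh
#
#   deploy_all_structured                      # full staged deployment
#   show_service_status                        # overview of all services
#   stop_service_group "apps" "--keep-data"    # scale the app group down, keep data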