diff --git a/check_running_services.sh b/check_running_services.sh
new file mode 100644
index 0000000..13616d3
--- /dev/null
+++ b/check_running_services.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Diagnostic script to check what services are running
+
+echo "=== KIND CLUSTER STATUS ==="
+echo "Namespaces:"
+kubectl get namespaces | grep eveai
+
+echo -e "\nPods in eveai-dev:"
+kubectl get pods -n eveai-dev
+
+echo -e "\nServices in eveai-dev:"
+kubectl get services -n eveai-dev
+
+echo -e "\n=== TEST CONTAINERS STATUS ==="
+echo "Running test containers:"
+podman ps | grep eveai_test
+
+echo -e "\n=== PORT ANALYSIS ==="
+echo "What's listening on port 3080:"
+lsof -i :3080 2>/dev/null || echo "Nothing found"
+
+echo -e "\nWhat's listening on port 4080:"
+lsof -i :4080 2>/dev/null || echo "Nothing found"
+
+echo -e "\n=== SOLUTION ==="
+echo "The application you see is from TEST CONTAINERS (6 days old),"
+echo "NOT from the Kind cluster (3 minutes old)."
+echo ""
+echo "To test the Kind cluster:"
+echo "1. Stop test containers: podman stop eveai_test_nginx_1 eveai_test_eveai_app_1"
+echo "2. Deploy Kind services: kup-all-structured"
+echo "3. Restart test containers if needed"
\ No newline at end of file
diff --git a/k8s/K8S_SERVICE_MANAGEMENT_README.md b/k8s/K8S_SERVICE_MANAGEMENT_README.md
index 5404278..b648bb3 100644
--- a/k8s/K8S_SERVICE_MANAGEMENT_README.md
+++ b/k8s/K8S_SERVICE_MANAGEMENT_README.md
@@ -4,19 +4,26 @@
 This implementation provides a comprehensive Kubernetes service management system inspired by your `podman_env_switch.sh` workflow. It allows you to easily manage EveAI services across different environments with simple, memorable commands.
 
+**✅ Latest Update (August 2025):** The system has been enhanced with structured deployment functionality, consolidating all features from `deploy-all-services.sh` into the main `k8s_env_switch.sh` system. This eliminates duplicate maintenance and provides a unified interface for all service management operations.
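+
+For example, the consolidated flow replaces the old standalone script invocation (a sketch; the exact legacy call may have differed in your setup):
+
+```bash
+# Before (legacy, standalone script):
+./k8s/dev/deploy-all-services.sh
+
+# After (consolidated, single entry point):
+source k8s/k8s_env_switch.sh dev
+kup-all-structured
+```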
+
 ## 🚀 Quick Start
 
 ```bash
 # Switch to dev environment
 source k8s/k8s_env_switch.sh dev
 
-# Start all services
-kup
+# Structured deployment (recommended - replaces deploy-all-services.sh)
+kup-all-structured
 
-# Check status
+# Test connectivity and show connection info
+ktest
+kinfo
+
+# Traditional service group management
+kup apps
 kps
 
-# Start individual services
+# Individual service management
 kup-api
 kup-workers
@@ -33,17 +40,25 @@ klogs eveai-app
 k8s/
 ├── k8s_env_switch.sh           # Main script (like podman_env_switch.sh)
 ├── scripts/
-│   ├── k8s-functions.sh        # Core service management functions
+│   ├── k8s-functions.sh        # Core service management functions (enhanced)
 │   ├── service-groups.sh       # Service group definitions
 │   ├── dependency-checks.sh    # Dependency validation
 │   └── logging-utils.sh        # Logging utilities
 ├── dev/                        # Dev environment configs
-│   ├── setup-dev-cluster.sh    # Existing cluster setup
-│   ├── deploy-all-services.sh  # Existing deployment script
+│   ├── setup-dev-cluster.sh    # Cluster setup script
+│   ├── deploy-all-services.sh  # Legacy script (functionality moved to k8s_env_switch.sh)
 │   └── *.yaml                  # Service configurations
 └── test-k8s-functions.sh       # Test script
 ```
 
+### 🔄 Consolidation Benefits
+
+- **✅ No Duplicate Maintenance** - Single system for all service management
+- **✅ Enhanced Functionality** - All deploy-all-services.sh features integrated
+- **✅ Consistent Interface** - Unified command structure across operations
+- **✅ Better User Experience** - Clear, memorable commands with comprehensive help
+- **✅ Future-Ready** - Multi-environment support and extensibility
+
 ## 🔧 Environment Setup
 
 ### Supported Environments
@@ -87,6 +102,13 @@
 
 ## 🎯 Core Commands
 
+### Structured Deployment (Recommended)
+```bash
+kup-all-structured    # Deploy all services in structured order (replaces deploy-all-services.sh)
+ktest                 # Test service connectivity via Ingress
+kinfo                 # Show connection information and service URLs
+```
+
 ### Service Group Management
 ```bash
 kup [group]           # Start service group
diff --git a/k8s/dev/config-secrets.yaml b/k8s/dev/config-secrets.yaml
index 0fec294..bf74d9f 100644
--- a/k8s/dev/config-secrets.yaml
+++ b/k8s/dev/config-secrets.yaml
@@ -1,15 +1,6 @@
 # ConfigMaps and Secrets for EveAI Dev Environment
 # File: config-secrets.yaml
----
-# Namespace for dev environment
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: eveai-dev
-  labels:
-    environment: dev
-    app: eveai
-
+# Note: Namespace is now defined in the separate namespace.yaml file
 ---
 # Non-sensitive configuration
 apiVersion: v1
@@ -19,7 +10,7 @@ metadata:
   namespace: eveai-dev
 data:
   # Database configuration (points to external PostgreSQL)
-  DB_HOST: "host.docker.internal"  # Will resolve to host IP from inside Kind
+  DB_HOST: "postgres-external"  # Points to headless service with endpoints
   DB_PORT: "5432"
   DB_NAME: "eveai_dev"
   DB_USER: "luke"
@@ -40,7 +31,7 @@ data:
   FLOWER_USER: "Felucia"
 
   # Nginx configuration
-  NGINX_SERVER_NAME: "localhost http://minty.ask-eve-ai-local.com/"
+  NGINX_SERVER_NAME: "minty.ask-eve-ai-local.com localhost"
 
   # CrewAI configuration
   CREWAI_STORAGE_DIR: "/app/crewai_storage"
@@ -91,16 +82,30 @@ data:
   SW_EMAIL_SECRET_KEY: ZWM4NDYwNGMtZTJkNC00YjBkLWExMjAtNDA0MjA2OTNmNDJh
 
 ---
-# External Service for PostgreSQL (points to host database)
+# Headless Service for PostgreSQL (points to host database)
 apiVersion: v1
 kind: Service
 metadata:
   name: postgres-external
   namespace: eveai-dev
 spec:
-  type: ExternalName
-  externalName: host.docker.internal
+  type: ClusterIP
+  clusterIP: None
   ports:
   - port: 5432
     targetPort: 5432
+    protocol: TCP
+
+---
+# Endpoints for PostgreSQL (points to host IP)
+apiVersion: v1
+kind: Endpoints
+metadata:
+  name: postgres-external
+  namespace: eveai-dev
+subsets:
+- addresses:
+  - ip: 192.168.1.130  # Host IP where PostgreSQL is running
+  ports:
+  - port: 5432
     protocol: TCP
\ No newline at end of file
diff --git a/k8s/dev/eveai-ingress.yaml b/k8s/dev/eveai-ingress.yaml
index 5fa181b..8ca5c64 100644
--- a/k8s/dev/eveai-ingress.yaml
+++ b/k8s/dev/eveai-ingress.yaml
@@ -22,7 +22,7 @@ spec:
       paths:
       # Static files - highest priority
      - path: /static(/|$)(.*)
-        pathType: Prefix
+        pathType: ImplementationSpecific
         backend:
           service:
             name: static-files-service
@@ -31,7 +31,7 @@ spec:
 
       # Admin interface
       - path: /admin(/|$)(.*)
-        pathType: Prefix
+        pathType: ImplementationSpecific
         backend:
           service:
             name: eveai-app-service
@@ -40,7 +40,7 @@ spec:
 
       # API endpoints
       - path: /api(/|$)(.*)
-        pathType: Prefix
+        pathType: ImplementationSpecific
         backend:
           service:
             name: eveai-api-service
@@ -49,7 +49,7 @@ spec:
 
       # Chat client
       - path: /chat-client(/|$)(.*)
-        pathType: Prefix
+        pathType: ImplementationSpecific
         backend:
           service:
             name: eveai-chat-client-service
@@ -57,7 +57,7 @@ spec:
             number: 5004
 
       # Root redirect to admin (exact match)
-      - path: /()
+      - path: /
         pathType: Exact
         backend:
           service:
diff --git a/k8s/dev/eveai-services.yaml b/k8s/dev/eveai-services.yaml
index 9bcbec3..96d827f 100644
--- a/k8s/dev/eveai-services.yaml
+++ b/k8s/dev/eveai-services.yaml
@@ -38,6 +38,7 @@ spec:
     metadata:
       labels:
         app: eveai-app
+        tier: frontend
     spec:
       containers:
       - name: eveai-app
@@ -58,7 +59,7 @@ spec:
           mountPath: /app/logs
         livenessProbe:
           httpGet:
-            path: /healthz/ready
+            path: /
             port: 5001
           initialDelaySeconds: 60
           periodSeconds: 30
@@ -66,7 +67,7 @@ spec:
           failureThreshold: 3
         readinessProbe:
           httpGet:
-            path: /healthz/ready
+            path: /
             port: 5001
           initialDelaySeconds: 30
           periodSeconds: 10
@@ -95,11 +96,10 @@ metadata:
   labels:
     app: eveai-app
 spec:
-  type: NodePort
+  type: ClusterIP
   ports:
   - port: 5001
     targetPort: 5001
-    nodePort: 30001  # Maps to host port 3001
     protocol: TCP
   selector:
     app: eveai-app
@@ -123,6 +123,7 @@ spec:
     metadata:
       labels:
         app: eveai-api
+        tier: frontend
     spec:
       containers:
       - name: eveai-api
@@ -143,7 +144,7 @@ spec:
           mountPath: /app/logs
         livenessProbe:
           httpGet:
-            path: /healthz/ready
+            path: /
             port: 5003
           initialDelaySeconds: 60
           periodSeconds: 30
@@ -151,7 +152,7 @@ spec:
           failureThreshold: 3
         readinessProbe:
           httpGet:
-            path: /healthz/ready
+            path: /
             port: 5003
           initialDelaySeconds: 30
           periodSeconds: 10
@@ -180,11 +181,10 @@ metadata:
   labels:
     app: eveai-api
 spec:
-  type: NodePort
+  type: ClusterIP
   ports:
   - port: 5003
     targetPort: 5003
-    nodePort: 30003  # Maps to host port 3003
     protocol: TCP
   selector:
     app: eveai-api
@@ -208,6 +208,7 @@ spec:
     metadata:
       labels:
         app: eveai-chat-client
+        tier: frontend
     spec:
       containers:
       - name: eveai-chat-client
@@ -228,7 +229,7 @@ spec:
           mountPath: /app/logs
         livenessProbe:
           httpGet:
-            path: /healthz/ready
+            path: /
             port: 5004
           initialDelaySeconds: 60
           periodSeconds: 30
@@ -236,7 +237,7 @@ spec:
           failureThreshold: 3
         readinessProbe:
           httpGet:
-            path: /healthz/ready
+            path: /
             port: 5004
           initialDelaySeconds: 30
           periodSeconds: 10
@@ -265,11 +266,10 @@ metadata:
   labels:
     app: eveai-chat-client
 spec:
-  type: NodePort
+  type: ClusterIP
   ports:
   - port: 5004
     targetPort: 5004
-    nodePort: 30004  # Maps to host port 3004
     protocol: TCP
   selector:
     app: eveai-chat-client
@@ -289,10 +289,12 @@ spec:
   selector:
     matchLabels:
       app: eveai-workers
+      tier: backend
   template:
     metadata:
      labels:
        app: eveai-workers
+        tier: backend
     spec:
       containers:
       - name: eveai-workers
@@ -338,10 +340,12 @@ spec:
   selector:
     matchLabels:
       app: eveai-chat-workers
+      tier: backend
   template:
     metadata:
       labels:
         app: eveai-chat-workers
+        tier: backend
     spec:
       containers:
       - name: eveai-chat-workers
@@ -387,10 +391,12 @@ spec:
   selector:
     matchLabels:
       app: eveai-beat
+      tier: backend
   template:
     metadata:
       labels:
         app: eveai-beat
+        tier: backend
     spec:
       containers:
       - name: eveai-beat
@@ -438,6 +444,7 @@ spec:
     metadata:
       labels:
         app: eveai-entitlements
+        tier: backend
     spec:
       containers:
       - name: eveai-entitlements
diff --git a/k8s/dev/kind-dev-cluster.yaml b/k8s/dev/kind-dev-cluster.yaml
index ee6ddb6..3dc6325 100644
--- a/k8s/dev/kind-dev-cluster.yaml
+++ b/k8s/dev/kind-dev-cluster.yaml
@@ -20,9 +20,9 @@ nodes:
       nodeRegistration:
         kubeletExtraArgs:
           node-labels: "ingress-ready=true"
-  # Extra port mappings to host (minty) according to port schema 3000-3999
+  # Minimal port mappings - only Ingress and essential monitoring
   extraPortMappings:
-  # Nginx - Main entry point
+  # Ingress Controller - Main entry point (all app access via Ingress)
  - containerPort: 80
    hostPort: 3080
    protocol: TCP
@@ -30,52 +30,28 @@ nodes:
     hostPort: 3443
     protocol: TCP
 
-  # EveAI App
-  - containerPort: 30001
-    hostPort: 3001
-    protocol: TCP
-
-  # EveAI API
-  - containerPort: 30003
-    hostPort: 3003
-    protocol: TCP
-
-  # EveAI Chat Client
-  - containerPort: 30004
-    hostPort: 3004
-    protocol: TCP
-
-  # Redis
+  # Essential monitoring ports (optional - for direct access)
+  # Redis (for direct debugging if needed)
   - containerPort: 30006
     hostPort: 3006
     protocol: TCP
 
-  # Flower (Celery monitoring)
-  - containerPort: 30007
-    hostPort: 3007
-    protocol: TCP
-
-  # MinIO S3 API
+  # MinIO S3 API (for direct S3 access)
   - containerPort: 30008
     hostPort: 3008
     protocol: TCP
 
-  # MinIO Console
+  # MinIO Console (for direct management)
   - containerPort: 30009
     hostPort: 3009
     protocol: TCP
 
-  # Prometheus
+  # Prometheus (for direct metrics access)
   - containerPort: 30010
     hostPort: 3010
     protocol: TCP
 
-  # Pushgateway
-  - containerPort: 30011
-    hostPort: 3011
-    protocol: TCP
-
-  # Grafana
+  # Grafana (for direct dashboard access)
   - containerPort: 30012
     hostPort: 3012
     protocol: TCP
@@ -85,9 +61,6 @@ nodes:
   # MinIO data persistence
   - hostPath: $HOME/k8s-data/dev/minio
     containerPath: /mnt/minio-data
-  # Redis data persistence
-  - hostPath: $HOME/k8s-data/dev/redis
-    containerPath: /mnt/redis-data
   # Application logs
   - hostPath: $HOME/k8s-data/dev/logs
     containerPath: /mnt/app-logs
diff --git a/k8s/dev/kind-minimal.yaml b/k8s/dev/kind-minimal.yaml
deleted file mode 100644
index f79b41a..0000000
--- a/k8s/dev/kind-minimal.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Minimal Kind configuration for testing
-kind: Cluster
-apiVersion: kind.x-k8s.io/v1alpha4
-name: eveai-test-cluster
-networking:
-  apiServerAddress: "127.0.0.1"
-  apiServerPort: 3000
-nodes:
-- role: control-plane
-  kubeadmConfigPatches:
-  - |
-    kind: InitConfiguration
-    nodeRegistration:
-      kubeletExtraArgs:
-        node-labels: "ingress-ready=true"
-  extraPortMappings:
-  - containerPort: 80
-    hostPort: 3080
-    protocol: TCP
\ No newline at end of file
diff --git a/k8s/dev/namespace.yaml b/k8s/dev/namespace.yaml
new file mode 100644
index 0000000..067ba27
--- /dev/null
+++ b/k8s/dev/namespace.yaml
@@ -0,0 +1,11 @@
+# Namespace for EveAI Dev Environment
+# File: namespace.yaml
+---
+# Namespace for dev environment
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: eveai-dev
+  labels:
+    environment: dev
+    app: eveai
\ No newline at end of file
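Note on the network policies that follow: they select namespaces by a `name` label (`name: ingress-nginx`, `name: eveai-dev`). Recent Kubernetes versions only guarantee the automatic `kubernetes.io/metadata.name` label, so it is worth verifying that the expected labels exist before relying on the policies (a sketch, assuming the standard kind ingress-nginx install):

```bash
# Check which labels the namespaces actually carry
kubectl get namespace ingress-nginx eveai-dev --show-labels

# Add the 'name' labels the policies select on (idempotent)
kubectl label namespace ingress-nginx name=ingress-nginx --overwrite
kubectl label namespace eveai-dev name=eveai-dev --overwrite
```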
diff --git a/k8s/dev/network-policies.yaml b/k8s/dev/network-policies.yaml
new file mode 100644
index 0000000..0847f52
--- /dev/null
+++ b/k8s/dev/network-policies.yaml
@@ -0,0 +1,147 @@
+# Network Policies for EveAI Dev Environment
+# File: network-policies.yaml
+# Provides proper isolation and security for Kubernetes services
+---
+# Default deny all ingress traffic (security-first approach)
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: default-deny-ingress
+  namespace: eveai-dev
+  labels:
+    app: eveai
+    environment: dev
+spec:
+  podSelector: {}
+  policyTypes:
+  - Ingress
+
+---
+# Allow ingress controller to reach app services
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-ingress-to-apps
+  namespace: eveai-dev
+  labels:
+    app: eveai
+    environment: dev
+spec:
+  podSelector:
+    matchLabels:
+      tier: frontend
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - namespaceSelector:
+        matchLabels:
+          name: ingress-nginx
+    ports:
+    - protocol: TCP
+      port: 5001  # eveai-app
+    - protocol: TCP
+      port: 5003  # eveai-api
+    - protocol: TCP
+      port: 5004  # eveai-chat-client
+
+---
+# Allow app services to communicate with backend services
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-apps-to-backend
+  namespace: eveai-dev
+  labels:
+    app: eveai
+    environment: dev
+spec:
+  podSelector:
+    matchLabels:
+      tier: backend
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          tier: frontend
+    ports:
+    - protocol: TCP
+      port: 6379  # Redis
+    - protocol: TCP
+      port: 9000  # MinIO S3 API
+    - protocol: TCP
+      port: 9001  # MinIO Console
+    - protocol: TCP
+      port: 5432  # PostgreSQL
+
+---
+# Allow internal service communication within eveai-dev namespace
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-internal-communication
+  namespace: eveai-dev
+  labels:
+    app: eveai
+    environment: dev
+spec:
+  podSelector: {}
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - namespaceSelector:
+        matchLabels:
+          name: eveai-dev
+
+---
+# Allow monitoring services to scrape metrics
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-monitoring-scraping
+  namespace: eveai-dev
+  labels:
+    app: eveai
+    environment: dev
+spec:
+  podSelector:
+    matchLabels:
+      tier: monitoring
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: prometheus
+    ports:
+    - protocol: TCP
+      port: 9090  # Prometheus
+    - protocol: TCP
+      port: 9091  # Pushgateway
+    - protocol: TCP
+      port: 5555  # Flower
+    - protocol: TCP
+      port: 3000  # Grafana
+
+---
+# Allow external access to monitoring services (NodePort)
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-external-monitoring
+  namespace: eveai-dev
+  labels:
+    app: eveai
+    environment: dev
+spec:
+  podSelector:
+    matchLabels:
+      tier: monitoring
+  policyTypes:
+  - Ingress
+  ingress:
+  - {}  # Allow all external traffic to monitoring services
\ No newline at end of file
diff --git a/k8s/dev/nginx-monitoring-services.yaml b/k8s/dev/nginx-monitoring-services.yaml
deleted file mode 100644
index 99b9c3f..0000000
--- a/k8s/dev/nginx-monitoring-services.yaml
+++ /dev/null
@@ -1,419 +0,0 @@
-# Nginx and Monitoring Services for EveAI Dev Environment
-# File: nginx-monitoring-services.yaml
----
-# Nginx Deployment
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: nginx
-  namespace: eveai-dev
-  labels:
-    app: nginx
-    environment: dev
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: nginx
-  template:
-    metadata:
-      labels:
-        app: nginx
-    spec:
-      containers:
-      - name: nginx
-        image: registry.ask-eve-ai-local.com/josakola/nginx:latest
-        ports:
-        - containerPort: 80
-        - containerPort: 443
-        envFrom:
-        - configMapRef:
-            name: eveai-config
-        - secretRef:
-            name: eveai-secrets
-        volumeMounts:
-        - name: nginx-logs
-          mountPath: /var/log/nginx
-        livenessProbe:
-          httpGet:
-            path: /
-            port: 80
-          initialDelaySeconds: 30
-          periodSeconds: 10
-          timeoutSeconds: 5
-          failureThreshold: 3
-        readinessProbe:
-          httpGet:
-            path: /
-            port: 80
-          initialDelaySeconds: 5
-          periodSeconds: 5
-          timeoutSeconds: 5
-          failureThreshold: 3
-        resources:
-          requests:
-            memory: "128Mi"
-            cpu: "100m"
-          limits:
-            memory: "512Mi"
-            cpu: "500m"
-      volumes:
-      - name: nginx-logs
-        persistentVolumeClaim:
-          claimName: app-logs-pvc
-      restartPolicy: Always
-
----
-# Nginx Service
-apiVersion: v1
-kind: Service
-metadata:
-  name: nginx-service
-  namespace: eveai-dev
-  labels:
-    app: nginx
-spec:
-  type: NodePort
-  ports:
-  - port: 80
-    targetPort: 80
-    nodePort: 30080  # Maps to host port 3080
-    protocol: TCP
-    name: http
-  - port: 443
-    targetPort: 443
-    nodePort: 30443  # Maps to host port 3443
-    protocol: TCP
-    name: https
-  selector:
-    app: nginx
-
----
-# Flower (Celery Monitoring) Deployment
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: flower
-  namespace: eveai-dev
-  labels:
-    app: flower
-    environment: dev
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: flower
-  template:
-    metadata:
-      labels:
-        app: flower
-    spec:
-      containers:
-      - name: flower
-        image: registry.ask-eve-ai-local.com/josakola/flower:latest
-        ports:
-        - containerPort: 5555
-        envFrom:
-        - configMapRef:
-            name: eveai-config
-        - secretRef:
-            name: eveai-secrets
-        resources:
-          requests:
-            memory: "128Mi"
-            cpu: "100m"
-          limits:
-            memory: "512Mi"
-            cpu: "300m"
-      restartPolicy: Always
-
----
-# Flower Service
-apiVersion: v1
-kind: Service
-metadata:
-  name: flower-service
-  namespace: eveai-dev
-  labels:
-    app: flower
-spec:
-  type: NodePort
-  ports:
-  - port: 5555
-    targetPort: 5555
-    nodePort: 30007  # Maps to host port 3007
-    protocol: TCP
-  selector:
-    app: flower
-
----
-# Prometheus PVC
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: prometheus-data-pvc
-  namespace: eveai-dev
-spec:
-  accessModes:
-  - ReadWriteOnce
-  storageClassName: local-storage
-  resources:
-    requests:
-      storage: 5Gi
-  selector:
-    matchLabels:
-      app: prometheus
-      environment: dev
-
----
-# Prometheus Deployment
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: prometheus
-  namespace: eveai-dev
-  labels:
-    app: prometheus
-    environment: dev
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: prometheus
-  template:
-    metadata:
-      labels:
-        app: prometheus
-    spec:
-      containers:
-      - name: prometheus
-        image: registry.ask-eve-ai-local.com/josakola/prometheus:latest
-        ports:
-        - containerPort: 9090
-        args:
-        - '--config.file=/etc/prometheus/prometheus.yml'
-        - '--storage.tsdb.path=/prometheus'
-        - '--web.console.libraries=/etc/prometheus/console_libraries'
-        - '--web.console.templates=/etc/prometheus/consoles'
-        - '--web.enable-lifecycle'
-        volumeMounts:
-        - name: prometheus-data
-          mountPath: /prometheus
-        livenessProbe:
-          httpGet:
-            path: /-/healthy
-            port: 9090
-          initialDelaySeconds: 30
-          periodSeconds: 10
-          timeoutSeconds: 5
-          failureThreshold: 3
-        readinessProbe:
-          httpGet:
-            path: /-/ready
-            port: 9090
-          initialDelaySeconds: 5
-          periodSeconds: 5
-          timeoutSeconds: 5
-          failureThreshold: 3
-        resources:
-          requests:
-            memory: "512Mi"
-            cpu: "300m"
-          limits:
-            memory: "2Gi"
-            cpu: "1000m"
-      volumes:
-      - name: prometheus-data
-        persistentVolumeClaim:
-          claimName: prometheus-data-pvc
-      restartPolicy: Always
-
----
-# Prometheus Service
-apiVersion: v1
-kind: Service
-metadata:
-  name: prometheus-service
-  namespace: eveai-dev
-  labels:
-    app: prometheus
-spec:
-  type: NodePort
-  ports:
-  - port: 9090
-    targetPort: 9090
-    nodePort: 30010  # Maps to host port 3010
-    protocol: TCP
-  selector:
-    app: prometheus
-
----
-# Pushgateway Deployment
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: pushgateway
-  namespace: eveai-dev
-  labels:
-    app: pushgateway
-    environment: dev
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: pushgateway
-  template:
-    metadata:
-      labels:
-        app: pushgateway
-    spec:
-      containers:
-      - name: pushgateway
-        image: prom/pushgateway:latest
-        ports:
-        - containerPort: 9091
-        livenessProbe:
-          httpGet:
-            path: /-/healthy
-            port: 9091
-          initialDelaySeconds: 30
-          periodSeconds: 10
-          timeoutSeconds: 5
-          failureThreshold: 3
-        readinessProbe:
-          httpGet:
-            path: /-/ready
-            port: 9091
-          initialDelaySeconds: 5
-          periodSeconds: 5
-          timeoutSeconds: 5
-          failureThreshold: 3
-        resources:
-          requests:
-            memory: "128Mi"
-            cpu: "100m"
-          limits:
-            memory: "512Mi"
-            cpu: "300m"
-      restartPolicy: Always
-
----
-# Pushgateway Service
-apiVersion: v1
-kind: Service
-metadata:
-  name: pushgateway-service
-  namespace: eveai-dev
-  labels:
-    app: pushgateway
-spec:
-  type: NodePort
-  ports:
-  - port: 9091
-    targetPort: 9091
-    nodePort: 30011  # Maps to host port 3011
-    protocol: TCP
-  selector:
-    app: pushgateway
-
----
-# Grafana PVC
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: grafana-data-pvc
-  namespace: eveai-dev
-spec:
-  accessModes:
-  - ReadWriteOnce
-  storageClassName: local-storage
-  resources:
-    requests:
-      storage: 1Gi
-  selector:
-    matchLabels:
-      app: grafana
-      environment: dev
-
----
-# Grafana Deployment
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: grafana
-  namespace: eveai-dev
-  labels:
-    app: grafana
-    environment: dev
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: grafana
-  template:
-    metadata:
-      labels:
-        app: grafana
-    spec:
-      containers:
-      - name: grafana
-        image: registry.ask-eve-ai-local.com/josakola/grafana:latest
-        ports:
-        - containerPort: 3000
-        env:
-        - name: GF_SECURITY_ADMIN_USER
-          value: "admin"
-        - name: GF_SECURITY_ADMIN_PASSWORD
-          value: "admin"
-        - name: GF_USERS_ALLOW_SIGN_UP
-          value: "false"
-        volumeMounts:
-        - name: grafana-data
-          mountPath: /var/lib/grafana
-        livenessProbe:
-          httpGet:
-            path: /api/health
-            port: 3000
-          initialDelaySeconds: 30
-          periodSeconds: 10
-          timeoutSeconds: 5
-          failureThreshold: 3
-        readinessProbe:
-          httpGet:
-            path: /api/health
-            port: 3000
-          initialDelaySeconds: 5
-          periodSeconds: 5
-          timeoutSeconds: 5
-          failureThreshold: 3
-        resources:
-          requests:
-            memory: "256Mi"
-            cpu: "200m"
-          limits:
-            memory: "1Gi"
-            cpu: "500m"
-      volumes:
-      - name: grafana-data
-        persistentVolumeClaim:
-          claimName: grafana-data-pvc
-      restartPolicy: Always
-
----
-# Grafana Service
-apiVersion: v1
-kind: Service
-metadata:
-  name: grafana-service
-  namespace: eveai-dev
-  labels:
-    app: grafana
-spec:
-  type: NodePort
-  ports:
-  - port: 3000
-    targetPort: 3000
-    nodePort: 30012  # Maps to host port 3012
-    protocol: TCP
-  selector:
-    app: grafana
\ No newline at end of file
diff --git a/k8s/dev/persistent-volumes.yaml b/k8s/dev/persistent-volumes.yaml
index af058cf..8355f19 100644
--- a/k8s/dev/persistent-volumes.yaml
+++ b/k8s/dev/persistent-volumes.yaml
@@ -27,33 +27,6 @@ spec:
           values:
           - eveai-dev-cluster-control-plane
 
----
-# Redis Data Storage
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: redis-data-pv
-  labels:
-    app: redis
-    environment: dev
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-  - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  storageClassName: local-storage
-  local:
-    path: /mnt/redis-data
-  nodeAffinity:
-    required:
-      nodeSelectorTerms:
-      - matchExpressions:
-        - key: kubernetes.io/hostname
-          operator: In
-          values:
-          - eveai-dev-cluster-control-plane
-
 ---
 # Application Logs Storage
 apiVersion: v1
diff --git a/k8s/dev/redis-minio-services.yaml b/k8s/dev/redis-minio-services.yaml
index 6b705f3..a0b72ce 100644
--- a/k8s/dev/redis-minio-services.yaml
+++ b/k8s/dev/redis-minio-services.yaml
@@ -1,24 +1,5 @@
 # Redis and MinIO Services for EveAI Dev Environment
 # File: redis-minio-services.yaml
----
-# Redis Persistent Volume Claim
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: redis-data-pvc
-  namespace: eveai-dev
-spec:
-  accessModes:
-  - ReadWriteOnce
-  storageClassName: local-storage
-  resources:
-    requests:
-      storage: 2Gi
-  selector:
-    matchLabels:
-      app: redis
-      environment: dev
-
 ---
 # Redis Deployment
 apiVersion: apps/v1
@@ -38,15 +19,13 @@ spec:
     metadata:
       labels:
         app: redis
+        tier: backend
     spec:
       containers:
       - name: redis
         image: redis:7.2.5
         ports:
         - containerPort: 6379
-        volumeMounts:
-        - name: redis-data
-          mountPath: /data
         livenessProbe:
           exec:
             command:
@@ -74,10 +53,6 @@ spec:
           limits:
             memory: "512Mi"
             cpu: "500m"
-      volumes:
-      - name: redis-data
-        persistentVolumeClaim:
-          claimName: redis-data-pvc
       restartPolicy: Always
 
 ---
@@ -137,6 +112,7 @@ spec:
     metadata:
       labels:
         app: minio
+        tier: backend
     spec:
       containers:
       - name: minio
@@ -235,4 +211,22 @@ spec:
     protocol: TCP
     name: console
   selector:
-    app: minio
\ No newline at end of file
+    app: minio
+
+---
+# Redis Alias Service (for application compatibility)
+apiVersion: v1
+kind: Service
+metadata:
+  name: redis
+  namespace: eveai-dev
+  labels:
+    app: redis
+spec:
+  type: ClusterIP
+  ports:
+  - port: 6379
+    targetPort: 6379
+    protocol: TCP
+  selector:
+    app: redis
\ No newline at end of file
diff --git a/k8s/dev/setup-dev-cluster.sh b/k8s/dev/setup-dev-cluster.sh
index 7f33b4c..8c11bea 100755
--- a/k8s/dev/setup-dev-cluster.sh
+++ b/k8s/dev/setup-dev-cluster.sh
@@ -67,7 +67,6 @@ create_host_directories() {
 
     directories=(
         "$BASE_DIR/minio"
-        "$BASE_DIR/redis"
        "$BASE_DIR/logs"
         "$BASE_DIR/prometheus"
         "$BASE_DIR/grafana"
@@ -107,7 +106,7 @@ create_cluster() {
     KIND_CONFIG="kind-dev-cluster.yaml"
 
     if [ ! -f "${KIND_CONFIG}" ]; then
-        print_error "Config '${KIND_CONFIG}' niet gevonden in $(pwd)"
+        print_error "Config '${KIND_CONFIG}' not found in $(pwd)"
         exit 1
     fi
 
@@ -146,30 +145,6 @@ create_cluster() {
     print_success "Kind cluster created successfully"
 }
 
-# Configure container resource limits to prevent CRI issues
-configure_container_limits() {
-    print_status "Configuring container resource limits..."
-
-    # Configure file descriptor and inotify limits to prevent CRI plugin failures
-    podman exec "${CLUSTER_NAME}-control-plane" sh -c '
-        echo "fs.inotify.max_user_instances = 1024" >> /etc/sysctl.conf
-        echo "fs.inotify.max_user_watches = 524288" >> /etc/sysctl.conf
-        echo "fs.file-max = 2097152" >> /etc/sysctl.conf
-        sysctl -p
-    '
-
-    # Restart containerd to apply new limits
-    print_status "Restarting containerd with new limits..."
-    podman exec "${CLUSTER_NAME}-control-plane" systemctl restart containerd
-
-    # Wait for containerd to stabilize
-    sleep 10
-
-    # Restart kubelet to ensure proper CRI communication
-    podman exec "${CLUSTER_NAME}-control-plane" systemctl restart kubelet
-
-    print_success "Container limits configured and services restarted"
-}
 
 # Verify CRI status and functionality
 verify_cri_status() {
@@ -233,8 +208,23 @@ install_ingress_controller() {
     if [ $? -eq 0 ]; then
         print_success "NGINX Ingress Controller installed and ready"
     else
-        print_error "Failed to install or start Ingress Controller"
-        exit 1
+        print_warning "Ingress Controller not ready, trying to label node..."
+        # Label the node for ingress (fallback for scheduling issues)
+        kubectl label node eveai-dev-cluster-control-plane ingress-ready=true --overwrite
+
+        # Wait again for Ingress Controller to be ready
+        print_status "Waiting for Ingress Controller after node labeling..."
+        kubectl wait --namespace ingress-nginx \
+            --for=condition=ready pod \
+            --selector=app.kubernetes.io/component=controller \
+            --timeout=300s
+
+        if [ $? -eq 0 ]; then
+            print_success "NGINX Ingress Controller ready after node labeling"
+        else
+            print_error "Failed to install or start Ingress Controller even after node labeling"
+            exit 1
+        fi
     fi
 
     # Verify Ingress Controller status
@@ -247,16 +237,38 @@ install_ingress_controller() {
 apply_manifests() {
     print_status "Applying Kubernetes manifests..."
 
-    # Apply in correct order
+    # Apply base manifests in correct order (namespace.yaml handles namespace creation)
     manifests=(
+        "namespace.yaml"
         "persistent-volumes.yaml"
         "config-secrets.yaml"
+        "network-policies.yaml"
     )
 
     for manifest in "${manifests[@]}"; do
         if [ -f "$manifest" ]; then
             print_status "Applying $manifest..."
-            kubectl apply -f "$manifest"
+
+            # Apply with retry logic for race condition handling
+            local max_attempts=3
+            local attempt=1
+            local success=false
+
+            while [ $attempt -le $max_attempts ] && [ "$success" = false ]; do
+                if kubectl apply -f "$manifest"; then
+                    print_success "Successfully applied: $manifest"
+                    success=true
+                else
+                    if [ $attempt -lt $max_attempts ]; then
+                        print_warning "Attempt $attempt failed for $manifest, retrying in 3 seconds..."
+                        sleep 3
+                        attempt=$((attempt + 1))
+                    else
+                        print_error "Failed to apply $manifest after $max_attempts attempts"
+                        return 1
+                    fi
+                fi
+            done
         else
             print_warning "Manifest $manifest not found, skipping..."
         fi
@@ -265,6 +277,43 @@ apply_manifests() {
     print_success "Base manifests applied successfully"
 }
 
+# Configure registry certificates and containerd
+configure_registry_certificates() {
+    print_status "Configuring registry certificates and containerd..."
+
+    # Update CA certificates in the cluster
+    print_status "Updating CA certificates..."
+    kubectl debug node/eveai-dev-cluster-control-plane -it --image=busybox -- sh -c "
+        chroot /host update-ca-certificates 2>/dev/null || true
+    " 2>/dev/null || print_warning "Certificate update may have failed"
+
+    # Create containerd registry configuration directory
+    print_status "Creating containerd registry configuration..."
+    kubectl debug node/eveai-dev-cluster-control-plane -it --image=busybox -- sh -c "
+        chroot /host mkdir -p /etc/containerd/certs.d/registry.ask-eve-ai-local.com
+    " 2>/dev/null || print_warning "Failed to create containerd config directory"
+
+    # Configure registry hosts.toml
+    print_status "Configuring registry hosts.toml..."
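+    # containerd reads per-registry settings from
+    # /etc/containerd/certs.d/<registry>/hosts.toml: "server" names the
+    # upstream registry, "capabilities" restricts the host to pull/resolve,
+    # and "ca" points at the mkcert CA assumed to be present on the node.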
+    kubectl debug node/eveai-dev-cluster-control-plane -it --image=busybox -- sh -c "
+        chroot /host sh -c 'cat > /etc/containerd/certs.d/registry.ask-eve-ai-local.com/hosts.toml << EOF
+server = \"https://registry.ask-eve-ai-local.com\"
+
+[host.\"https://registry.ask-eve-ai-local.com\"]
+  capabilities = [\"pull\", \"resolve\"]
+  ca = [\"/usr/local/share/ca-certificates/mkcert-ca.crt\"]
+EOF'
+    " 2>/dev/null || print_warning "Failed to create hosts.toml"
+
+    # Restart containerd to apply configuration
+    print_status "Restarting containerd..."
+    kubectl debug node/eveai-dev-cluster-control-plane -it --image=busybox -- sh -c "
+        chroot /host systemctl restart containerd
+    " 2>/dev/null || print_warning "Failed to restart containerd"
+
+    print_success "Registry certificates and containerd configured"
+}
+
 # Verify cluster status
 verify_cluster() {
     print_status "Verifying cluster status..."
@@ -300,10 +349,10 @@ main() {
     check_prerequisites
     create_host_directories
     create_cluster
-    configure_container_limits
     verify_cri_status
     install_ingress_controller
     apply_manifests
+    configure_registry_certificates
     verify_cluster
 
     echo ""
diff --git a/k8s/k8s_env_switch.sh b/k8s/k8s_env_switch.sh
index ca60577..1590018 100644
--- a/k8s/k8s_env_switch.sh
+++ b/k8s/k8s_env_switch.sh
@@ -214,6 +214,24 @@ krefresh() {
     deploy_service_group "$group"
 }
 
+# Structured deployment of all services (like deploy-all-services.sh)
+kup-all-structured() {
+    log_operation "INFO" "Starting structured deployment of all services"
+    deploy_all_structured
+}
+
+# Test connectivity to all services
+ktest() {
+    log_operation "INFO" "Testing service connectivity"
+    test_connectivity_ingress
+}
+
+# Show connection information
+kinfo() {
+    log_operation "INFO" "Showing connection information"
+    show_connection_info
+}
+
 # Individual service management functions for apps group
 kup-app() {
     log_operation "INFO" "Starting eveai-app"
@@ -416,6 +434,7 @@ if [[ -n "$ZSH_VERSION" ]]; then
     # In zsh, functions are automatically available in subshells
     # But we can make them available globally with typeset
     typeset -f kup kdown kstop kstart kps klogs krefresh > /dev/null
+    typeset -f kup-all-structured ktest kinfo > /dev/null
     typeset -f kup-app kdown-app kstop-app kstart-app > /dev/null
     typeset -f kup-api kdown-api kstop-api kstart-api > /dev/null
     typeset -f kup-chat-client kdown-chat-client kstop-chat-client kstart-chat-client > /dev/null
@@ -427,6 +446,7 @@ if [[ -n "$ZSH_VERSION" ]]; then
 else
     # Bash style export
     export -f kup kdown kstop kstart kps klogs krefresh
+    export -f kup-all-structured ktest kinfo
     export -f kup-app kdown-app kstop-app kstart-app
     export -f kup-api kdown-api kstop-api kstart-api
     export -f kup-chat-client kdown-chat-client kstop-chat-client kstart-chat-client
@@ -450,6 +470,9 @@ echo "  kstop [group]     - stop service group without removal"
 echo "  kstart [group]    - start stopped service group"
 echo "  krefresh [group]  - restart service group"
 echo ""
+echo "  Structured Deployment:"
+echo "  kup-all-structured - deploy all services in structured order (like deploy-all-services.sh)"
+echo ""
 echo "  Individual App Services:"
 echo "  kup-app           - start eveai-app"
 echo "  kup-api           - start eveai-api"
@@ -460,9 +483,11 @@ echo "  kup-beat          - start eveai-beat"
 echo "  kup-entitlements  - start eveai-entitlements"
 echo "  (and corresponding kdown-, kstop-, kstart- functions)"
 echo ""
-echo "  Status & Logs:"
+echo "  Status & Testing:"
 echo "  kps               - show service status"
 echo "  klogs [service]   - view service logs"
+echo "  ktest             - test service connectivity"
+echo "  kinfo             - show connection information"
 echo ""
 echo "  Cluster Management:"
 echo "  cluster-start     - start cluster"
diff --git a/k8s/scripts/k8s-functions.sh b/k8s/scripts/k8s-functions.sh
index 55926fd..5723dfe 100644
--- a/k8s/scripts/k8s-functions.sh
+++ b/k8s/scripts/k8s-functions.sh
@@ -41,10 +41,28 @@ deploy_service_group() {
         log_operation "INFO" "Applying YAML file: $yaml_file"
         log_kubectl_command "kubectl apply -f $full_path"
 
-        if kubectl apply -f "$full_path"; then
-            log_operation "SUCCESS" "Successfully applied: $yaml_file"
-        else
-            log_operation "ERROR" "Failed to apply: $yaml_file"
+        # Apply with retry logic for namespace race condition handling
+        local max_attempts=3
+        local attempt=1
+        local file_success=false
+
+        while [[ $attempt -le $max_attempts ]] && [[ "$file_success" == "false" ]]; do
+            if kubectl apply -f "$full_path"; then
+                log_operation "SUCCESS" "Successfully applied: $yaml_file"
+                file_success=true
+            else
+                if [[ $attempt -lt $max_attempts ]]; then
+                    log_operation "WARNING" "Attempt $attempt failed for $yaml_file, retrying after namespace sync..."
+                    sleep 3
+                    attempt=$((attempt + 1))
+                else
+                    log_operation "ERROR" "Failed to apply $yaml_file after $max_attempts attempts"
+                    success=false
+                fi
+            fi
+        done
+
+        if [[ "$file_success" == "false" ]]; then
             success=false
         fi
     done
@@ -405,13 +423,167 @@ restart_service() {
     log_operation "SUCCESS" "Successfully restarted service: $service"
 }
 
+# Test service connectivity via Ingress
+test_connectivity_ingress() {
+    log_operation "INFO" "Testing Ingress connectivity..."
+
+    # Test Ingress endpoints
+    local endpoints=(
+        "http://minty.ask-eve-ai-local.com:3080/admin/"
+        "http://minty.ask-eve-ai-local.com:3080/api/healthz/ready"
+        "http://minty.ask-eve-ai-local.com:3080/chat-client/"
+        "http://minty.ask-eve-ai-local.com:3080/static/"
+        "http://localhost:3009"  # MinIO Console (direct)
+        "http://localhost:3010"  # Prometheus (direct)
+        "http://localhost:3012"  # Grafana (direct)
+    )
+
+    local success_count=0
+    local total_count=${#endpoints[@]}
+
+    for endpoint in "${endpoints[@]}"; do
+        log_operation "INFO" "Testing $endpoint..."
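+        # curl -f exits non-zero on HTTP errors (>= 400), -s suppresses
+        # progress output, and --max-time 10 bounds each probe so one hung
+        # endpoint cannot stall the whole test run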
+        if curl -f -s --max-time 10 "$endpoint" > /dev/null; then
+            log_operation "SUCCESS" "$endpoint is responding"
+            ((success_count++))
+        else
+            log_operation "WARNING" "$endpoint is not responding (may still be starting up)"
+        fi
+    done
+
+    echo ""
+    log_operation "INFO" "Connectivity test completed: $success_count/$total_count endpoints responding"
+
+    if [[ $success_count -eq $total_count ]]; then
+        log_operation "SUCCESS" "All endpoints are responding"
+        return 0
+    elif [[ $success_count -gt 0 ]]; then
+        log_operation "WARNING" "Some endpoints are not responding"
+        return 1
+    else
+        log_operation "ERROR" "No endpoints are responding"
+        return 2
+    fi
+}
+
+# Show connection information for Ingress setup
+show_connection_info() {
+    echo ""
+    echo "=================================================="
+    log_operation "SUCCESS" "EveAI $K8S_ENVIRONMENT Cluster Connection Info"
+    echo "=================================================="
+    echo ""
+    echo "🌐 Service URLs:"
+    echo "  Main Application (via Ingress only):"
+    echo "  • Main App:     http://minty.ask-eve-ai-local.com:3080/admin/"
+    echo "  • API:          http://minty.ask-eve-ai-local.com:3080/api/"
+    echo "  • Chat Client:  http://minty.ask-eve-ai-local.com:3080/chat-client/"
+    echo "  • Static Files: http://minty.ask-eve-ai-local.com:3080/static/"
+    echo ""
+    echo "  Infrastructure (direct NodePort access):"
+    echo "  • Redis:         redis://minty.ask-eve-ai-local.com:3006"
+    echo "  • MinIO S3:      http://minty.ask-eve-ai-local.com:3008"
+    echo "  • MinIO Console: http://minty.ask-eve-ai-local.com:3009"
+    echo ""
+    echo "  Monitoring (direct NodePort access):"
+    echo "  • Prometheus: http://minty.ask-eve-ai-local.com:3010"
+    echo "  • Grafana:    http://minty.ask-eve-ai-local.com:3012"
+    echo ""
+    echo "🔑 Default Credentials:"
+    echo "  • MinIO:   minioadmin / minioadmin"
+    echo "  • Grafana: admin / admin"
+    echo "  • Flower:  Felucia / Jungles"
+    echo ""
+    echo "🛠️ Management Commands:"
+    echo "  • kubectl get all -n $K8S_NAMESPACE"
+    echo "  • kubectl get ingress -n $K8S_NAMESPACE"
+    echo "  • kubectl logs -f deployment/eveai-app -n $K8S_NAMESPACE"
+    echo "  • kubectl describe ingress eveai-ingress -n $K8S_NAMESPACE"
+    echo ""
+    echo "🗂️ Data Persistence:"
+    echo "  • Host data path: \$HOME/k8s-data/$K8S_ENVIRONMENT/"
+    echo "  • Logs path:      \$HOME/k8s-data/$K8S_ENVIRONMENT/logs/"
+    echo ""
+    echo "📊 Environment Details:"
+    echo "  • Environment: $K8S_ENVIRONMENT"
+    echo "  • Version:     $K8S_VERSION"
+    echo "  • Cluster:     $K8S_CLUSTER"
+    echo "  • Namespace:   $K8S_NAMESPACE"
+    echo "  • Config Dir:  $K8S_CONFIG_DIR"
+}
+
+# Deploy all services in structured order (like deploy-all-services.sh)
+deploy_all_structured() {
+    log_operation "INFO" "Starting structured deployment of all services"
+
+    echo ""
+    echo "=================================================="
+    echo "🚀 Deploying EveAI $K8S_ENVIRONMENT Services"
+    echo "=================================================="
+
+    # Stage 1: Infrastructure
+    log_operation "INFO" "Stage 1: Deploying infrastructure services..."
+    if ! deploy_service_group "infrastructure"; then
+        log_operation "ERROR" "Failed to deploy infrastructure services"
+        return 1
+    fi
+
+    log_operation "INFO" "Waiting for infrastructure to be ready..."
+    if ! wait_for_group_ready "infrastructure"; then
+        log_operation "ERROR" "Infrastructure services failed to become ready"
+        return 1
+    fi
+
+    sleep 5
+
+    # Stage 2: Application services
+    log_operation "INFO" "Stage 2: Deploying application services..."
+    if ! deploy_service_group "apps"; then
+        log_operation "ERROR" "Failed to deploy application services"
+        return 1
+    fi
+
+    log_operation "INFO" "Waiting for application services to be ready..."
+    if ! wait_for_group_ready "apps"; then
+        log_operation "WARNING" "Some application services may still be starting"
+    fi
+
+    sleep 5
+
+    # Stage 3: Static files and ingress
+    log_operation "INFO" "Stage 3: Deploying static files and ingress..."
+    if ! deploy_service_group "static"; then
+        log_operation "ERROR" "Failed to deploy static services"
+        return 1
+    fi
+
+    # Stage 4: Monitoring services
+    log_operation "INFO" "Stage 4: Deploying monitoring services..."
+    if ! deploy_service_group "monitoring"; then
+        log_operation "WARNING" "Failed to deploy monitoring services (continuing anyway)"
+    fi
+
+    sleep 10
+
+    # Final verification
+    log_operation "INFO" "Running final connectivity tests..."
+    test_connectivity_ingress
+
+    show_connection_info
+
+    log_operation "SUCCESS" "Structured deployment completed!"
+    return 0
+}
+
 # Export functions for use in other scripts
 if [[ -n "$ZSH_VERSION" ]]; then
     typeset -f deploy_service_group stop_service_group start_service_group > /dev/null
     typeset -f deploy_individual_service stop_individual_service start_individual_service > /dev/null
     typeset -f wait_for_group_ready get_service_status show_service_status restart_service > /dev/null
+    typeset -f test_connectivity_ingress show_connection_info deploy_all_structured > /dev/null
 else
     export -f deploy_service_group stop_service_group start_service_group
     export -f deploy_individual_service stop_individual_service start_individual_service
     export -f wait_for_group_ready get_service_status show_service_status restart_service
+    export -f test_connectivity_ingress show_connection_info deploy_all_structured
 fi
\ No newline at end of file
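Taken together, a typical dev-cluster session with the consolidated tooling looks like this (a sketch of the intended workflow; exact output and readiness timing will vary):

```bash
# One-time: create the Kind cluster, ingress controller, and base manifests
cd k8s/dev && ./setup-dev-cluster.sh

# Load the environment functions and deploy everything in order
source k8s/k8s_env_switch.sh dev
kup-all-structured    # infrastructure -> apps -> static/ingress -> monitoring

# Verify and inspect
ktest                 # probe the Ingress and NodePort endpoints
kinfo                 # print URLs, credentials, and kubectl hints
kps                   # per-group service status
klogs eveai-app       # view one service's logs
```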