From 25ab9ccf238dc901556ee611f53ef645a235ea76 Mon Sep 17 00:00:00 2001 From: Josako Date: Fri, 29 Aug 2025 17:50:14 +0200 Subject: [PATCH] - Staging cluster werkend tot op phase 6 van cluster-install.md, inclusief HTTPS, Bunny, verificatie service. --- config/config.py | 15 + .../Production Setup/cluster-install.md | 578 ++++++++++++++---- documentation/scaleway-deployment-guide.md | 355 +++++++++++ .../verification/00-configmaps.yaml} | 191 +----- .../verification/01-deployment.yaml | 57 ++ .../applications/verification/02-service.yaml | 16 + .../verification/kustomization.yaml | 12 + .../base/infrastructure/00-namespaces.yaml | 16 + .../infrastructure/03-cluster-issuers.yaml | 15 + .../infrastructure/cert-manager-values.yaml | 52 ++ .../base/infrastructure/ingress-values.yaml | 46 ++ scaleway/manifests/base/kustomization.yaml | 17 + .../base/monitoring/kustomization.yaml | 16 + .../base/monitoring/prometheus-values.yaml | 71 +++ .../base/monitoring/values-monitoring.yaml | 131 ++++ .../base/networking/ingress-http-acme.yaml | 44 ++ .../base/networking/ingress-https.yaml | 80 +++ .../base/secrets/eveai-external-secrets.yaml | 38 ++ .../base/secrets/scaleway-secret-store.yaml | 19 + .../overlays/staging/kustomization.yaml | 29 + .../manifests/staging/cert-manager-setup.yaml | 88 --- .../staging/ingress-controller-setup.yaml | 285 --------- scaleway/scaleway_staging_setup.md | 243 -------- 23 files changed, 1525 insertions(+), 889 deletions(-) create mode 100644 documentation/scaleway-deployment-guide.md rename scaleway/manifests/{staging/staging-test-setup.yaml => base/applications/verification/00-configmaps.yaml} (77%) create mode 100644 scaleway/manifests/base/applications/verification/01-deployment.yaml create mode 100644 scaleway/manifests/base/applications/verification/02-service.yaml create mode 100644 scaleway/manifests/base/applications/verification/kustomization.yaml create mode 100644 scaleway/manifests/base/infrastructure/00-namespaces.yaml create mode 100644 scaleway/manifests/base/infrastructure/03-cluster-issuers.yaml create mode 100644 scaleway/manifests/base/infrastructure/cert-manager-values.yaml create mode 100644 scaleway/manifests/base/infrastructure/ingress-values.yaml create mode 100644 scaleway/manifests/base/kustomization.yaml create mode 100644 scaleway/manifests/base/monitoring/kustomization.yaml create mode 100644 scaleway/manifests/base/monitoring/prometheus-values.yaml create mode 100644 scaleway/manifests/base/monitoring/values-monitoring.yaml create mode 100644 scaleway/manifests/base/networking/ingress-http-acme.yaml create mode 100644 scaleway/manifests/base/networking/ingress-https.yaml create mode 100644 scaleway/manifests/base/secrets/eveai-external-secrets.yaml create mode 100644 scaleway/manifests/base/secrets/scaleway-secret-store.yaml create mode 100644 scaleway/manifests/overlays/staging/kustomization.yaml delete mode 100644 scaleway/manifests/staging/cert-manager-setup.yaml delete mode 100644 scaleway/manifests/staging/ingress-controller-setup.yaml delete mode 100644 scaleway/scaleway_staging_setup.md diff --git a/config/config.py b/config/config.py index d0bf420..e8c0ced 100644 --- a/config/config.py +++ b/config/config.py @@ -281,6 +281,21 @@ class DevConfig(Config): REDIS_URL = 'redis' REDIS_PORT = '6379' REDIS_BASE_URI = f'redis://{REDIS_URL}:{REDIS_PORT}' + REDIS_CERT_DATA = environ.get('REDIS_CERT') + + # TODO: Redis certificaat inbouwen + # Snippet: + # import ssl + # import redis + # + # # In je Redis connectie configuratie + # if 
REDIS_CERT_DATA: + # ssl_context = ssl.create_default_context() + # ssl_context.check_hostname = False + # ssl_context.verify_mode = ssl.CERT_NONE + # + # # Custom SSL context voor Redis + # SESSION_REDIS = redis.from_url(REDIS_BASE_URI, ssl=ssl_context) # Celery settings # eveai_app Redis Settings diff --git a/documentation/Production Setup/cluster-install.md b/documentation/Production Setup/cluster-install.md index 99b57df..7a16983 100644 --- a/documentation/Production Setup/cluster-install.md +++ b/documentation/Production Setup/cluster-install.md @@ -1,158 +1,514 @@ -# Cluster Install +# EveAI Cluster Installation Guide (Updated for Modular Kustomize Setup) -## Fase 1: Ingress Controller Setup -### Stap 1: Installeer de NGINX Ingress Controller +## Prerequisites -``` -kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.2/deploy/static/provider/cloud/deploy.yaml +### Required Tools +```bash +# Verify required tools are installed +kubectl version --client +kustomize version +helm version + +# Configure kubectl for Scaleway cluster +scw k8s kubeconfig install +kubectl cluster-info ``` -### Stap 2: Verifieer de Installatie -Kijk of de namespace is aangemaakt +### Scaleway Prerequisites +- Kubernetes cluster running +- Managed services configured (PostgreSQL, Redis, MinIO) +- Secrets stored in Scaleway Secret Manager: + - `eveai-app-keys`, `eveai-mistral`, `eveai-object-storage` + - `eveai-openai`, `eveai-postgresql`, `eveai-redis`, `eveai-redis-certificate` +- Flexible IP address (LoadBalancer) + - Eerst een loadbalancer aanmaken met publiek IP + - Daarna de loadbalancer verwijderen maar flexible IPs behouden + - Dit externe IP is het IP adres dat moet worden verwerkt in ingress-values.yaml! -``` -kubectl get namespaces | grep ingress-nginx +## New Modular Deployment Process + +### Phase 1: Infrastructure Foundation +Deploy core infrastructure components in the correct order: + +```bash +# 1. Deploy namespaces +kubectl apply -f scaleway/manifests/base/infrastructure/00-namespaces.yaml + +# 2. Add NGINX Ingress Helm repository +helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx +helm repo update + +# 3. Deploy NGINX ingress controller via Helm +helm install ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx \ + --create-namespace \ + --values scaleway/manifests/base/infrastructure/ingress-values.yaml + +# 4. Wait for ingress controller to be ready +kubectl wait --namespace ingress-nginx \ + --for=condition=ready pod \ + --selector=app.kubernetes.io/component=controller \ + --timeout=300s + +# 5. Add cert-manager Helm repository +helm repo add jetstack https://charts.jetstack.io +helm repo update + +# 6. Install cert-manager CRDs +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.15.3/cert-manager.crds.yaml + +# 7. Deploy cert-manager via Helm +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --values scaleway/manifests/base/infrastructure/cert-manager-values.yaml + +# 8. Wait for cert-manager to be ready +kubectl wait --namespace cert-manager \ + --for=condition=ready pod \ + --selector=app.kubernetes.io/name=cert-manager \ + --timeout=300s + +# 9. 
Deploy cluster issuers +kubectl apply -f scaleway/manifests/base/infrastructure/03-cluster-issuers.yaml ``` -Check of de pods worden gestart +### Phase 2: Verification Infrastructure Components -``` +```bash +# Verify ingress controller kubectl get pods -n ingress-nginx +kubectl get svc -n ingress-nginx + +# Verify cert-manager +kubectl get pods -n cert-manager +kubectl get clusterissuers + +# Check LoadBalancer external IP +kubectl get svc -n ingress-nginx ingress-nginx-controller ``` -Check de services (dit is het belangrijkste!) +### Phase 3: Monitoring Stack (Optional but Recommended) -``` -kubectl get services -n ingress-nginx +#### Add Prometheus Community Helm Repository + +```bash +# Add Prometheus community Helm repository +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +# Verify chart availability +helm search repo prometheus-community/kube-prometheus-stack ``` -Je zou zoiets als dit moeten zien: +#### Create Monitoring Values File -``` -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -ingress-nginx-controller NodePort 10.43.xxx.xxx 80:30080/TCP,443:30443/TCP 1m +Create `scaleway/manifests/base/monitoring/prometheus-values.yaml`: + +#### Deploy Monitoring Stack + +```bash +# Install complete monitoring stack via Helm +helm install monitoring prometheus-community/kube-prometheus-stack \ + --namespace monitoring \ + --create-namespace \ + --values scaleway/manifests/base/monitoring/prometheus-values.yaml + +# Monitor deployment progress +kubectl get pods -n monitoring -w +# Wait until all pods show STATUS: Running ``` -Watch de pods tot ze ready zijn +#### Verify Monitoring Deployment -``` -kubectl get pods -n ingress-nginx -w +```bash +# Check Helm release +helm list -n monitoring + +# Verify all components are running +kubectl get all -n monitoring + +# Check persistent volumes are created +kubectl get pvc -n monitoring + +# Check ServiceMonitor CRDs are available (for application monitoring) +kubectl get crd | grep monitoring.coreos.com ``` -Stop met Ctrl+C als je dit ziet: +#### Enable cert-manager Monitoring Integration -``` -ingress-nginx-controller-xxx 1/1 Running 0 2m +```bash +# Enable Prometheus monitoring in cert-manager now that ServiceMonitor CRDs exist +helm upgrade cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --set prometheus.enabled=true \ + --set prometheus.servicemonitor.enabled=true \ + --reuse-values ``` -Check de NodePorts, dit is cruciaal voor je Scaleway LoadBalancer configuratie: +#### Access Monitoring Services -``` -kubectl get service ingress-nginx-controller -n ingress-nginx -o yaml | grep nodePort +##### Grafana Dashboard +```bash +# Port forward to access Grafana +kubectl port-forward -n monitoring svc/monitoring-grafana 3000:80 + +# Access via browser: http://localhost:3000 +# Username: admin +# Password: admin123 (from values file) ``` -Of een overzichtelijker weergave: +##### Prometheus UI +```bash +# Port forward to access Prometheus +kubectl port-forward -n monitoring svc/monitoring-prometheus 9090:9090 & + +# Access via browser: http://localhost:9090 +# Check targets: http://localhost:9090/targets +``` + +#### Cleanup Commands (if needed) + +If you need to completely remove monitoring for a fresh start: + +```bash +# Uninstall Helm release +helm uninstall monitoring -n monitoring + +# Remove namespace +kubectl delete namespace monitoring + +# Remove any remaining cluster-wide resources +kubectl get clusterroles | grep monitoring | awk '{print $1}' | xargs -r kubectl 
delete clusterrole +kubectl get clusterrolebindings | grep monitoring | awk '{print $1}' | xargs -r kubectl delete clusterrolebinding +``` + +#### What we installed + +With monitoring successfully deployed: +- Grafana provides pre-configured Kubernetes dashboards +- Prometheus collects metrics from all cluster components +- ServiceMonitor CRDs are available for application-specific metrics +- AlertManager handles alert routing and notifications + +### Phase 4: Secrets + +#### Stap 1: Installeer External Secrets Operator + +```bash +# Add Helm repository +helm repo add external-secrets https://charts.external-secrets.io +helm repo update + +# Install External Secrets Operator +helm install external-secrets external-secrets/external-secrets \ + --namespace external-secrets-system \ + --create-namespace + +# Verify installation +kubectl get pods -n external-secrets-system + +# Check CRDs zijn geïnstalleerd +kubectl get crd | grep external-secrets +``` + +#### Stap 2: Maak Scaleway API credentials aan + +Je hebt Scaleway API credentials nodig voor de operator: + +```bash +# Create secret with Scaleway API credentials +kubectl create secret generic scaleway-credentials \ + --namespace eveai-staging \ + --from-literal=access-key="JOUW_SCALEWAY_ACCESS_KEY" \ + --from-literal=secret-key="JOUW_SCALEWAY_SECRET_KEY" +``` + +**Note:** Je krijgt deze credentials via: +- Scaleway Console → Project settings → API Keys +- Of via `scw iam api-key list` als je de CLI gebruikt + +#### Stap 3: Verifieer SecretStore configuratie + +Verifieer bestand: `scaleway/manifests/base/secrets/scaleway-secret-store.yaml`. Daar moet de juiste project ID worden ingevoerd. + +#### Stap 4: Verifieer ExternalSecret resource + +Verifieer bestand: `scaleway/manifests/base/secrets/eveai-external-secrets.yaml` + +**Belangrijk:** +- Scaleway provider vereist `key: name:secret-name` syntax +- SSL/TLS certificaten kunnen niet via `dataFrom/extract` worden opgehaald +- Certificaten moeten via `data` sectie worden toegevoegd + +#### Stap 5: Deploy secrets + +```bash +# Deploy SecretStore +kubectl apply -f scaleway/manifests/base/secrets/scaleway-secret-store.yaml + +# Deploy ExternalSecret +kubectl apply -f scaleway/manifests/base/secrets/eveai-external-secrets.yaml +``` + +#### Stap 6: Verificatie + +```bash +# Check ExternalSecret status +kubectl get externalsecrets -n eveai-staging + +# Check of het Kubernetes secret is aangemaakt +kubectl get secret eveai-secrets -n eveai-staging + +# Check alle keys in het secret +kubectl get secret eveai-secrets -n eveai-staging -o jsonpath='{.data}' | jq 'keys' + +# Check specifieke waarde (base64 decoded) +kubectl get secret eveai-secrets -n eveai-staging -o jsonpath='{.data.DB_HOST}' | base64 -d + +# Check ExternalSecret events voor troubleshooting +kubectl describe externalsecret eveai-external-secrets -n eveai-staging +``` + +#### Stap 7: Gebruik in deployment + +Je kunt nu deze secrets gebruiken in de deployment van de applicatie services die deze nodig hebben (TODO): + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: eveai-app + namespace: eveai-staging +spec: + template: + spec: + containers: + - name: eveai-app + envFrom: + - secretRef: + name: eveai-secrets # Alle environment variables uit één secret + # Je Python code gebruikt gewoon environ.get('DB_HOST') etc. 
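          # Hypothetical alternative (not part of the original example): expose a single
          # key instead of the whole secret via env/valueFrom/secretKeyRef:
          # env:
          #   - name: DB_HOST
          #     valueFrom:
          #       secretKeyRef:
          #         name: eveai-secrets
          #         key: DB_HOST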
+``` + +#### Stap 8: Redis certificaat gebruiken in Python + +Voor SSL Redis connecties met het certificaat: + +```python +# In je config.py +import tempfile +import ssl +import redis +from os import environ + +class StagingConfig(Config): + REDIS_CERT_DATA = environ.get('REDIS_CERT') + + def create_redis_connection(self): + if self.REDIS_CERT_DATA: + # Schrijf certificaat naar tijdelijk bestand + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.pem') as f: + f.write(self.REDIS_CERT_DATA) + cert_path = f.name + + # Redis connectie met SSL certificaat + return redis.from_url( + self.REDIS_BASE_URI, + ssl_cert_reqs=ssl.CERT_REQUIRED, + ssl_ca_certs=cert_path + ) + else: + return redis.from_url(self.REDIS_BASE_URI) + + # Gebruik voor session Redis + SESSION_REDIS = property(lambda self: self.create_redis_connection()) +``` + +#### Scaleway Secret Manager Vereisten + +Voor deze setup moeten je secrets in Scaleway Secret Manager correct gestructureerd zijn: + +**JSON secrets (eveai-postgresql, eveai-redis, etc.):** +```json +{ + "DB_HOST": "your-postgres-host.rdb.fr-par.scw.cloud", + "DB_USER": "eveai_user", + "DB_PASS": "your-password", + "DB_NAME": "eveai_staging", + "DB_PORT": "5432" +} +``` + +**SSL/TLS Certificaat (eveai-redis-certificate):** +``` +-----BEGIN CERTIFICATE----- +MIIDGTCCAgGg...z69LXyY= +-----END CERTIFICATE----- +``` + +#### Voordelen van deze setup + +- **Automatische sync**: Secrets worden elke 5 minuten geüpdatet +- **Geen code wijzigingen**: Je `environ.get()` calls blijven werken +- **Secure**: Credentials zijn niet in manifests, alleen in cluster +- **Centralized**: Alle secrets in Scaleway Secret Manager +- **Auditable**: External Secrets Operator logt alle acties +- **SSL support**: TLS certificaten worden correct behandeld + +#### File structuur ``` +scaleway/manifests/base/secrets/ +├── scaleway-secret-store.yaml +└── eveai-external-secrets.yaml +``` + +### Phase 5: TLS en Network setup + +#### Deploy HTTP ACME ingress + +Om het certificaat aan te maken, moet in de DNS-zone een A-record worden aangemaakt dat rechtstreeks naar het IP van de loadbalancer wijst. +We maken nog geen CNAME aan naar Bunny.net. Anders gaat bunny.net het ACME proces mogelijks onderbreken. + +Om het certificaat aan te maken, moeten we een HTTP ACME ingress gebruiken. Anders kan het certificaat niet worden aangemaakt. + +``` +kubectl apply -f scaleway/manifests/base/networking/ingress-http-acme.yaml +``` + +Check of het certificaat is aangemaakt (READY moet true zijn): + +``` +kubectl get certificate evie-staging-tls -n eveai-staging + +# of met meer detail + +kubectl -n eveai-staging describe certificate evie-staging-tls +``` + +Dit kan even duren. Maar zodra het certificaat is aangemaakt, kan je de de https-only ingress opzetten: + +``` +kubectl apply -f scaleway/manifests/base/networking/ingress-https.yaml +``` + +Om bunny.net te gebruiken: +- Nu kan het CNAME-record dat verwijst naar de Bunny.net Pull zone worden aangemaakt. +- In bunny.net moet in de pull-zone worden verwezen naar de loadbalancer IP via het HTTPS-protocol. + +### Phase 6: Verification Service + +Deze service kan ook al in Phase 5 worden geïnstalleerd om te verifiëren of de volledige netwerkstack (over bunny, certificaat, ...) werkt. 
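Once the verification manifests from the next code block have been applied, the whole chain (DNS, LoadBalancer, NGINX ingress, TLS certificate, verify service) can be exercised from outside the cluster. A minimal sketch, assuming the staging domain used throughout this guide and the flexible IP from `ingress-values.yaml`; `-k` is only needed while the Let's Encrypt *staging* issuer is in use:

```bash
# DNS: confirm the record resolves (A-record to the LoadBalancer, or CNAME to the Bunny pull zone)
nslookup evie-staging.askeveai.com

# HTTPS through the public path (CDN if configured, otherwise the LoadBalancer)
curl -ik https://evie-staging.askeveai.com/verify/health
curl -ik https://evie-staging.askeveai.com/verify/info

# Bypass DNS/CDN and talk to the LoadBalancer directly (IP from ingress-values.yaml)
curl -ik --resolve evie-staging.askeveai.com:443:51.159.25.49 https://evie-staging.askeveai.com/verify/health

# Inspect the certificate actually served by the ingress
openssl s_client -connect 51.159.25.49:443 -servername evie-staging.askeveai.com </dev/null 2>/dev/null \
  | openssl x509 -noout -subject -issuer -dates
```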
+ +```bash +# Deploy verification service +kubectl apply -k scaleway/manifests/base/applications/verification/ + + +### Phase 7: Complete Staging Deployment + +```bash +# Deploy everything using the staging overlay +kubectl apply -k scaleway/manifests/overlays/staging/ + +# Verify complete deployment +kubectl get all -n eveai-staging +kubectl get ingress -n eveai-staging +kubectl get certificates -n eveai-staging +``` + +## Verification and Testing + +### Check Infrastructure Status +```bash +# Verify ingress controller +kubectl get pods -n ingress-nginx kubectl describe service ingress-nginx-controller -n ingress-nginx + +# Verify cert-manager +kubectl get pods -n cert-manager +kubectl get clusterissuers + +# Check certificate status (may take a few minutes to issue) +kubectl describe certificate evie-staging-tls -n eveai-staging ``` -Je zoekt naar iets zoals: +### Test Services +```bash +# Get external IP from LoadBalancer +kubectl get svc -n ingress-nginx ingress-nginx-controller -``` -HTTP: Port 80 → NodePort 30080 (of een ander hoog nummer) -HTTPS: Port 443 → NodePort 30443 (of een ander hoog nummer) -``` -### Stap 3: Check de scaleway loadbalancer -Er werd normaal gezien automatisch een loadbalancer aangemaakt. Check of dit klopt. Deze is automatisch correct geconfigureerd en kan niet worden aangepast. +# Test HTTPS access (replace with your domain) +curl -k https://evie-staging.askeveai.com/verify/health +curl -k https://evie-staging.askeveai.com/verify/info -### Stap 4: Verifieer de firewall rules - -- Ga in de console naar Compute - CPU & GPU Instances -- Ga naar de security groups tab -- Klik op de security group voor je cluster (Kapsule Default Security Group) -- Ga naar de rules tab, en check of de poort (3xxxx) is toegevoegd aan de firewall rules, en voeg die toe indien nog niet aanwezig. -- Stel dit eerst in voor de volledige ipv4 range - -### Stap 4: Test de Basis Setup -Test of de ingress controller intern bereikbaar is (vervang de IP en NodePort door je eigen): - -``` -kubectl run test-pod --image=curlimages/curl -it --rm -- curl -H "Host: evie.askeveai.com" http://172.16.16.5:31127 +# Test monitoring (if deployed) +kubectl port-forward -n monitoring svc/monitoring-grafana 3000:80 +# Access Grafana at http://localhost:3000 (admin/admin123) ``` -Er moet een 404 boodschap komen (dat is goed! Het betekent dat nginx draait) +## DNS Configuration -Test of de ingress controller extern bereikbaar is (pas IP aan): +### Update DNS Records +- Create A-record pointing to LoadBalancer external IP +- Or set up CNAME if using CDN -``` -curl -H "Host: evie.askeveai.com" http://51.159.204.52 -``` +### Test Domain Access +```bash +# Test domain resolution +nslookup evie-staging.askeveai.com -## Fase 2: Deploy test applicatie - -We hebben een kleine test applicatie geïntegreerd in staging-test-setup.yaml. Installeer deze via: - -``` -kubectl apply -f staging-test-setup.yaml -``` - -En check met - -``` -curl -H "Host: evie-staging.askeveai.com" http://51.159.204.52/verify/ -``` - -### Later Uitbreiden -Wanneer je echte services deploy, uncomment je de relevante ingress paths en deploy je de bijbehorende services. De verify service blijft beschikbaar voor debugging. -Deze setup geeft je een professionele staging environment met ingebouwde monitoring en debug capabilities. - -## Fase 3: Configureer DNS -Maak het juist A-record aan in de DNS zone. Dit moet verwijzen naar de publieke IP van de loadbalancer. 
- -Je kan testen met: - -``` -curl http://evie-staging.askeveai.com/verify/ -``` - -In de browser zal dit waarschijnlijk niet werken, omdat de site nog niet is beveiligd met SSL. - -## Fase 4: Bunny CDN Setup -Eerst zorg je dat Bunny klaar is om te werken. - -- Creëer een Pull zone - evie-staging -- Origin = http://[IP van load balancer] -- Host header = evie-staging.askeveai.com -- Force SSL - Aan - -Daarna wijzig je A-record in de DNS zone. (waarschijnlijk verwijderen en CNAME record toevoegen) - -## Fase 5: Introductie Secure communication - -### Installatie van SSL Certificaat in de bunny pull zone -- Voeg een hostname toe aan de bunny pull zone (evie-staging.askeveai.com) -- Voeg een SSL certificaat toe aan de bunny pull zone (volg gewoon de instructies) -- Enable Force SSL - -Je kan checken met: - -``` +# Test HTTPS access via domain curl https://evie-staging.askeveai.com/verify/ ``` -### Installatie cert-manager in de cluster +## CDN Setup (Bunny.net - Optional) -``` -kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.crds.yaml -kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.yaml +### Configure Pull Zone +- Create Pull zone: evie-staging +- Origin: https://[LoadBalancer-IP] (note HTTPS!) +- Host header: evie-staging.askeveai.com +- Force SSL: Enabled + +### Update DNS for CDN +- Change A-record to CNAME pointing to CDN endpoint +- Or update A-record to CDN IP + +## Key Differences from Old Setup + +### Advantages of New Modular Approach +1. **Modular Structure**: Separate infrastructure from applications +2. **Environment Management**: Easy staging/production separation +3. **HTTPS-First**: TLS certificates managed automatically +4. **Monitoring Integration**: Prometheus/Grafana via Helm charts +5. **Scaleway Integration**: Managed services secrets support +6. **Maintainability**: Clear separation of concerns + +### Migration Benefits +- **Organized**: Base configurations with environment overlays +- **Scalable**: Easy to add new services or environments +- **Secure**: HTTPS-only from deployment start +- **Observable**: Built-in monitoring stack +- **Automated**: Less manual intervention required + +## Troubleshooting + +### Common Issues +```bash +# Certificate not issued +kubectl describe certificate evie-staging-tls -n eveai-staging +kubectl logs -n cert-manager deployment/cert-manager + +# Ingress not accessible +kubectl describe ingress eveai-staging-ingress -n eveai-staging +kubectl logs -n ingress-nginx deployment/ingress-nginx-controller + +# Check events for issues +kubectl get events -n eveai-staging --sort-by='.lastTimestamp' ``` -En het cert-manager-setup.yaml manifest toepassen (zorgen dat email adres en domein correct zijn) - -``` -kubectl apply -f cert-manager-setup.yaml -``` +For detailed troubleshooting, refer to the main deployment guide: `documentation/scaleway-deployment-guide.md` diff --git a/documentation/scaleway-deployment-guide.md b/documentation/scaleway-deployment-guide.md new file mode 100644 index 0000000..bcbcaaa --- /dev/null +++ b/documentation/scaleway-deployment-guide.md @@ -0,0 +1,355 @@ +# EveAI Scaleway Deployment Guide + +## Overview + +This guide covers the deployment of EveAI to Scaleway Kubernetes using a modular Kustomize structure with Helm integration for monitoring services. 
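The commands below sketch how this Kustomize/Helm combination is typically driven; they assume the directory layout described in this guide and are meant as orientation only, the phased rollout in the following sections remains the procedure to follow:

```bash
# Render and apply a base that wraps a Helm chart (the monitoring base uses the helmCharts field)
kubectl kustomize --enable-helm scaleway/manifests/base/monitoring/ | kubectl apply -f -

# Render the staging overlay without applying it (review / dry-run step)
kubectl kustomize scaleway/manifests/overlays/staging/

# Apply the complete staging overlay (infrastructure, applications, networking)
kubectl apply -k scaleway/manifests/overlays/staging/
```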
+ +## Architecture + +### Managed Services (Scaleway) +- **PostgreSQL**: Database service +- **Redis**: Message broker and cache +- **MinIO**: Object storage (S3-compatible) +- **Secret Manager**: Secure storage for secrets + +### Kubernetes Services +- **Infrastructure**: Ingress Controller, Cert-Manager, TLS certificates +- **Applications**: EveAI services (app, api, workers, etc.) +- **Monitoring**: Prometheus, Grafana, Pushgateway (via Helm) +- **Verification**: Permanent cluster health monitoring service + +## Directory Structure + +``` +scaleway/manifests/ +├── base/ # Base configurations +│ ├── infrastructure/ # Core infrastructure +│ │ ├── 00-namespaces.yaml # Namespace definitions +│ │ ├── 01-ingress-controller.yaml # NGINX Ingress Controller +│ │ ├── 02-cert-manager.yaml # Cert-Manager setup +│ │ └── 03-cluster-issuers.yaml # Let's Encrypt issuers +│ ├── applications/ # Application services +│ │ └── verification/ # Verification service +│ │ ├── 00-configmaps.yaml # HTML content and nginx config +│ │ ├── 01-deployment.yaml # Deployment specification +│ │ └── 02-service.yaml # Service definition +│ ├── monitoring/ # Monitoring stack (Helm) +│ │ ├── kustomization.yaml # Helm chart integration +│ │ └── values-monitoring.yaml # Prometheus stack values +│ ├── secrets/ # Secret definitions +│ │ └── scaleway-secrets.yaml # Scaleway Secret Manager integration +│ └── networking/ # Network configuration +│ └── ingress-https.yaml # HTTPS-only ingress +└── overlays/ # Environment-specific configs + ├── staging/ # Staging environment + │ └── kustomization.yaml # Staging overlay + └── production/ # Production environment (future) + └── kustomization.yaml # Production overlay +``` + +## Prerequisites + +### 1. Scaleway Setup +- Kubernetes cluster running +- Managed services configured: + - PostgreSQL database + - Redis instance + - MinIO object storage +- Secrets stored in Scaleway Secret Manager: + - `eveai-app-keys` + - `eveai-mistral` + - `eveai-object-storage` + - `eveai-openai` + - `eveai-postgresql` + - `eveai-redis` + - `eveai-redis-certificate` + +### 2. Local Tools +```bash +# Install required tools +kubectl version --client +kustomize version +helm version + +# Install Kustomize Helm plugin +kubectl kustomize --enable-helm +``` + +### 3. Cluster Access +```bash +# Configure kubectl for Scaleway cluster +scw k8s kubeconfig install +kubectl cluster-info +``` + +## Deployment Process + +### Phase 1: Infrastructure Foundation + +Deploy core infrastructure components in order: + +```bash +# 1. Deploy namespaces +kubectl apply -f scaleway/manifests/base/infrastructure/00-namespaces.yaml + +# 2. Deploy ingress controller +kubectl apply -f scaleway/manifests/base/infrastructure/01-ingress-controller.yaml + +# Wait for ingress controller to be ready +kubectl wait --namespace ingress-nginx \ + --for=condition=ready pod \ + --selector=app.kubernetes.io/component=controller \ + --timeout=300s + +# 3. Install cert-manager CRDs (required first) +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.crds.yaml + +# 4. Deploy cert-manager +kubectl apply -f scaleway/manifests/base/infrastructure/02-cert-manager.yaml + +# Wait for cert-manager to be ready +kubectl wait --namespace cert-manager \ + --for=condition=ready pod \ + --selector=app.kubernetes.io/name=cert-manager \ + --timeout=300s + +# 5. 
Deploy cluster issuers +kubectl apply -f scaleway/manifests/base/infrastructure/03-cluster-issuers.yaml +``` + +### Phase 2: Monitoring Stack + +Deploy monitoring services using Helm integration: + +```bash +# Add Prometheus community Helm repository +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +# Deploy monitoring stack via Kustomize + Helm +kubectl kustomize --enable-helm scaleway/manifests/base/monitoring/ | kubectl apply -f - + +# Verify monitoring deployment +kubectl get pods -n monitoring +kubectl get pvc -n monitoring +``` + +### Phase 3: Application Services + +Deploy verification service and secrets: + +```bash +# Deploy secrets (update with actual Scaleway Secret Manager values first) +kubectl apply -f scaleway/manifests/base/secrets/scaleway-secrets.yaml + +# Deploy verification service +kubectl apply -k scaleway/manifests/base/applications/verification/ + +# Deploy HTTPS ingress +kubectl apply -f scaleway/manifests/base/networking/ingress-https.yaml +``` + +### Phase 4: Complete Staging Deployment + +Deploy everything using the staging overlay: + +```bash +# Deploy complete staging environment +kubectl apply -k scaleway/manifests/overlays/staging/ + +# Verify deployment +kubectl get all -n eveai-staging +kubectl get ingress -n eveai-staging +kubectl get certificates -n eveai-staging +``` + +## Verification and Testing + +### 1. Check Infrastructure +```bash +# Verify ingress controller +kubectl get pods -n ingress-nginx +kubectl get svc -n ingress-nginx + +# Verify cert-manager +kubectl get pods -n cert-manager +kubectl get clusterissuers + +# Check certificate status +kubectl describe certificate evie-staging-tls -n eveai-staging +``` + +### 2. Test Verification Service +```bash +# Get external IP +kubectl get svc -n ingress-nginx + +# Test HTTPS access (replace with actual IP/domain) +curl -k https://evie-staging.askeveai.com/verify/health +curl -k https://evie-staging.askeveai.com/verify/info +``` + +### 3. Monitor Services +```bash +# Check monitoring stack +kubectl get pods -n monitoring +kubectl port-forward -n monitoring svc/monitoring-grafana 3000:80 + +# Access Grafana at http://localhost:3000 +# Default credentials: admin/admin123 +``` + +## Secret Management + +### Updating Secrets from Scaleway Secret Manager + +The secrets in `scaleway-secrets.yaml` use template placeholders. To use actual values: + +1. **Manual approach**: Replace template values with actual secrets +2. 
**Automated approach**: Use a secret management tool like External Secrets Operator + +Example manual update: +```bash +# Replace template placeholders like: +# password: "{{ .Values.database.password }}" +# +# With actual values from Scaleway Secret Manager: +# password: "actual-password-from-scaleway" +``` + +### Recommended: External Secrets Operator + +For production, consider using External Secrets Operator to automatically sync from Scaleway Secret Manager: + +```bash +# Install External Secrets Operator +helm repo add external-secrets https://charts.external-secrets.io +helm install external-secrets external-secrets/external-secrets -n external-secrets-system --create-namespace +``` + +## Monitoring and Observability + +### Grafana Dashboards +- **URL**: `https://evie-staging.askeveai.com/monitoring` (when ingress path is configured) +- **Credentials**: admin/admin123 (change in production) +- **Pre-configured**: EveAI-specific dashboards in `/EveAI` folder + +### Prometheus Metrics +- **Internal URL**: `http://monitoring-prometheus:9090` +- **Scrapes**: Kubernetes metrics, application metrics, Scaleway managed services + +### Pushgateway +- **Internal URL**: `http://monitoring-pushgateway:9091` +- **Usage**: For batch job metrics from EveAI workers + +## Troubleshooting + +### Common Issues + +1. **Certificate not issued** + ```bash + kubectl describe certificate evie-staging-tls -n eveai-staging + kubectl logs -n cert-manager deployment/cert-manager + ``` + +2. **Ingress not accessible** + ```bash + kubectl describe ingress eveai-staging-ingress -n eveai-staging + kubectl logs -n ingress-nginx deployment/ingress-nginx-controller + ``` + +3. **Monitoring stack issues** + ```bash + kubectl logs -n monitoring deployment/monitoring-prometheus-server + kubectl get pvc -n monitoring # Check storage + ``` + +4. **Secret issues** + ```bash + kubectl get secrets -n eveai-staging + kubectl describe secret database-secrets -n eveai-staging + ``` + +### Useful Commands + +```bash +# View all resources in staging +kubectl get all -n eveai-staging + +# Check resource usage +kubectl top pods -n eveai-staging +kubectl top nodes + +# View logs +kubectl logs -f deployment/verify-service -n eveai-staging + +# Port forwarding for local access +kubectl port-forward -n eveai-staging svc/verify-service 8080:80 +``` + +## Scaling and Updates + +### Scaling Services +```bash +# Scale verification service +kubectl scale deployment verify-service --replicas=3 -n eveai-staging + +# Update image +kubectl set image deployment/verify-service nginx=nginx:1.21-alpine -n eveai-staging +``` + +### Rolling Updates +```bash +# Update using Kustomize +kubectl apply -k scaleway/manifests/overlays/staging/ + +# Check rollout status +kubectl rollout status deployment/verify-service -n eveai-staging +``` + +## Production Deployment + +For production deployment: + +1. Create `scaleway/manifests/overlays/production/kustomization.yaml` +2. Update domain names and certificates +3. Adjust resource limits and replicas +4. Use production Let's Encrypt issuer +5. Configure production monitoring and alerting + +```bash +# Production deployment +kubectl apply -k scaleway/manifests/overlays/production/ +``` + +## Security Considerations + +1. **Secrets**: Use Scaleway Secret Manager integration +2. **TLS**: HTTPS-only with automatic certificate renewal +3. **Network**: Ingress-based routing with proper annotations +4. **RBAC**: Kubernetes role-based access control +5. 
**Images**: Use specific tags, not `latest` + +## Maintenance + +### Regular Tasks +- Monitor certificate expiration +- Update Helm charts and container images +- Review resource usage and scaling +- Backup monitoring data +- Update secrets rotation + +### Monitoring Alerts +Configure alerts for: +- Certificate expiration (< 30 days) +- Pod failures and restarts +- Resource usage thresholds +- External service connectivity + +## Support + +For issues and questions: +1. Check logs using kubectl commands above +2. Verify Scaleway managed services status +3. Review Kubernetes events: `kubectl get events -n eveai-staging` +4. Check monitoring dashboards for system health \ No newline at end of file diff --git a/scaleway/manifests/staging/staging-test-setup.yaml b/scaleway/manifests/base/applications/verification/00-configmaps.yaml similarity index 77% rename from scaleway/manifests/staging/staging-test-setup.yaml rename to scaleway/manifests/base/applications/verification/00-configmaps.yaml index 71d1c08..59f751b 100644 --- a/scaleway/manifests/staging/staging-test-setup.yaml +++ b/scaleway/manifests/base/applications/verification/00-configmaps.yaml @@ -1,15 +1,3 @@ -# staging-test-setup.yaml -# Complete test and debug setup for EveAI staging environment - -apiVersion: v1 -kind: Namespace -metadata: - name: eveai-staging - labels: - environment: staging - app: eveai - ---- # ConfigMap with HTML content for the test interface apiVersion: v1 kind: ConfigMap @@ -331,172 +319,51 @@ data: add_header Content-Type text/plain; } - # Health endpoint + # Handle /verify prefix paths - NEW + location /verify/health { + try_files /health.html =404; + } + + location /verify/info { + try_files /info.html =404; + } + + location /verify/headers { + try_files /headers.html =404; + } + + location /verify/network { + try_files /network.html =404; + } + + # /verify root - serve main interface + location /verify/ { + try_files /index.html =404; + } + + location /verify { + try_files /index.html =404; + } + + # Original paths (for direct access) location /health { try_files /health.html =404; } - # Info endpoint location /info { try_files /info.html =404; } - # Headers analysis location /headers { try_files /headers.html =404; } - # Network tests location /network { try_files /network.html =404; } - # Main interface - serve index.html for all other requests + # Main interface - serve index.html for root location / { try_files $uri $uri/ /index.html; } - } - ---- -# Verification service deployment -apiVersion: apps/v1 -kind: Deployment -metadata: - name: verify-service - namespace: eveai-staging - labels: - app: verify-service - component: verification - environment: staging -spec: - replicas: 1 - selector: - matchLabels: - app: verify-service - template: - metadata: - labels: - app: verify-service - component: verification - spec: - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html-content - mountPath: /usr/share/nginx/html - - name: nginx-config - mountPath: /etc/nginx/conf.d - resources: - requests: - memory: "32Mi" - cpu: "50m" - limits: - memory: "64Mi" - cpu: "100m" - livenessProbe: - httpGet: - path: /verify/health - port: 80 - initialDelaySeconds: 10 - periodSeconds: 10 - readinessProbe: - httpGet: - path: /verify/health - port: 80 - initialDelaySeconds: 5 - periodSeconds: 5 - volumes: - - name: html-content - configMap: - name: verify-content - - name: nginx-config - configMap: - name: verify-nginx-config - ---- -# Service for the verification app 
-apiVersion: v1 -kind: Service -metadata: - name: verify-service - namespace: eveai-staging - labels: - app: verify-service -spec: - selector: - app: verify-service - ports: - - port: 80 - targetPort: 80 - name: http - type: ClusterIP - ---- -# Ingress rules with path-based routing -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: eveai-staging-ingress - namespace: eveai-staging - labels: - app: eveai - environment: staging - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/ssl-redirect: "false" - nginx.ingress.kubernetes.io/proxy-body-size: "10m" - nginx.ingress.kubernetes.io/proxy-read-timeout: "300" -spec: - ingressClassName: nginx - rules: - - host: evie-staging.askeveai.com - http: - paths: - # Verification service paths - - path: /verify - pathType: Prefix - backend: - service: - name: verify-service - port: - number: 80 - - # Future services (commented out for now) - # Admin service (not deployed yet) - # - path: /admin - # pathType: Prefix - # backend: - # service: - # name: admin-service - # port: - # number: 80 - - # API service (not deployed yet) - # - path: /api - # pathType: Prefix - # backend: - # service: - # name: api-service - # port: - # number: 8000 - - # Client/Frontend service (not deployed yet) - # - path: /client - # pathType: Prefix - # backend: - # service: - # name: client-service - # port: - # number: 3000 - - # Default: root path to verification service - - path: / - pathType: Prefix - backend: - service: - name: verify-service - port: - number: 80 \ No newline at end of file + } \ No newline at end of file diff --git a/scaleway/manifests/base/applications/verification/01-deployment.yaml b/scaleway/manifests/base/applications/verification/01-deployment.yaml new file mode 100644 index 0000000..5d57e50 --- /dev/null +++ b/scaleway/manifests/base/applications/verification/01-deployment.yaml @@ -0,0 +1,57 @@ +# Verification service deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: verify-service + namespace: eveai-staging + labels: + app: verify-service + component: verification + environment: staging +spec: + replicas: 1 + selector: + matchLabels: + app: verify-service + template: + metadata: + labels: + app: verify-service + component: verification + spec: + containers: + - name: nginx + image: nginx:alpine + ports: + - containerPort: 80 + volumeMounts: + - name: html-content + mountPath: /usr/share/nginx/html + - name: nginx-config + mountPath: /etc/nginx/conf.d + resources: + requests: + memory: "32Mi" + cpu: "50m" + limits: + memory: "64Mi" + cpu: "100m" + livenessProbe: + httpGet: + path: /verify/health + port: 80 + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /verify/health + port: 80 + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: html-content + configMap: + name: verify-content + - name: nginx-config + configMap: + name: verify-nginx-config \ No newline at end of file diff --git a/scaleway/manifests/base/applications/verification/02-service.yaml b/scaleway/manifests/base/applications/verification/02-service.yaml new file mode 100644 index 0000000..3751cd3 --- /dev/null +++ b/scaleway/manifests/base/applications/verification/02-service.yaml @@ -0,0 +1,16 @@ +# Service for the verification app +apiVersion: v1 +kind: Service +metadata: + name: verify-service + namespace: eveai-staging + labels: + app: verify-service +spec: + selector: + app: verify-service + ports: + - port: 80 + targetPort: 80 + name: http + type: ClusterIP \ No newline at 
end of file diff --git a/scaleway/manifests/base/applications/verification/kustomization.yaml b/scaleway/manifests/base/applications/verification/kustomization.yaml new file mode 100644 index 0000000..2607454 --- /dev/null +++ b/scaleway/manifests/base/applications/verification/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- 00-configmaps.yaml +- 01-deployment.yaml +- 02-service.yaml + +labels: +- pairs: + app: verify-service + component: verification \ No newline at end of file diff --git a/scaleway/manifests/base/infrastructure/00-namespaces.yaml b/scaleway/manifests/base/infrastructure/00-namespaces.yaml new file mode 100644 index 0000000..6511e1d --- /dev/null +++ b/scaleway/manifests/base/infrastructure/00-namespaces.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: eveai-staging + labels: + environment: staging + app: eveai + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + labels: + environment: staging + app: monitoring \ No newline at end of file diff --git a/scaleway/manifests/base/infrastructure/03-cluster-issuers.yaml b/scaleway/manifests/base/infrastructure/03-cluster-issuers.yaml new file mode 100644 index 0000000..1fe3daa --- /dev/null +++ b/scaleway/manifests/base/infrastructure/03-cluster-issuers.yaml @@ -0,0 +1,15 @@ +# ClusterIssuer for Let's Encrypt staging (test first) +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-staging +spec: + acme: + server: https://acme-staging-v02.api.letsencrypt.org/directory + email: pieter@askeveai.com + privateKeySecretRef: + name: letsencrypt-staging + solvers: + - http01: + ingress: + class: nginx diff --git a/scaleway/manifests/base/infrastructure/cert-manager-values.yaml b/scaleway/manifests/base/infrastructure/cert-manager-values.yaml new file mode 100644 index 0000000..4a2e51e --- /dev/null +++ b/scaleway/manifests/base/infrastructure/cert-manager-values.yaml @@ -0,0 +1,52 @@ +# cert-manager-values.yaml +# Global configuration +global: + leaderElection: + namespace: "cert-manager" + +# Install CRDs as part of Helm release (alternative to manual install) +# installCRDs: false # We install manually above + +# Resource configuration +resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 10m + memory: 32Mi + +# Webhook configuration +webhook: + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 10m + memory: 32Mi + +# CA Injector configuration +cainjector: + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 10m + memory: 32Mi + +# Security context +securityContext: + runAsNonRoot: true + runAsUser: 1001 + +# Node selector (optional) +# nodeSelector: +# kubernetes.io/os: linux + +# Prometheus monitoring (disabled - no Prometheus operator installed) +prometheus: + enabled: false + servicemonitor: + enabled: false \ No newline at end of file diff --git a/scaleway/manifests/base/infrastructure/ingress-values.yaml b/scaleway/manifests/base/infrastructure/ingress-values.yaml new file mode 100644 index 0000000..582519d --- /dev/null +++ b/scaleway/manifests/base/infrastructure/ingress-values.yaml @@ -0,0 +1,46 @@ +# ingress-values.yaml +controller: + # Disable admission webhook to prevent ACME challenge validation issues + admissionWebhooks: + enabled: false + # Service configuratie + service: + type: LoadBalancer + loadBalancerIP: "51.159.25.49" # Jouw huidige IP + annotations: + # Scaleway specifieke annotaties + 
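      # The scw-loadbalancer-* annotations below are read by Scaleway's cloud controller
      # manager: use-hostname publishes the LB hostname instead of the IP in the Service
      # status, and the proxy-protocol flags keep PROXY protocol disabled so NGINX
      # receives plain HTTP/HTTPS.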
service.beta.kubernetes.io/scw-loadbalancer-use-hostname: "true" + service.beta.kubernetes.io/scw-loadbalancer-proxy-protocol-v1: "false" + service.beta.kubernetes.io/scw-loadbalancer-proxy-protocol-v2: "false" + + # Resource limits (optioneel, maar aanbevolen) + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 128Mi + + # Replica's voor high availability (optioneel) + replicaCount: 1 + + # Node selector (optioneel, voor specific nodes) + # nodeSelector: + # kubernetes.io/os: linux + + # Metrics (voor monitoring later) + metrics: + enabled: true + service: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "10254" + +# Default backend (optioneel) +defaultBackend: + enabled: true + image: + registry: registry.k8s.io + image: defaultbackend-amd64 + tag: "1.5" \ No newline at end of file diff --git a/scaleway/manifests/base/kustomization.yaml b/scaleway/manifests/base/kustomization.yaml new file mode 100644 index 0000000..71cc1c7 --- /dev/null +++ b/scaleway/manifests/base/kustomization.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- infrastructure/00-namespaces.yaml +- infrastructure/01-ingress-controller.yaml +- infrastructure/02-cert-manager.yaml +- infrastructure/03-cluster-issuers.yaml +- applications/verification/ +- networking/ingress-https.yaml +- secrets/scaleway-secrets.yaml +- monitoring/ + +labels: +- pairs: + app: eveai + managed-by: kustomize \ No newline at end of file diff --git a/scaleway/manifests/base/monitoring/kustomization.yaml b/scaleway/manifests/base/monitoring/kustomization.yaml new file mode 100644 index 0000000..2cbb296 --- /dev/null +++ b/scaleway/manifests/base/monitoring/kustomization.yaml @@ -0,0 +1,16 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: monitoring + +helmCharts: +- name: kube-prometheus-stack + repo: https://prometheus-community.github.io/helm-charts + version: "55.5.0" + releaseName: monitoring + namespace: monitoring + valuesFile: values-monitoring.yaml + +commonLabels: + environment: staging + managed-by: kustomize \ No newline at end of file diff --git a/scaleway/manifests/base/monitoring/prometheus-values.yaml b/scaleway/manifests/base/monitoring/prometheus-values.yaml new file mode 100644 index 0000000..03d393f --- /dev/null +++ b/scaleway/manifests/base/monitoring/prometheus-values.yaml @@ -0,0 +1,71 @@ +# prometheus-values.yaml +# Global settings +fullnameOverride: "monitoring" + +# Prometheus configuration +prometheus: + prometheusSpec: + retention: 15d + resources: + limits: + cpu: 500m + memory: 2Gi + requests: + cpu: 100m + memory: 512Mi + storageSpec: + volumeClaimTemplate: + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + +# Grafana configuration +grafana: + enabled: true + adminPassword: "admin123" # Change this for production + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 50m + memory: 128Mi + persistence: + enabled: true + size: 2Gi + +# AlertManager configuration +alertmanager: + alertmanagerSpec: + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 10m + memory: 64Mi + storage: + volumeClaimTemplate: + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 2Gi + +# Node Exporter +nodeExporter: + enabled: true + +# Kube State Metrics +kubeStateMetrics: + enabled: true + +# Disable components you might not need in staging +kubeEtcd: + enabled: false +kubeScheduler: + enabled: false 
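# Note: on a managed control plane (Scaleway Kapsule) etcd, the scheduler and the
# controller manager are not reachable for scraping, hence these exporters stay disabled.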
+kubeControllerManager: + enabled: false \ No newline at end of file diff --git a/scaleway/manifests/base/monitoring/values-monitoring.yaml b/scaleway/manifests/base/monitoring/values-monitoring.yaml new file mode 100644 index 0000000..2bbd408 --- /dev/null +++ b/scaleway/manifests/base/monitoring/values-monitoring.yaml @@ -0,0 +1,131 @@ +# Prometheus Community Helm Chart Values +# For kube-prometheus-stack + +# Global settings +global: + scrape_interval: 15s + evaluation_interval: 15s + +# Prometheus configuration +prometheus: + prometheusSpec: + retention: 30d + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: scw-bssd + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi + + # External services monitoring (Scaleway managed services) + additionalScrapeConfigs: + - job_name: 'scaleway-redis' + static_configs: + - targets: ['redis-endpoint:6379'] + metrics_path: /metrics + scrape_interval: 30s + + - job_name: 'scaleway-postgresql' + static_configs: + - targets: ['postgres-endpoint:5432'] + metrics_path: /metrics + scrape_interval: 30s + + # Resource limits + resources: + requests: + memory: 2Gi + cpu: 500m + limits: + memory: 4Gi + cpu: 1000m + +# Grafana configuration +grafana: + adminPassword: "admin123" # Change in production + persistence: + enabled: true + storageClassName: scw-bssd + size: 10Gi + + # Resource limits + resources: + requests: + memory: 256Mi + cpu: 100m + limits: + memory: 512Mi + cpu: 200m + + # Pre-configured dashboards + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: 'eveai-dashboards' + folder: 'EveAI' + type: file + options: + path: /var/lib/grafana/dashboards/eveai + + # Ingress configuration (will be handled by main ingress) + ingress: + enabled: false + +# Pushgateway for batch jobs +pushgateway: + enabled: true + serviceMonitor: + enabled: true + resources: + requests: + memory: 64Mi + cpu: 50m + limits: + memory: 128Mi + cpu: 100m + +# AlertManager +alertmanager: + alertmanagerSpec: + storage: + volumeClaimTemplate: + spec: + storageClassName: scw-bssd + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + resources: + requests: + memory: 128Mi + cpu: 50m + limits: + memory: 256Mi + cpu: 100m + +# Node Exporter +nodeExporter: + enabled: true + +# Kube State Metrics +kubeStateMetrics: + enabled: true + +# Disable components we don't need +kubeApiServer: + enabled: false +kubelet: + enabled: true +kubeControllerManager: + enabled: false +coreDns: + enabled: true +kubeEtcd: + enabled: false +kubeScheduler: + enabled: false +kubeProxy: + enabled: false \ No newline at end of file diff --git a/scaleway/manifests/base/networking/ingress-http-acme.yaml b/scaleway/manifests/base/networking/ingress-http-acme.yaml new file mode 100644 index 0000000..0bbdbf2 --- /dev/null +++ b/scaleway/manifests/base/networking/ingress-http-acme.yaml @@ -0,0 +1,44 @@ +# Temporary HTTP-only ingress for ACME certificate challenges +# Use this temporarily while Let's Encrypt validates domain ownership +# Remove this file and revert to ingress-https.yaml once certificate is Ready + +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: eveai-staging-ingress + namespace: eveai-staging + labels: + app: eveai + environment: staging + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + cert-manager.io/cluster-issuer: letsencrypt-staging +spec: + ingressClassName: 
nginx + tls: + - hosts: + - evie-staging.askeveai.com + secretName: evie-staging-tls + rules: + - host: evie-staging.askeveai.com + http: + paths: + # Verification service paths + - path: /verify + pathType: Prefix + backend: + service: + name: verify-service + port: + number: 80 + + # Default: root path to verification service + - path: / + pathType: Prefix + backend: + service: + name: verify-service + port: + number: 80 \ No newline at end of file diff --git a/scaleway/manifests/base/networking/ingress-https.yaml b/scaleway/manifests/base/networking/ingress-https.yaml new file mode 100644 index 0000000..dc94431 --- /dev/null +++ b/scaleway/manifests/base/networking/ingress-https.yaml @@ -0,0 +1,80 @@ +# HTTPS-only ingress with TLS configuration +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: eveai-staging-ingress + namespace: eveai-staging + labels: + app: eveai + environment: staging + annotations: + kubernetes.io/ingress.class: nginx + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "300" + cert-manager.io/cluster-issuer: letsencrypt-staging +spec: + ingressClassName: nginx + tls: + - hosts: + - evie-staging.askeveai.com + secretName: evie-staging-tls + rules: + - host: evie-staging.askeveai.com + http: + paths: + # Verification service paths + - path: /verify + pathType: Prefix + backend: + service: + name: verify-service + port: + number: 80 + + # Future services (ready for deployment) + # Admin service + # - path: /admin + # pathType: Prefix + # backend: + # service: + # name: eveai-app-service + # port: + # number: 80 + + # API service + # - path: /api + # pathType: Prefix + # backend: + # service: + # name: eveai-api-service + # port: + # number: 80 + + # Client/Frontend service + # - path: /client + # pathType: Prefix + # backend: + # service: + # name: eveai-chat-client-service + # port: + # number: 80 + + # Monitoring (when deployed) + # - path: /monitoring + # pathType: Prefix + # backend: + # service: + # name: monitoring-grafana + # port: + # number: 80 + + # Default: root path to verification service + - path: / + pathType: Prefix + backend: + service: + name: verify-service + port: + number: 80 \ No newline at end of file diff --git a/scaleway/manifests/base/secrets/eveai-external-secrets.yaml b/scaleway/manifests/base/secrets/eveai-external-secrets.yaml new file mode 100644 index 0000000..d8950c2 --- /dev/null +++ b/scaleway/manifests/base/secrets/eveai-external-secrets.yaml @@ -0,0 +1,38 @@ +# eveai-external-secrets.yaml +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: eveai-external-secrets + namespace: eveai-staging +spec: + refreshInterval: 300s + secretStoreRef: + name: scaleway-secret-store + kind: SecretStore + target: + name: eveai-secrets + creationPolicy: Owner + dataFrom: + # Alle keys uit eveai-postgresql secret + - extract: + key: name:eveai-postgresql + # Alle keys uit eveai-redis secret + - extract: + key: name:eveai-redis + # Alle keys uit eveai-openai secret + - extract: + key: name:eveai-openai + # Alle keys uit eveai-mistral secret + - extract: + key: name:eveai-mistral + # Alle keys uit eveai-app-keys secret + - extract: + key: name:eveai-app-keys + # Alle keys uit eveai-object-storage secret + - extract: + key: name:eveai-object-storage + data: + # Certificaat als aparte data entry + - secretKey: REDIS_CERT + remoteRef: + key: 
name:eveai-redis-certificate diff --git a/scaleway/manifests/base/secrets/scaleway-secret-store.yaml b/scaleway/manifests/base/secrets/scaleway-secret-store.yaml new file mode 100644 index 0000000..ed4ea94 --- /dev/null +++ b/scaleway/manifests/base/secrets/scaleway-secret-store.yaml @@ -0,0 +1,19 @@ +apiVersion: external-secrets.io/v1 +kind: SecretStore +metadata: + name: scaleway-secret-store + namespace: eveai-staging +spec: + provider: + scaleway: +# apiUrl: "https://api.scaleway.com" + region: "fr-par" + projectId: "ad7d2ed9-252b-4b2a-9f4c-daca3edc4c4b" # Vervang met je Scaleway project ID + accessKey: + secretRef: + name: scaleway-credentials + key: access-key + secretKey: + secretRef: + name: scaleway-credentials + key: secret-key diff --git a/scaleway/manifests/overlays/staging/kustomization.yaml b/scaleway/manifests/overlays/staging/kustomization.yaml new file mode 100644 index 0000000..29c846a --- /dev/null +++ b/scaleway/manifests/overlays/staging/kustomization.yaml @@ -0,0 +1,29 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# Reference base components +resources: +- ../../base/ + +# Staging-specific configuration +namePrefix: "" +nameSuffix: "" + +commonLabels: + environment: staging + managed-by: kustomize + +# Images (can be overridden for staging-specific versions) +images: +- name: nginx + newTag: alpine + +# ConfigMap and Secret generators for staging-specific values +configMapGenerator: +- name: staging-config + literals: + - ENVIRONMENT=staging + - LOG_LEVEL=INFO + - DEBUG=false + +# Note: Namespace is handled per resource to avoid conflicts \ No newline at end of file diff --git a/scaleway/manifests/staging/cert-manager-setup.yaml b/scaleway/manifests/staging/cert-manager-setup.yaml deleted file mode 100644 index e37991b..0000000 --- a/scaleway/manifests/staging/cert-manager-setup.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# cert-manager-setup.yaml -# Install cert-manager for automatic SSL certificate management - -# Install cert-manager CRDs first -# kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.2/cert-manager.crds.yaml - -# cert-manager namespace -apiVersion: v1 -kind: Namespace -metadata: - name: cert-manager - ---- -# ClusterIssuer for Let's Encrypt staging (test first) -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-staging -spec: - acme: - server: https://acme-staging-v02.api.letsencrypt.org/directory - email: pieter@askeveai.com # CHANGE THIS - privateKeySecretRef: - name: letsencrypt-staging - solvers: - - http01: - ingress: - class: nginx - ---- -# ClusterIssuer for Let's Encrypt production -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-prod -spec: - acme: - server: https://acme-v02.api.letsencrypt.org/directory - email: pieter@askeveai.com # CHANGE THIS - privateKeySecretRef: - name: letsencrypt-prod - solvers: - - http01: - ingress: - class: nginx - ---- -# Updated ingress with TLS configuration -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: eveai-staging-ingress-https - namespace: eveai-staging - labels: - app: eveai - environment: staging - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/ssl-redirect: "true" - nginx.ingress.kubernetes.io/proxy-body-size: "10m" - nginx.ingress.kubernetes.io/proxy-read-timeout: "300" - # Use staging issuer first for testing - cert-manager.io/cluster-issuer: letsencrypt-staging - # After verification, switch to: letsencrypt-prod -spec: - 
ingressClassName: nginx - tls: - - hosts: - - evie-staging.askeveai.com - secretName: evie-staging-tls - rules: - - host: evie-staging.askeveai.com - http: - paths: - - path: /verify - pathType: Prefix - backend: - service: - name: verify-service - port: - number: 80 - - path: / - pathType: Prefix - backend: - service: - name: verify-service - port: - number: 80 \ No newline at end of file diff --git a/scaleway/manifests/staging/ingress-controller-setup.yaml b/scaleway/manifests/staging/ingress-controller-setup.yaml deleted file mode 100644 index 2987719..0000000 --- a/scaleway/manifests/staging/ingress-controller-setup.yaml +++ /dev/null @@ -1,285 +0,0 @@ -# ingress-controller-setup.yaml -# NGINX Ingress Controller voor gebruik met externe LoadBalancer - -apiVersion: v1 -kind: Namespace -metadata: - name: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/instance: ingress-nginx - ---- -# Ingress Controller Deployment -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ingress-nginx-controller - namespace: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx - template: - metadata: - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx - spec: - serviceAccountName: ingress-nginx - containers: - - name: controller - image: registry.k8s.io/ingress-nginx/controller:v1.8.2 - lifecycle: - preStop: - exec: - command: - - /wait-shutdown - args: - - /nginx-ingress-controller - - --election-id=ingress-controller-leader - - --controller-class=k8s.io/ingress-nginx - - --configmap=$(POD_NAMESPACE)/ingress-nginx-controller - - --validating-webhook=:8443 - - --validating-webhook-certificate=/usr/local/certificates/cert - - --validating-webhook-key=/usr/local/certificates/key - securityContext: - capabilities: - drop: - - ALL - add: - - NET_BIND_SERVICE - runAsUser: 101 - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: LD_PRELOAD - value: /usr/local/lib/libmimalloc.so - ports: - - name: http - containerPort: 80 - protocol: TCP - - name: https - containerPort: 443 - protocol: TCP - - name: webhook - containerPort: 8443 - protocol: TCP - livenessProbe: - failureThreshold: 5 - httpGet: - path: /healthz - port: 10254 - scheme: HTTP - initialDelaySeconds: 10 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - readinessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 10254 - scheme: HTTP - initialDelaySeconds: 10 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - resources: - requests: - cpu: 100m - memory: 90Mi - volumeMounts: - - mountPath: /usr/local/certificates/ - name: webhook-cert - readOnly: true - volumes: - - name: webhook-cert - secret: - secretName: ingress-nginx-admission - ---- -# NodePort Service - Dit is waar je LoadBalancer naar wijst! 
-apiVersion: v1 -kind: Service -metadata: - name: ingress-nginx-controller - namespace: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -spec: - type: NodePort - ports: - - port: 80 - targetPort: 80 - protocol: TCP - name: http - nodePort: 30080 # Externe LoadBalancer wijst naar dit poort op elke node - - port: 443 - targetPort: 443 - protocol: TCP - name: https - nodePort: 30443 # Voor HTTPS traffic - selector: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx - ---- -# ServiceAccount en RBAC -apiVersion: v1 -kind: ServiceAccount -metadata: - name: ingress-nginx - namespace: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -rules: -- apiGroups: [""] - resources: ["configmaps", "endpoints", "nodes", "pods", "secrets", "namespaces"] - verbs: ["list", "watch"] -- apiGroups: ["coordination.k8s.io"] - resources: ["leases"] - verbs: ["list", "watch"] -- apiGroups: [""] - resources: ["nodes"] - verbs: ["get"] -- apiGroups: [""] - resources: ["services"] - verbs: ["get", "list", "watch"] -- apiGroups: ["networking.k8s.io"] - resources: ["ingresses"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["events"] - resourceNames: ["ingress-nginx-controller"] - verbs: ["get", "list", "watch", "create", "update", "patch"] -- apiGroups: ["networking.k8s.io"] - resources: ["ingresses/status"] - verbs: ["update"] -- apiGroups: ["networking.k8s.io"] - resources: ["ingressclasses"] - verbs: ["get", "list", "watch"] -- apiGroups: ["discovery.k8s.io"] - resources: ["endpointslices"] - verbs: ["list", "watch", "get"] - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: ingress-nginx - namespace: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -rules: -- apiGroups: [""] - resources: ["namespaces"] - verbs: ["get"] -- apiGroups: [""] - resources: ["configmaps", "pods", "secrets", "endpoints"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["services"] - verbs: ["get", "list", "watch"] -- apiGroups: ["networking.k8s.io"] - resources: ["ingresses"] - verbs: ["get", "list", "watch"] -- apiGroups: ["networking.k8s.io"] - resources: ["ingressclasses"] - verbs: ["get", "list", "watch"] -- apiGroups: ["coordination.k8s.io"] - resources: ["leases"] - resourceNames: ["ingress-controller-leader"] - verbs: ["get", "update"] -- apiGroups: ["coordination.k8s.io"] - resources: ["leases"] - verbs: ["create"] -- apiGroups: [""] - resources: ["events"] - verbs: ["create", "patch"] -- apiGroups: ["discovery.k8s.io"] - resources: ["endpointslices"] - verbs: ["list", "watch", "get"] - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: ingress-nginx - namespace: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: ingress-nginx -subjects: -- kind: ServiceAccount - name: ingress-nginx - namespace: ingress-nginx - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -roleRef: - 
apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: ingress-nginx -subjects: -- kind: ServiceAccount - name: ingress-nginx - namespace: ingress-nginx - ---- -# ConfigMap voor Ingress Controller configuratie -apiVersion: v1 -kind: ConfigMap -metadata: - name: ingress-nginx-controller - namespace: ingress-nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -data: - allow-snippet-annotations: "true" - ---- -# IngressClass definitie -apiVersion: networking.k8s.io/v1 -kind: IngressClass -metadata: - name: nginx - labels: - app.kubernetes.io/name: ingress-nginx - app.kubernetes.io/part-of: ingress-nginx -spec: - controller: k8s.io/ingress-nginx \ No newline at end of file diff --git a/scaleway/scaleway_staging_setup.md b/scaleway/scaleway_staging_setup.md deleted file mode 100644 index aae6258..0000000 --- a/scaleway/scaleway_staging_setup.md +++ /dev/null @@ -1,243 +0,0 @@ -### Aangepaste Analyse en Stappenplan - Definitieve Versie - -Bedankt voor de duidelijke antwoorden! Dit geeft me alle informatie die ik nodig heb om een definitief stappenplan op te stellen. - -### Aangepaste Situatie-analyse - -#### **Persistent Storage Requirements** -Je hebt gelijk - voor de **interne Prometheus** hebben we inderdaad persistent storage nodig voor: -- **Prometheus data**: Metrics history (7-14 dagen retentie) -- **Pushgateway data**: Temporary metrics buffer -- **Application logs**: Via Scaleway Logs (managed) - -#### **Logging Strategie - Helder** -- **Application logs**: Scaleway Logs (managed, 7-14 dagen) -- **Business event logs**: PostgreSQL (jouw controle, facturatie) -- **Audit logs**: Niet expliciet, maar DB tracking van wijzigingen bestaat al - -#### **Infrastructure Status** -- **Staging cluster**: Operationeel maar leeg -- **DNS toegang**: Via cpanel beschikbaar -- **Secrets**: Volledig geconfigureerd in Scaleway Secret Manager - -### Aangepast Stappenplan - -#### **Fase 1: Infrastructure & Storage Setup (Week 1)** - -1. **Persistent Storage Configuratie** - ```yaml - # Scaleway Block Storage volumes - - prometheus-data: 20GB (metrics retention) - - pushgateway-data: 5GB (temporary buffer) - - application-logs: 10GB (7-dagen buffer voor Scaleway Logs) - ``` - -2. **DNS & SSL Setup** - - Configureer `evie-staging.askeveai.com` in cpanel - - Point naar K8s LoadBalancer IP - - Setup Let's Encrypt SSL certificaten - -3. **Scaleway Logs Setup** - ```yaml - # Fluent Bit DaemonSet configuratie - # Direct shipping naar Scaleway Logs - # 7-dagen retentie policy - ``` - -4. **External Secrets Operator** - - Installeer ESO in K8s cluster - - Configureer Scaleway Secret Manager integration - - Test secrets mapping - -#### **Fase 2: Monitoring Stack Deployment (Week 1-2)** - -5. **Prometheus Stack met Persistent Storage** - ```yaml - # Prometheus Deployment - spec: - volumeClaimTemplates: - - metadata: - name: prometheus-data - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 20Gi - - # Pushgateway Deployment - spec: - volumes: - - name: pushgateway-data - persistentVolumeClaim: - claimName: pushgateway-pvc - ``` - -6. **Business Metrics Integratie** - - Behoud huidige `business_event.py` logica - - Pushgateway blijft beschikbaar op K8s - - Configureer Prometheus scraping van pushgateway - -7. 
**Scaleway Cockpit Remote Write** - ```yaml - # Prometheus configuratie - remote_write: - - url: "https://metrics.cockpit.fr-par.scw.cloud/api/v1/push" - headers: - X-Token: "{{ scaleway_metrics_token }}" - ``` - -#### **Fase 3: Application Services Deployment (Week 2)** - -8. **Core Services met Secrets Integration** - ```yaml - # Deployment template voor alle 8 services - spec: - template: - spec: - containers: - - name: eveai-service - envFrom: - - secretRef: - name: eveai-app-secrets - - secretRef: - name: eveai-postgresql-secrets - # etc. - ``` - -9. **Ingress Controller & SSL** - ```yaml - # Nginx Ingress met SSL - apiVersion: networking.k8s.io/v1 - kind: Ingress - metadata: - name: eveai-staging-ingress - annotations: - cert-manager.io/cluster-issuer: "letsencrypt-prod" - spec: - tls: - - hosts: - - evie-staging.askeveai.com - secretName: eveai-staging-tls - ``` - -10. **Service Dependencies & Health Checks** - - Init containers voor database migrations - - Readiness/liveness probes voor alle services - - Service discovery configuratie - -#### **Fase 4: Logging & Observability (Week 2-3)** - -11. **Scaleway Logs Integration** - ```yaml - # Fluent Bit DaemonSet - apiVersion: apps/v1 - kind: DaemonSet - metadata: - name: fluent-bit - spec: - template: - spec: - containers: - - name: fluent-bit - image: fluent/fluent-bit:latest - env: - - name: SCALEWAY_LOGS_TOKEN - valueFrom: - secretKeyRef: - name: scaleway-logs-secret - key: token - ``` - -12. **Log Routing Configuratie** - - Application logs → Scaleway Logs (7-dagen retentie) - - Business events → PostgreSQL (jouw controle) - - System logs → Scaleway Logs - - Error logs → Scaleway Logs + alerting - -#### **Fase 5: Testing & Go-Live (Week 3-4)** - -13. **Deployment Automation** - - Update `push_to_scaleway.sh` voor K8s deployment - - Rolling update configuratie - - Rollback procedures - -14. **Monitoring Dashboards** - - Scaleway Cockpit dashboards voor infrastructure - - Custom business metrics visualisatie - - Alerting configuratie - -15. **Load Testing & Performance** - - Staging environment load testing - - Resource limits fine-tuning - - Auto-scaling configuratie (HPA) - -### Specifieke Storage Configuratie - -#### **Persistent Volumes Planning** - -```yaml -# Prometheus Storage -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: prometheus-data-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi - storageClassName: scw-bssd # Scaleway Block Storage - -# Pushgateway Storage -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: pushgateway-data-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 5Gi - storageClassName: scw-bssd -``` - -#### **Monitoring Stack Resource Allocation** - -Voor staging environment: -- **Prometheus**: 2GB RAM, 1 CPU, 20GB storage (14-dagen metrics) -- **Pushgateway**: 512MB RAM, 0.5 CPU, 5GB storage -- **Fluent Bit**: 256MB RAM, 0.2 CPU per node - -### Antwoorden op je Specifieke Vragen - -#### **1. Persistent Storage voor Prometheus** -Ja, absoluut nodig. Zonder persistent storage verlies je alle metrics bij pod restarts. We configureren Scaleway Block Storage volumes voor: -- Prometheus metrics data (20GB, 14-dagen retentie) -- Pushgateway buffer (5GB) - -#### **2. Audit Logs Verduidelijking** -Met "audit logs" bedoelde ik system-level logs (wie heeft toegang gehad tot welke resources, API calls, etc.). Jouw DB tracking van wijzigingen is perfect en blijft zoals het is. - -#### **3. 
Scaleway Logs vs Kubernetes Native** -Scaleway Logs is inderdaad de beste keuze omdat: -- Volledig managed (geen onderhoud) -- Automatische retentie management -- Geïntegreerd met Cockpit -- Kosteneffectief voor staging - -#### **4. Business Event Logs** -Deze blijven in PostgreSQL zoals nu, want: -- Nodig voor facturatie (kritieke data) -- Jouw controle over retentie -- Gestructureerde data voor business logic - -### Volgende Concrete Stappen - -1. **DNS Setup**: Configureer `evie-staging.askeveai.com` in cpanel -2. **Storage Classes**: Verificeer Scaleway Block Storage classes in K8s -3. **External Secrets**: Installeer ESO en test secrets mapping -4. **Monitoring Stack**: Deploy Prometheus + Pushgateway met persistent storage - -Wil je dat we beginnen met stap 1 (DNS setup) of heb je voorkeur voor een andere volgorde? \ No newline at end of file