gitops/velero: add manifests and runbook - kustomization is yet to be
created
This commit is contained in:
141
gitops/home-kubernetes/velero/helmrelease.yaml
Normal file
141
gitops/home-kubernetes/velero/helmrelease.yaml
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
apiVersion: helm.toolkit.fluxcd.io/v2
|
||||||
|
kind: HelmRelease
|
||||||
|
metadata:
|
||||||
|
name: velero
|
||||||
|
namespace: velero
|
||||||
|
spec:
|
||||||
|
interval: 30m
|
||||||
|
chart:
|
||||||
|
spec:
|
||||||
|
chart: velero
|
||||||
|
version: "11.3.2" # NOTE(review): verify this chart version maps to the intended Velero app version (see chart README) before deploying
|
||||||
|
sourceRef:
|
||||||
|
kind: HelmRepository
|
||||||
|
name: vmware-tanzu
|
||||||
|
namespace: flux-system
|
||||||
|
install:
|
||||||
|
crds: CreateReplace
|
||||||
|
remediation:
|
||||||
|
retries: 3
|
||||||
|
upgrade:
|
||||||
|
crds: CreateReplace
|
||||||
|
remediation:
|
||||||
|
retries: 3
|
||||||
|
values:
|
||||||
|
# Node agent for filesystem backups (kopia/restic)
|
||||||
|
deployNodeAgent: true
|
||||||
|
nodeAgent:
|
||||||
|
podVolumePath: /var/lib/kubelet/pods
|
||||||
|
# nodeAgent.privileged removed in chart 8.x+, use containerSecurityContext instead
|
||||||
|
containerSecurityContext:
|
||||||
|
privileged: true
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
memory: 1Gi
|
||||||
|
|
||||||
|
configuration:
|
||||||
|
# backupStorageLocation - note: provider is at same level as bucket, not nested
|
||||||
|
backupStorageLocation:
|
||||||
|
- name: default
|
||||||
|
provider: aws
|
||||||
|
bucket: velero-backups # create this bucket in minio first
|
||||||
|
accessMode: ReadWrite
|
||||||
|
default: true
|
||||||
|
config:
|
||||||
|
region: us-east-1 # MinIO ignores the region, but the AWS plugin requires one to be set
|
||||||
|
s3ForcePathStyle: "true"
|
||||||
|
s3Url: http://192.168.0.2:9000 # adjust to your minio service
|
||||||
|
|
||||||
|
# Volume snapshot location (for CSI snapshots, optional)
|
||||||
|
volumeSnapshotLocation:
|
||||||
|
- name: default
|
||||||
|
provider: aws
|
||||||
|
config:
|
||||||
|
region: us-east-1
|
||||||
|
|
||||||
|
# Use kopia for fs backups (restic deprecated, kopia is default in 1.14+)
|
||||||
|
uploaderType: kopia
|
||||||
|
|
||||||
|
# Default TTL for backups
|
||||||
|
defaultBackupTTL: 720h # 30 days
|
||||||
|
|
||||||
|
# Features
|
||||||
|
defaultVolumesToFsBackup: false # opt-in via annotation per-pod
|
||||||
|
|
||||||
|
# Credentials
|
||||||
|
credentials:
|
||||||
|
useSecret: true
|
||||||
|
existingSecret: velero-minio-credentials
|
||||||
|
|
||||||
|
# Velero server resources
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
memory: 512Mi
|
||||||
|
|
||||||
|
# Schedules
|
||||||
|
schedules:
|
||||||
|
daily-all-namespaces:
|
||||||
|
disabled: false
|
||||||
|
schedule: "0 3 * * *" # 3 AM daily
|
||||||
|
useOwnerReferencesInBackup: false
|
||||||
|
template:
|
||||||
|
ttl: 168h # 7 days
|
||||||
|
storageLocation: default
|
||||||
|
includedNamespaces:
|
||||||
|
- "*"
|
||||||
|
excludedNamespaces:
|
||||||
|
- kube-system
|
||||||
|
- kube-public
|
||||||
|
- kube-node-lease
|
||||||
|
- flux-system
|
||||||
|
- velero
|
||||||
|
excludedResources:
|
||||||
|
- events
|
||||||
|
- events.events.k8s.io
|
||||||
|
snapshotVolumes: false
|
||||||
|
defaultVolumesToFsBackup: true
|
||||||
|
|
||||||
|
weekly-full:
|
||||||
|
disabled: false
|
||||||
|
schedule: "0 4 * * 0" # Sunday 4 AM
|
||||||
|
template:
|
||||||
|
ttl: 720h # 30 days
|
||||||
|
storageLocation: default
|
||||||
|
includedNamespaces:
|
||||||
|
- "*"
|
||||||
|
excludedNamespaces:
|
||||||
|
- kube-system
|
||||||
|
- kube-public
|
||||||
|
- kube-node-lease
|
||||||
|
snapshotVolumes: false
|
||||||
|
defaultVolumesToFsBackup: true
|
||||||
|
|
||||||
|
# Init containers for plugins - AWS plugin for S3-compatible storage
|
||||||
|
# Note: CSI plugin merged into velero core in v1.14, no separate initContainer needed
|
||||||
|
initContainers:
|
||||||
|
- name: velero-plugin-for-aws
|
||||||
|
image: velero/velero-plugin-for-aws:v1.11.0 # compatible with Velero 1.15/1.16
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /target
|
||||||
|
name: plugins
|
||||||
|
|
||||||
|
# Metrics
|
||||||
|
metrics:
|
||||||
|
enabled: true
|
||||||
|
serviceMonitor:
|
||||||
|
enabled: false # set true if using prometheus-operator
|
||||||
|
additionalLabels: {}
|
||||||
|
|
||||||
|
# Disable volume snapshots if not using CSI snapshotter
|
||||||
|
snapshotsEnabled: false
|
||||||
|
|
||||||
|
# Pod annotations/labels
|
||||||
|
podAnnotations: {}
|
||||||
|
podLabels: {}
|
||||||
8
gitops/home-kubernetes/velero/helmrepository.yaml
Normal file
8
gitops/home-kubernetes/velero/helmrepository.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
apiVersion: source.toolkit.fluxcd.io/v1
|
||||||
|
kind: HelmRepository
|
||||||
|
metadata:
|
||||||
|
name: vmware-tanzu
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 24h
|
||||||
|
url: https://vmware-tanzu.github.io/helm-charts
|
||||||
4
gitops/home-kubernetes/velero/namespace.yaml
Normal file
4
gitops/home-kubernetes/velero/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: velero
|
||||||
10
gitops/home-kubernetes/velero/secret-minio.yaml
Normal file
10
gitops/home-kubernetes/velero/secret-minio.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
# WARNING: plaintext credentials committed to git — encrypt this file with SOPS,
# or replace it with a SealedSecret/ExternalSecret before pushing to a shared repo
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: velero-minio-credentials
|
||||||
|
namespace: velero
|
||||||
|
stringData:
|
||||||
|
cloud: |
|
||||||
|
[default]
|
||||||
|
aws_access_key_id=k8s
|
||||||
|
aws_secret_access_key=poh9ieceHohnum5e
|
||||||
625
gitops/home-kubernetes/velero/velero-runbook.md
Normal file
625
gitops/home-kubernetes/velero/velero-runbook.md
Normal file
@@ -0,0 +1,625 @@
|
|||||||
|
# Velero Backup & Recovery Runbook
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
| Operation | Command |
|
||||||
|
|-----------|---------|
|
||||||
|
| List backups | `velero backup get` |
|
||||||
|
| Backup status | `velero backup describe <name> --details` |
|
||||||
|
| Browse backup contents | `velero backup describe <name> --details \| grep -A100 "Resource List"` |
|
||||||
|
| Restore full namespace | `velero restore create --from-backup <name> --include-namespaces <ns>` |
|
||||||
|
| Restore single PVC | `velero restore create --from-backup <name> --include-resources pvc,pv --selector app=<label>` |
|
||||||
|
| Restore specific files | See [Specific File Restore](#specific-file-restore) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Browsing Backup Contents
|
||||||
|
|
||||||
|
### List All Backups
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# All backups with status
|
||||||
|
velero backup get
|
||||||
|
|
||||||
|
# Backups for specific schedule
|
||||||
|
velero backup get -l velero.io/schedule-name=daily-all-namespaces
|
||||||
|
|
||||||
|
# JSON output for scripting
|
||||||
|
velero backup get -o json | jq '.items[] | {name: .metadata.name, phase: .status.phase, started: .status.startTimestamp}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Inspect Backup Contents
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full backup details including all resources
|
||||||
|
velero backup describe <backup-name> --details
|
||||||
|
|
||||||
|
# List backed-up namespaces
|
||||||
|
velero backup describe <backup-name> --details | grep -A 5 "Namespaces:"
|
||||||
|
|
||||||
|
# List all resources in backup
|
||||||
|
velero backup describe <backup-name> --details | grep -A 200 "Resource List:" | head -100
|
||||||
|
|
||||||
|
# Check which PVCs were backed up
|
||||||
|
velero backup describe <backup-name> --details | grep -i persistentvolumeclaim
|
||||||
|
|
||||||
|
# Check pod volume backups (kopia/restic)
|
||||||
|
velero backup describe <backup-name> --details | grep -A 50 "Pod Volume Backups"
|
||||||
|
```
|
||||||
|
|
||||||
|
### View Backup Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stream logs
|
||||||
|
velero backup logs <backup-name>
|
||||||
|
|
||||||
|
# Search for errors
|
||||||
|
velero backup logs <backup-name> | grep -i error
|
||||||
|
|
||||||
|
# Check specific namespace backup
|
||||||
|
velero backup logs <backup-name> | grep "namespace=seafile"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Browse Kopia Repository Directly
|
||||||
|
|
||||||
|
For direct file-level inspection of kopia backups in MinIO:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get kopia repository password from velero secret
|
||||||
|
KOPIA_PASSWORD=$(kubectl get secret -n velero velero-repo-credentials -o jsonpath='{.data.repository-password}' | base64 -d)
|
||||||
|
|
||||||
|
# Connect to repository (run from a pod with minio access or port-forward)
|
||||||
|
kopia repository connect s3 \
|
||||||
|
--bucket=velero-backups \
|
||||||
|
--endpoint=minio.minio.svc.cluster.local:9000 \
|
||||||
|
--access-key=<MINIO_ACCESS_KEY> \
|
||||||
|
--secret-access-key=<MINIO_SECRET_KEY> \
|
||||||
|
--password="${KOPIA_PASSWORD}" \
|
||||||
|
--prefix=kopia/<cluster-name>/
|
||||||
|
|
||||||
|
# List snapshots
|
||||||
|
kopia snapshot list --all
|
||||||
|
|
||||||
|
# Browse specific snapshot
|
||||||
|
kopia snapshot list <snapshot-id>
|
||||||
|
kopia ls <snapshot-id>
|
||||||
|
|
||||||
|
# Mount for browsing (requires FUSE)
|
||||||
|
mkdir /tmp/kopia-mount
|
||||||
|
kopia mount <snapshot-id> /tmp/kopia-mount &
|
||||||
|
ls /tmp/kopia-mount/
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Full Namespace Restore
|
||||||
|
|
||||||
|
### Restore to Same Cluster (Disaster Recovery)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Restore entire namespace
|
||||||
|
velero restore create seafile-restore \
|
||||||
|
--from-backup daily-all-namespaces-20250115030000 \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Monitor restore progress
|
||||||
|
velero restore describe seafile-restore --details
|
||||||
|
velero restore logs seafile-restore -f
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore to Different Namespace
|
||||||
|
|
||||||
|
```bash
|
||||||
|
velero restore create seafile-test-restore \
|
||||||
|
--from-backup daily-all-namespaces-20250115030000 \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--namespace-mappings seafile:seafile-restored \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore with Resource Filtering
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Restore only specific resource types
|
||||||
|
velero restore create restore-pvcs-only \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--include-resources persistentvolumeclaims,persistentvolumes \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Exclude certain resources
|
||||||
|
velero restore create restore-no-secrets \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--exclude-resources secrets \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Restore by label selector
|
||||||
|
velero restore create restore-app \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--selector app.kubernetes.io/name=seafile \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Single PVC/Volume Restore
|
||||||
|
|
||||||
|
### Restore Specific PVC
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# First, scale down the workload using the PVC
|
||||||
|
kubectl scale deployment seafile -n seafile --replicas=0
|
||||||
|
|
||||||
|
# Delete the corrupted/problematic PVC (data will be restored)
|
||||||
|
kubectl delete pvc seafile-data -n seafile
|
||||||
|
|
||||||
|
# Restore just that PVC
|
||||||
|
velero restore create restore-seafile-pvc \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--include-resources persistentvolumeclaims,persistentvolumes \
|
||||||
|
--selector app=seafile \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Scale back up
|
||||||
|
kubectl scale deployment seafile -n seafile --replicas=1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore PVC to New Name (Side-by-Side)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create restore with transforms
|
||||||
|
cat <<EOF | kubectl apply -f -
|
||||||
|
apiVersion: velero.io/v1
|
||||||
|
kind: Restore
|
||||||
|
metadata:
|
||||||
|
name: restore-pvc-new-name
|
||||||
|
namespace: velero
|
||||||
|
spec:
|
||||||
|
backupName: <backup-name>
|
||||||
|
includedNamespaces:
|
||||||
|
- seafile
|
||||||
|
includedResources:
|
||||||
|
- persistentvolumeclaims
|
||||||
|
- persistentvolumes
|
||||||
|
labelSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: seafile
|
||||||
|
restorePVs: true
|
||||||
|
namespaceMapping:
|
||||||
|
seafile: seafile-recovery
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Or use restore hooks to rename
|
||||||
|
velero restore create restore-pvc-renamed \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--namespace-mappings seafile:seafile-temp \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Specific File Restore
|
||||||
|
|
||||||
|
Velero doesn't support single-file restore natively. Use kopia directly:
|
||||||
|
|
||||||
|
### Method 1: Kopia Direct Restore
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Find the backup/snapshot containing your file
|
||||||
|
# First, get velero's kopia repo credentials
|
||||||
|
REPO_PASSWORD=$(kubectl get secret -n velero velero-repo-credentials \
|
||||||
|
-o jsonpath='{.data.repository-password}' | base64 -d)
|
||||||
|
|
||||||
|
# Run a debug pod with kopia
|
||||||
|
kubectl run kopia-restore --rm -it \
|
||||||
|
--image=kopia/kopia:latest \
|
||||||
|
--restart=Never \
|
||||||
|
--namespace=velero \
|
||||||
|
-- /bin/sh
|
||||||
|
|
||||||
|
# Inside the pod:
|
||||||
|
kopia repository connect s3 \
|
||||||
|
--bucket=velero-backups \
|
||||||
|
--endpoint=minio.minio.svc.cluster.local:9000 \
|
||||||
|
--access-key=<ACCESS_KEY> \
|
||||||
|
--secret-access-key=<SECRET_KEY> \
|
||||||
|
--password="<REPO_PASSWORD>" \
|
||||||
|
--prefix=kopia/<cluster>/
|
||||||
|
|
||||||
|
# List snapshots for specific PVC
|
||||||
|
kopia snapshot list --all | grep seafile
|
||||||
|
|
||||||
|
# Restore specific file
|
||||||
|
kopia restore <snapshot-id>/path/to/file.txt /tmp/restored-file.txt
|
||||||
|
|
||||||
|
# Restore specific directory
|
||||||
|
kopia restore <snapshot-id>/data/uploads/ /tmp/restored-uploads/
|
||||||
|
```
|
||||||
|
|
||||||
|
### Method 2: Mount and Copy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create a temporary pod that mounts the backup
|
||||||
|
cat <<EOF | kubectl apply -f -
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: backup-browser
|
||||||
|
namespace: velero
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: browser
|
||||||
|
image: kopia/kopia:latest
|
||||||
|
command: ["sleep", "3600"]
|
||||||
|
env:
|
||||||
|
- name: KOPIA_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: velero-repo-credentials
|
||||||
|
key: repository-password
|
||||||
|
volumeMounts:
|
||||||
|
- name: restore-target
|
||||||
|
mountPath: /restore
|
||||||
|
volumes:
|
||||||
|
- name: restore-target
|
||||||
|
emptyDir: {}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Exec in and restore files
|
||||||
|
kubectl exec -it -n velero backup-browser -- /bin/sh
|
||||||
|
# ... run kopia commands inside
|
||||||
|
```
|
||||||
|
|
||||||
|
### Method 3: Full PVC Restore + Copy + Delete
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Restore PVC to temp namespace
|
||||||
|
velero restore create temp-restore \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--namespace-mappings seafile:temp-restore \
|
||||||
|
--include-resources pvc,pv \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# 2. Create a pod to access both PVCs
|
||||||
|
cat <<EOF | kubectl apply -f -
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: file-copier
|
||||||
|
namespace: seafile
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: copier
|
||||||
|
image: alpine
|
||||||
|
command: ["sleep", "3600"]
|
||||||
|
volumeMounts:
|
||||||
|
- name: current
|
||||||
|
mountPath: /current
|
||||||
|
- name: restored
|
||||||
|
mountPath: /restored
|
||||||
|
volumes:
|
||||||
|
- name: current
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: seafile-data
|
||||||
|
- name: restored
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: seafile-data # in temp-restore namespace - need cross-ns mount or copy via node
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Alternative: use rsync between namespaces
|
||||||
|
kubectl exec -n temp-restore deployment/temp-pod -- tar cf - /data/specific-file.txt | \
|
||||||
|
kubectl exec -i -n seafile deployment/seafile -- tar xf - -C /
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Database-Specific Recovery
|
||||||
|
|
||||||
|
### MariaDB (via mariadb-operator)
|
||||||
|
|
||||||
|
Velero fs-backup of running DB may be inconsistent. Prefer operator backups:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# List operator backups
|
||||||
|
kubectl get backups.k8s.mariadb.com -n mariadb
|
||||||
|
|
||||||
|
# Restore from operator backup
|
||||||
|
kubectl apply -f - <<EOF
|
||||||
|
apiVersion: k8s.mariadb.com/v1alpha1
|
||||||
|
kind: Restore
|
||||||
|
metadata:
|
||||||
|
name: mariadb-restore
|
||||||
|
namespace: mariadb
|
||||||
|
spec:
|
||||||
|
mariaDbRef:
|
||||||
|
name: mariadb
|
||||||
|
backupRef:
|
||||||
|
name: mariadb-backup-20250115
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
If you must restore from Velero:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Scale down mariadb
|
||||||
|
kubectl scale statefulset mariadb -n mariadb --replicas=0
|
||||||
|
|
||||||
|
# 2. Restore PVC
|
||||||
|
velero restore create mariadb-pvc-restore \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces mariadb \
|
||||||
|
--include-resources pvc,pv \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# 3. Scale back up - InnoDB crash recovery replays its redo log on startup
|
||||||
|
kubectl scale statefulset mariadb -n mariadb --replicas=1
|
||||||
|
|
||||||
|
# 4. Verify data integrity
|
||||||
|
kubectl exec -it -n mariadb mariadb-0 -- mariadb -e "CHECK TABLE important_table;"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Redis
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# If Redis is persistent (RDB/AOF)
|
||||||
|
kubectl scale statefulset redis -n redis --replicas=0
|
||||||
|
|
||||||
|
velero restore create redis-restore \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-namespaces redis \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
kubectl scale statefulset redis -n redis --replicas=1
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Backup Management
|
||||||
|
|
||||||
|
### Create On-Demand Backup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full backup
|
||||||
|
velero backup create manual-backup-$(date +%Y%m%d-%H%M%S) \
|
||||||
|
--default-volumes-to-fs-backup \
|
||||||
|
--snapshot-volumes=false \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Specific namespace pre-maintenance
|
||||||
|
velero backup create pre-upgrade-seafile-$(date +%Y%m%d) \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--default-volumes-to-fs-backup \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
### Delete Old Backups
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Delete specific backup
|
||||||
|
velero backup delete <backup-name> --confirm
|
||||||
|
|
||||||
|
# Delete backups older than 30 days (careful!)
|
||||||
|
velero backup get -o json | jq -r '.items[] | select(.status.startTimestamp < (now - 2592000 | todate)) | .metadata.name' | xargs -I {} velero backup delete {} --confirm
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Backup Storage Location Health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
velero backup-location get
|
||||||
|
velero backup-location describe default
|
||||||
|
|
||||||
|
# Verify connectivity
|
||||||
|
kubectl logs -n velero deployment/velero | grep -i "backup storage location"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Disaster Recovery Procedures
|
||||||
|
|
||||||
|
### Complete Cluster Rebuild
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Install Velero on new cluster with same config
|
||||||
|
helm upgrade --install velero vmware-tanzu/velero \
|
||||||
|
-n velero --create-namespace \
|
||||||
|
-f velero-values.yaml
|
||||||
|
|
||||||
|
# 2. Wait for velero to sync backup list from S3
|
||||||
|
sleep 60
|
||||||
|
velero backup get
|
||||||
|
|
||||||
|
# 3. Restore namespaces in order (dependencies first)
|
||||||
|
# Restore storage/infra
|
||||||
|
velero restore create restore-infra \
|
||||||
|
--from-backup <latest-backup> \
|
||||||
|
--include-namespaces minio,cert-manager \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Restore databases
|
||||||
|
velero restore create restore-databases \
|
||||||
|
--from-backup <latest-backup> \
|
||||||
|
--include-namespaces mariadb,redis \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Restore applications
|
||||||
|
velero restore create restore-apps \
|
||||||
|
--from-backup <latest-backup> \
|
||||||
|
--include-namespaces seafile,plane \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore Schedule After Accidental Deletion
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Schedules are cluster resources, restore from backup
|
||||||
|
velero restore create restore-schedules \
|
||||||
|
--from-backup <backup-name> \
|
||||||
|
--include-resources schedules.velero.io \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Troubleshooting
|
||||||
|
|
||||||
|
### Backup Stuck/Failed
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check velero logs
|
||||||
|
kubectl logs -n velero deployment/velero --tail=100
|
||||||
|
|
||||||
|
# Check node-agent on specific node
|
||||||
|
kubectl logs -n velero -l name=node-agent --tail=100
|
||||||
|
|
||||||
|
# Check backup details for errors
|
||||||
|
velero backup describe <backup-name> --details | grep -i -A5 "error\|warning\|failed"
|
||||||
|
|
||||||
|
# Common issues:
|
||||||
|
# - Node-agent not running on node with PV
|
||||||
|
kubectl get pods -n velero -l name=node-agent -o wide
|
||||||
|
# - PVC not annotated for backup
|
||||||
|
kubectl get pvc -A -o json | jq '.items[] | select(.metadata.annotations["backup.velero.io/backup-volumes"] != null)'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore Not Restoring Volumes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check if backup has pod volume backups
|
||||||
|
velero backup describe <backup-name> --details | grep -A20 "Pod Volume Backups"
|
||||||
|
|
||||||
|
# Verify restore is configured to restore PVs
|
||||||
|
velero restore describe <restore-name> --details | grep -i "restorePVs"
|
||||||
|
|
||||||
|
# Force PV restore
|
||||||
|
velero restore create <name> \
|
||||||
|
--from-backup <backup> \
|
||||||
|
--restore-volumes=true \
|
||||||
|
--wait
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kopia Repository Issues
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check repository status
|
||||||
|
kubectl exec -n velero deployment/velero -- \
|
||||||
|
velero repo get
|
||||||
|
|
||||||
|
# Unlock stuck repository
|
||||||
|
kubectl exec -n velero deployment/velero -- \
|
||||||
|
velero repo unlock <repo-name>
|
||||||
|
|
||||||
|
# Maintenance (runs automatically, but can trigger manually)
|
||||||
|
kubectl exec -n velero deployment/velero -- \
|
||||||
|
velero repo maintenance run
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Monitoring & Alerting
|
||||||
|
|
||||||
|
### Prometheus Metrics
|
||||||
|
|
||||||
|
Key metrics to monitor:
|
||||||
|
|
||||||
|
```promql
|
||||||
|
# Backup success rate
|
||||||
|
sum(velero_backup_success_total) / sum(velero_backup_attempt_total)
|
||||||
|
|
||||||
|
# Backup duration
|
||||||
|
velero_backup_duration_seconds{schedule="daily-all-namespaces"}
|
||||||
|
|
||||||
|
# Backup size
|
||||||
|
velero_backup_items_total{backup="<name>"}
|
||||||
|
|
||||||
|
# Failed backups in last 24h
|
||||||
|
increase(velero_backup_failure_total[24h])
|
||||||
|
```
|
||||||
|
|
||||||
|
### AlertManager Rules
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
groups:
|
||||||
|
- name: velero
|
||||||
|
rules:
|
||||||
|
- alert: VeleroBackupFailed
|
||||||
|
expr: increase(velero_backup_failure_total[1h]) > 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "Velero backup failed"
|
||||||
|
|
||||||
|
- alert: VeleroBackupMissing
|
||||||
|
expr: time() - velero_backup_last_successful_timestamp{schedule="daily-all-namespaces"} > 86400
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "No successful backup in 24h"
|
||||||
|
|
||||||
|
- alert: VeleroNodeAgentDown
|
||||||
|
expr: kube_daemonset_status_number_unavailable{daemonset="node-agent"} > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Regular Maintenance Tasks
|
||||||
|
|
||||||
|
### Weekly
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Verify recent backup integrity
|
||||||
|
velero backup describe $(velero backup get -o json | jq -r '.items | sort_by(.status.startTimestamp) | last | .metadata.name') --details
|
||||||
|
|
||||||
|
# Check backup storage usage
|
||||||
|
mc ls minio/velero-backups --summarize
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monthly
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test restore to scratch namespace
|
||||||
|
velero restore create monthly-test-$(date +%Y%m) \
|
||||||
|
--from-backup $(velero backup get -o json | jq -r '.items | sort_by(.status.startTimestamp) | last | .metadata.name') \
|
||||||
|
--include-namespaces seafile \
|
||||||
|
--namespace-mappings seafile:restore-test \
|
||||||
|
--wait
|
||||||
|
|
||||||
|
# Verify restored data
|
||||||
|
kubectl exec -n restore-test deploy/seafile -- ls -la /data
|
||||||
|
|
||||||
|
# Cleanup test
|
||||||
|
kubectl delete namespace restore-test
|
||||||
|
velero restore delete monthly-test-$(date +%Y%m) --confirm
|
||||||
|
```
|
||||||
|
|
||||||
|
### Quarterly
|
||||||
|
|
||||||
|
- Full DR test: restore to separate cluster
|
||||||
|
- Review retention policies
|
||||||
|
- Audit backup coverage (new namespaces/PVCs added?)
|
||||||
|
- Update velero/plugin versions
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Appendix: Common Label Selectors
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Backup by app label
|
||||||
|
--selector app.kubernetes.io/name=seafile
|
||||||
|
|
||||||
|
# Backup by component
|
||||||
|
--selector app.kubernetes.io/component=database
|
||||||
|
|
||||||
|
# Exclude specific pods from backup
|
||||||
|
# (add to pod annotation)
|
||||||
|
kubectl annotate pod <pod> backup.velero.io/backup-volumes-excludes=cache,tmp
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user