vms: add monitoring stack and node-exporter for docker host
utility-101-shadow:
- Add full monitoring stack (Prometheus + Blackbox Exporter + Alertmanager) with Docker Compose and a systemd unit (monitoring.service)
- Prometheus scrapes: itself, blackbox-exporter, and node-exporter on the docker host (docker:9100); blackbox probes cover HTTPS endpoints with TLS cert monitoring
- Alertmanager routes warnings to Slack/Discord, critical alerts also to email (Gmail SMTP); inhibit rule suppresses SSLCertExpiringSoon when SSLCertExpired already fires
- Alert rules: 11 node-exporter alerts (host down, CPU, memory, disk fill/prediction, iowait, OOM kill, systemd failed units) + 3 blackbox alerts (probe failed, SSL expiring, SSL expired)
- readme: add services list and Docker Engine installation steps

docker host:
- Add node-exporter container running with host pid/network and read-only mounts of /proc, /sys, / for full host metrics visibility
- Enable --collector.systemd for systemd unit state metrics
- Add systemd unit (node-exporter.service) to manage the container

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
142
vms/utility-101-shadow/docker/monitoring/prometheus.yml
Normal file
142
vms/utility-101-shadow/docker/monitoring/prometheus.yml
Normal file
@@ -0,0 +1,142 @@
|
||||
# Defaults inherited by every scrape job below unless a job overrides them.
global:
  # Scraping: poll each target once a minute, giving up after 15 seconds.
  scrape_interval: 60s
  scrape_timeout: 15s
  # Rule evaluation cadence for the files listed under `rule_files`.
  evaluation_interval: 60s
|
||||
|
||||
# Where fired alerts are delivered: a single statically configured
# Alertmanager reachable as `alertmanager` on port 9093.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 'alertmanager:9093'
|
||||
|
||||
# Alerting rule definitions loaded by Prometheus.
rule_files:
- '/etc/prometheus/alerts.yml'
|
||||
|
||||
scrape_configs:

# ── Prometheus itself ──────────────────────────────────────────────────────
# Self-scrape of the server's own metrics endpoint on localhost:9090.
- job_name: prometheus
  static_configs:
  - targets:
    - 'localhost:9090'
|
||||
|
||||
# ── Blackbox Exporter own metrics ─────────────────────────────────────────
# Scrapes the exporter process itself (not a probe); the probe jobs are
# defined separately and go through the exporter's /probe path.
- job_name: blackbox
  static_configs:
  - targets:
    - 'blackbox-exporter:9115'
|
||||
|
||||
# ── Node Exporter (host: docker) ──────────────────────────────────────────
# Host metrics from the node-exporter listening on docker:9100.
- job_name: node-exporter-docker
  static_configs:
  - labels:
      # Explicit `instance` value, so series are labelled `docker` rather
      # than with the scrape address `docker:9100`.
      instance: docker
    targets:
    - 'docker:9100'
|
||||
|
||||
# ── HTTPS probes (TLS verified) ───────────────────────────────────────────
# Each URL below is probed through the Blackbox Exporter's /probe endpoint
# using the `https_2xx` module.
- job_name: blackbox-https
  metrics_path: /probe
  params:
    module:
    - https_2xx
  static_configs:
  - targets:
    - 'https://fuj-management.home.hrajfrisbee.cz/'
    - 'https://gitea.home.hrajfrisbee.cz/'
    - 'https://vault.hrajfrisbee.cz/'
    - 'https://idm.home.hrajfrisbee.cz/'
    - 'https://maru-hleda-byt.home.hrajfrisbee.cz/mapa_bytu.html'
    # - https://nonexistent.home.hrajfrisbee.cz/
  relabel_configs:
  # 1. Pass the listed URL to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # 2. Keep the probed URL as the `instance` label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # 3. Direct the actual scrape at the Blackbox Exporter.
  - target_label: __address__
    replacement: 'blackbox-exporter:9115'
|
||||
|
||||
# ── HTTP probes (plain HTTP, no TLS) ──────────────────────────────────────
# Probes go through the Blackbox Exporter's /probe endpoint using the
# `http_2xx` module.
- job_name: blackbox-http
  metrics_path: /probe
  params:
    module: [http_2xx]
  static_configs:
  # No targets are enabled yet, so the list is written as an explicit `[]`
  # instead of a bare `targets:` (which parses as null). To enable probes,
  # replace `[]` with a block list such as:
  #   targets:
  #   - http://192.168.0.30:8080/
  #   - http://some-internal-service:port/healthz
  - targets: []
  relabel_configs:
  # Pass the listed URL to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed URL as the `instance` label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # Direct the actual scrape at the Blackbox Exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
|
||||
|
||||
# ── HTTP POST probes ───────────────────────────────────────────────────────
# Probes go through the Blackbox Exporter's /probe endpoint using the
# `http_post_2xx` module.
- job_name: blackbox-http-post
  metrics_path: /probe
  params:
    module: [http_post_2xx]
  static_configs:
  # No targets are enabled yet, so the list is written as an explicit `[]`
  # instead of a bare `targets:` (which parses as null). To enable probes,
  # replace `[]` with a block list such as:
  #   targets:
  #   - http://some-api/endpoint
  - targets: []
  relabel_configs:
  # Pass the listed URL to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed URL as the `instance` label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # Direct the actual scrape at the Blackbox Exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
|
||||
|
||||
# ── TCP port probes ────────────────────────────────────────────────────────
# Probes go through the Blackbox Exporter's /probe endpoint using the
# `tcp_connect` module.
- job_name: blackbox-tcp
  metrics_path: /probe
  params:
    module: [tcp_connect]
  static_configs:
  # No targets are enabled yet, so the list is written as an explicit `[]`
  # instead of a bare `targets:` (which parses as null). To enable probes,
  # replace `[]` with a block list such as:
  #   targets:
  #   - 192.168.0.30:5432  # postgres
  #   - 192.168.0.30:6379  # redis
  #   - 192.168.0.30:22    # ssh
  - targets: []
  relabel_configs:
  # Pass the listed host:port to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed host:port as the `instance` label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # Direct the actual scrape at the Blackbox Exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
|
||||
|
||||
# ── ICMP ping probes ───────────────────────────────────────────────────────
# Probes go through the Blackbox Exporter's /probe endpoint using the
# `icmp` module.
- job_name: blackbox-icmp
  metrics_path: /probe
  params:
    module: [icmp]
  static_configs:
  # No targets are enabled yet, so the list is written as an explicit `[]`
  # instead of a bare `targets:` (which parses as null). To enable probes,
  # replace `[]` with a block list such as:
  #   targets:
  #   - 192.168.0.30
  #   - 192.168.0.1  # gateway
  - targets: []
  relabel_configs:
  # Pass the listed address to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed address as the `instance` label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # Direct the actual scrape at the Blackbox Exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
|
||||
|
||||
# ── DNS probes ─────────────────────────────────────────────────────────────
# Probes go through the Blackbox Exporter's /probe endpoint using the
# `dns_udp` module.
- job_name: blackbox-dns
  metrics_path: /probe
  params:
    module: [dns_udp]
  static_configs:
  # No targets are enabled yet, so the list is written as an explicit `[]`
  # instead of a bare `targets:` (which parses as null). To enable probes,
  # replace `[]` with a block list such as:
  #   targets:
  #   - 8.8.8.8      # Google DNS
  #   - 1.1.1.1      # Cloudflare DNS
  #   - 192.168.0.1  # local resolver
  - targets: []
  relabel_configs:
  # Pass the listed resolver to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed resolver as the `instance` label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # Direct the actual scrape at the Blackbox Exporter.
  - target_label: __address__
    replacement: blackbox-exporter:9115
|
||||
Reference in New Issue
Block a user