# A monitoring solution for Docker hosts and containers using Prometheus,
# Grafana, cAdvisor, and Node Exporter, with alerting via Alertmanager.
# Source: https://github.com/JREXANDREW/swarm-cluster-monitoring
# Sample docker-compose.yaml file for a Docker Swarm cluster.
# Compose file format version, kept as a quoted string so it is never parsed
# as a float. This file uses top-level `configs`, which the v3 reference lists
# as available from format 3.3.
version: "3.3"
# Named volumes used by the services in this file:
#   prometheus_data - mounted at /prometheus (the Prometheus TSDB path)
#   grafana_data    - mounted at /var/lib/grafana
volumes:
  prometheus_data: {}
  grafana_data: {}
# Configs created from local files at deploy time.
# caddy_config is consumed by the caddy service as /etc/caddy/Caddyfile.
configs:
  caddy_config:
    file: ./caddy/Caddyfile
# Single network shared by every service in this file.
# No options are set, so the platform's default network driver is used.
networks:
  monitor-net: {}
# Services of the monitoring stack. Every service joins monitor-net; only the
# caddy service publishes ports.
#
# NOTE(review): per the Compose file v3 reference, `restart` and `depends_on`
# are ignored by `docker stack deploy`; they only take effect when this file
# is run with docker-compose. They are kept here for that use case.
services:
  # Prometheus, configured from ./prometheus/prometheus.yml, with its TSDB
  # stored on the prometheus_data volume. Pinned to a manager node.
  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    depends_on:
      - cadvisor
    networks:
      - monitor-net
    restart: always
    deploy:
      placement:
        constraints:
          - node.role==manager

  # Node Exporter, one task per node (global mode), reading the host's /proc
  # and /sys through read-only bind mounts.
  node-exporter:
    image: prom/node-exporter:v0.16.0
    networks:
      - monitor-net
    environment:
      # Go-template placeholder; presumably expanded by swarm to the ID of the
      # node running the task - confirm against the Docker service docs.
      - 'NODE_ID={{.Node.ID}}'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /etc/hostname:/etc/nodename
    command:
      - '--path.sysfs=/host/sys'
      - '--path.procfs=/host/proc'
      - '--collector.textfile.directory=/etc/node-exporter/'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
      - '--no-collector.ipvs'
    deploy:
      mode: global
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M

  # Alertmanager, configured from ./alertmanager/config.yml. Pinned to a
  # manager node.
  # NOTE(review): /alertmanager is not backed by a volume in this file, so its
  # stored state presumably does not survive container replacement - confirm.
  alertmanager:
    image: prom/alertmanager:latest
    volumes:
      - "./alertmanager/:/etc/alertmanager/"
    networks:
      - monitor-net
    restart: always
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    deploy:
      placement:
        constraints:
          - node.role==manager

  # unsee, pointed at the alertmanager service on port 9093.
  unsee:
    image: gcr.io/bsft-cloud-rnd/unsee:v0.8.0
    networks:
      - monitor-net
    environment:
      - "ALERTMANAGER_URIS=default:http://alertmanager:9093"
    deploy:
      mode: replicated
      replicas: 1

  # cAdvisor, one task per node (global mode), with host paths bind-mounted
  # so it can inspect the Docker daemon and container filesystems.
  cadvisor:
    image: google/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    networks:
      - monitor-net
    restart: always
    deploy:
      mode: global

  # Grafana, with state on the grafana_data volume, provisioning files from
  # ./grafana/provisioning/, and environment from ./grafana/config.monitoring.
  grafana:
    image: grafana/grafana:latest
    depends_on:
      - prometheus
    volumes:
      - grafana_data:/var/lib/grafana
      - "./grafana/provisioning/:/etc/grafana/provisioning/"
    env_file:
      - ./grafana/config.monitoring
    networks:
      - monitor-net
    restart: always

  # Caddy is the only service that publishes ports to the host; its behaviour
  # is defined by the caddy_config Caddyfile. Pinned to a manager node.
  caddy:
    image: caddy:latest
    ports:
      - "3000:3000"
      - "9090:9090"
      - "9093:9093"
      - "9094:9094"
    networks:
      - monitor-net
    environment:
      # Both values fall back to "admin" when unset in the deploy environment;
      # override them for anything other than a local test.
      - ADMIN_USER=${ADMIN_USER:-admin}
      - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
    configs:
      - source: caddy_config
        target: /etc/caddy/Caddyfile
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 64M
    healthcheck:
      # NOTE(review): assumes `curl` is present in the caddy image - confirm,
      # otherwise this check fails and the task is reported unhealthy.
      test: ["CMD", "curl", "-f", "http://localhost:3000"]
      interval: 5s
      timeout: 1s
      retries: 5
# "Comentarios" - leftover web-page heading; not part of the Compose file.