查看docker运行状态
# Show the runtime status of the Docker containers on this host.
docker stats
# Start cAdvisor v0.44.0 as a detached container named "cadvisor" and publish
# its port 8080 on the host. Host paths are bind-mounted read-only (except
# /var/run, which is read-write), presumably so cAdvisor can inspect host and
# container state.
# Continuation lines must start with whitespace only: any stray character
# before an option becomes a separate argument to `docker run`.
docker run -d \
  --volume=/:/rootfs:ro \
  --volume=/var/run:/var/run:rw \
  --volume=/sys:/sys:ro \
  --volume=/var/lib/docker/:/var/lib/docker:ro \
  --publish=8080:8080 \
  --name=cadvisor \
  gcr.io/cadvisor/cadvisor:v0.44.0
# Deploy cAdvisor through Docker Compose (alternative to a bare `docker run`).
# -p: create missing parent directories and do not fail if the directory
# already exists, so the notes can be replayed.
mkdir -p /data/cadvisor
# NOTE(review): the compose file below is written to the current working
# directory, not to /data/cadvisor -- confirm whether a `cd` is intended.
# The heredoc delimiter is quoted so the shell writes the YAML verbatim.
cat >docker-compose.yml << 'EOF'
version: '3.3'
networks:
  monitoring:
    driver: bridge
services:
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.44.0
    # image: google/cadvisor:latest
    container_name: cadvisor
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    networks:
      - monitoring
    ports:
      - "8080:8080"
EOF
# Create and start the services defined above in the background.
docker-compose up -d
通过访问 http://10.19.1.220:8080 可以查看当前主机上容器的运行状态,
采集样本: http://10.19.1.220:8080/metrics
? ?- job_name: 'cadvisor'
? ? scrape_interval: 15s
? ? static_configs:
? ? - targets: ['cadvisor:8080']
? ? ? labels:
? ? ? ? instance: Prometheus服务器
#增加新的cadvisor配置
? ? - targets: ['10.19.1.220:8080']
? ? ? labels:
? ? ? ? instance: cadvisor-220服务器?
?
# Write the Docker container alert rules file.
# The heredoc delimiter is quoted so that `{{ $labels.instance }}` and
# `{{ $value }}` reach the file untouched instead of being expanded by the
# shell.
cat >prometheus/rules/docker.yml << 'EOF'
groups:
- name: DockerContainers
  rules:
  # Fires when a container's last-seen timestamp is more than 60s in the past.
  # Metric names are case-sensitive; cAdvisor exports `container_last_seen`.
  - alert: ContainerKilled
    expr: time() - container_last_seen > 60
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: "Docker容器被杀死,容器: {{ $labels.instance }}"
      description: "{{ $value }} 容器消失了"
EOF
# Run `promtool check config` against the Prometheus configuration file inside
# the "prometheus" container.
docker exec -it prometheus promtool check config /etc/prometheus/prometheus.yml
# POST to the reload endpoint on localhost:9090 so the running server picks up
# the changed configuration.
# NOTE(review): presumably requires Prometheus to be started with
# --web.enable-lifecycle -- confirm.
curl -X POST http://localhost:9090/-/reload
11600