k8s prometheus 监控 ceph 集群
在 Ceph 集群中启用 prometheus 模块
ceph mgr module enable prometheus
[root@cephnode01 my-cluster]# ceph mgr services
{"dashboard": "https://cephnode01:8443/","prometheus": "http://cephnode01:9283/"
}
在 3 台 Ceph 节点上分别安装 ceph_exporter
yum install golang git librados2-devel librbd1-devel -y
[root@cephnode02 ~]# cat /etc/profile.d/go.sh
export GOROOT=/usr/lib/golang
export GOBIN=$GOROOT/bin
export GOPATH=/home/golang
export PATH=$PATH:$GOROOT/bin:$GOPATH/bin
source /etc/profile.d/go.sh
go get -u github.com/digitalocean/ceph_exporter
cd /root/go/bin
nohup ./ceph_exporter &
############################################################
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  prometheus.yml: |
    rule_files:
    - /etc/config/rules/*.rules
    scrape_configs:
    - job_name: prometheus
      static_configs:
      - targets:
        - localhost:9090
    - job_name: 'ceph'
      static_configs:
      - targets:
        - 10.1.234.131:9283
        - 10.1.234.132:9283
        - 10.1.234.133:9283
    - job_name: 'ceph_class'
      static_configs:
      - targets: ['10.1.234.131:9128','10.1.234.132:9128','10.1.234.133:9128']
        labels:
          instance: ceph_class
[root@k8s-master1 prome]# cat alertmanager-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  alertmanager.yml: |
    global:
      # 每2分钟检查一次是否恢复
      resolve_timeout: 2m
      # SMTP的相关配置
      smtp_smarthost: 'smtp.163.com:25'
      smtp_from: '18802676921@163.com'
      smtp_auth_username: '18802676921@163.com'
      smtp_auth_password: '123qqq...A'
    # 自定义 通知的模板的 目录 或者 文件.
    #templates:
    #  - '/usr/local/prometheus/alertmanager/template/wechat.tmpl'
    # 路由树的根节点, 每个传进来的报警从这里开始.
    route:
      # 将传入的报警中有这些标签的分为一个组.
      # 比如, cluster=A 和 alertname=LatencyHigh 会分成一个组.
      group_by: ['alertname_wechat']
      # 指分组创建多久后才可以发送压缩的警报,也就是初次发警报的延时.
      # 这样会确保第一次通知的时候, 有更多的报警被压缩在一起.
      group_wait: 10s
      # 当第一个通知发送,等待多久发送压缩的警报
      group_interval: 10s
      # 默认的接收器
      receiver: 'wechat'
      # 如果报警发送成功, 等待多久重新发送一次
      repeat_interval: 1h
    receivers:
    #SMTP配置
    - name: 'email'
      email_configs:
      - to: '582167559@qq.com'
        send_resolved: true
    - name: 'wechat'
      wechat_configs:
      - corp_id: 'wwab37c47350318435'
        to_party: '2'
        agent_id: '1000002'
        api_secret: 'ti3TXKv7sdZs6r7EUZdgpRoUgjR1ne97R8KSYTtPpDY'
        send_resolved: true
告警规则
- alert: 集群空间使用率
  expr: ceph_cluster_used_bytes / ceph_cluster_capacity_bytes * 100 > 70
  for: 2m
  labels:
    product: ceph
  annotations:
    summary: "{{$labels.instance}}: Not enough capacity in Ceph detected"
    description: "{{$labels.instance}}: Available capacity is used up to 70% (current value is: {{ $value }})"

Grafana 仪表盘采用模板 917(Dashboard ID 917)
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
