#### 环境
`Kubernets 1.18.8`
#### 一、下载kube-prometheus
```bash
git clone https://github.com/coreos/kube-prometheus.git
git checkout -b release-0.6 remotes/origin/release-0.6 #release-0.6支持1.18+
```
#### 二、安装kube-prometheus
##### 安装kube-prometheus相关资源和服务
```bash
# Create the namespace and CRDs, and then wait for them to be availble before creating the remaining resources
kubectl create -f manifests/setup
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
kubectl create -f manifests/
```
##### 卸载kube-prometheus相关资源和服务
```bash
kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup
```
#### 三、配置访问
prometheus
```bash
$ kubectl --namespace monitoring port-forward svc/prometheus-k8s 9090
```
http://localhost:9090
Grafana
```bash
$ kubectl --namespace monitoring port-forward svc/grafana 3000
```
http://localhost:3000 账号密码: admin:admin.
AlertManager
```bash
$ kubectl --namespace monitoring port-forward svc/alertmanager-main 9093
```
http://localhost:9093
#### 四、增加钉钉告警
4.1 k8s安装webhook-dingtalk
```bash
$ cat dingtalk.yaml
```
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: webhook-dingtalk
name: webhook-dingtalk
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: webhook-dingtalk
template:
metadata:
labels:
app: webhook-dingtalk
spec:
containers:
- image: billy98/webhook-dingtalk:latest
name: webhook-dingtalk
args:
- "https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxx" #access_token改为自己的即可
ports:
- containerPort: 8080
protocol: TCP
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
cpu: 500m
memory: 500Mi
livenessProbe:
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
tcpSocket:
port: 8080
readinessProbe:
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
httpGet:
port: 8080
path: /
imagePullSecrets:
- name: IfNotPresent
---
apiVersion: v1
kind: Service
metadata:
labels:
app: webhook-dingtalk
name: webhook-dingtalk
namespace: monitoring
spec:
ports:
- name: http
port: 80
protocol: TCP
targetPort: 8080
selector:
app: webhook-dingtalk
type: ClusterIP
```
```bash
$ kubectl create -f dingtalk.yaml
```
4.2 获取alertmanager配置
```bash
$ kubectl get secret alertmanager-main -n monitoring -o yaml #alertmanager.yaml后面即为配置文件,用base64 -d解码即可
apiVersion: v1
data:
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKImluaGliaXRfcnVsZXMiOgotICJlcXVhbCI6CiAgLSAibmFtZXNwYWNlIgogIC0gImFsZXJ0bmFtZSIKICAic291cmNlX21hdGNoIjoKICAgICJzZXZlcml0eSI6ICJjcml0aWNhbCIKICAidGFyZ2V0X21hdGNoX3JlIjoKICAgICJzZXZlcml0eSI6ICJ3YXJuaW5nfGluZm8iCi0gImVxdWFsIjoKICAtICJuYW1lc3BhY2UiCiAgLSAiYWxlcnRuYW1lIgogICJzb3VyY2VfbWF0Y2giOgogICAgInNldmVyaXR5IjogIndhcm5pbmciCiAgInRhcmdldF9tYXRjaF9yZSI6CiAgICAic2V2ZXJpdHkiOiAiaW5mbyIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAiRGVmYXVsdCIKLSAibmFtZSI6ICJXYXRjaGRvZyIKLSAibmFtZSI6ICJDcml0aWNhbCIKLSAibmFtZSI6ICJXZWJob29rIgogICJ3ZWJob29rX2NvbmZpZ3MiOgogIC0gInVybCI6ICJodHRwOi8vd2ViaG9vay1kaW5ndGFsay9kaW5ndGFsay9zZW5kLyIKICAgICJzZW5kX3Jlc29sdmVkIjogdHJ1ZQoicm91dGUiOgogICJncm91cF9ieSI6CiAgLSAibmFtZXNwYWNlIgogICJncm91cF9pbnRlcnZhbCI6ICI1bSIKICAiZ3JvdXBfd2FpdCI6ICIzMHMiCiAgInJlY2VpdmVyIjogIldlYmhvb2siCiAgInJlcGVhdF9pbnRlcnZhbCI6ICIxMmgiCiAgInJvdXRlcyI6CiAgLSAibWF0Y2giOgogICAgICAiYWxlcnRuYW1lIjogIldhdGNoZG9nIgogICAgInJlY2VpdmVyIjogIldhdGNoZG9nIgogIC0gIm1hdGNoIjoKICAgICAgInNldmVyaXR5IjogImNyaXRpY2FsIgogICAgInJlY2VpdmVyIjogIkNyaXRpY2FsIgo=
kind: Secret
metadata:
annotations:
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"v1","data":{"alertmanager.yaml":"Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKImluaGliaXRfcnVsZXMiOgotICJlcXVhbCI6CiAgLSAibmFtZXNwYWNlIgogIC0gImFsZXJ0bmFtZSIKICAic291cmNlX21hdGNoIjoKICAgICJzZXZlcml0eSI6ICJjcml0aWNhbCIKICAidGFyZ2V0X21hdGNoX3JlIjoKICAgICJzZXZlcml0eSI6ICJ3YXJuaW5nfGluZm8iCi0gImVxdWFsIjoKICAtICJuYW1lc3BhY2UiCiAgLSAiYWxlcnRuYW1lIgogICJzb3VyY2VfbWF0Y2giOgogICAgInNldmVyaXR5IjogIndhcm5pbmciCiAgInRhcmdldF9tYXRjaF9yZSI6CiAgICAic2V2ZXJpdHkiOiAiaW5mbyIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAiRGVmYXVsdCIKLSAibmFtZSI6ICJXYXRjaGRvZyIKLSAibmFtZSI6ICJDcml0aWNhbCIKLSAibmFtZSI6ICJXZWJob29rIgogICJ3ZWJob29rX2NvbmZpZ3MiOgogIC0gInVybCI6ICJodHRwOi8vd2ViaG9vay1kaW5ndGFsay9kaW5ndGFsay9zZW5kLyIKICAgICJzZW5kX3Jlc29sdmVkIjogdHJ1ZQoicm91dGUiOgogICJncm91cF9ieSI6CiAgLSAibmFtZXNwYWNlIgogICJncm91cF9pbnRlcnZhbCI6ICI1bSIKICAiZ3JvdXBfd2FpdCI6ICIzMHMiCiAgInJlY2VpdmVyIjogIldlYmhvb2siCiAgInJlcGVhdF9pbnRlcnZhbCI6ICIxMmgiCiAgInJvdXRlcyI6CiAgLSAibWF0Y2giOgogICAgICAiYWxlcnRuYW1lIjogIldhdGNoZG9nIgogICAgInJlY2VpdmVyIjogIldhdGNoZG9nIgogIC0gIm1hdGNoIjoKICAgICAgInNldmVyaXR5IjogImNyaXRpY2FsIgogICAgInJlY2VpdmVyIjogIkNyaXRpY2FsIgo="},"kind":"Secret","metadata":{"annotations":{},"creationTimestamp":"2021-09-02T15:58:11Z","managedFields":[{"apiVersion":"v1","fieldsType":"FieldsV1","fieldsV1":{"f:data":{".":{},"f:alertmanager.yaml":{}},"f:metadata":{"f:annotations":{".":{},"f:kubectl.kubernetes.io/last-applied-configuration":{}}},"f:type":{}},"manager":"kubectl","operation":"Update","time":"2021-09-02T18:21:15Z"}],"name":"alertmanager-main","namespace":"monitoring","resourceVersion":"455008700","selfLink":"/api/v1/namespaces/monitoring/secrets/alertmanager-main","uid":"a7339c20-1b65-49f7-8b2e-db0459f14155"},"type":"Opaque"}
creationTimestamp: "2021-09-03T02:06:04Z"
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:alertmanager.yaml: {}
f:metadata:
f:annotations:
.: {}
f:kubectl.kubernetes.io/last-applied-configuration: {}
f:type: {}
manager: kubectl
operation: Update
time: "2021-09-03T02:06:04Z"
name: alertmanager-main
namespace: monitoring
resourceVersion: "457308386"
selfLink: /api/v1/namespaces/monitoring/secrets/alertmanager-main
uid: 6be76878-ac5d-4b6c-8f5a-e6928c1b4d67
type: Opaque
```
4.3 更改解码后配置
```yaml
"global":
"resolve_timeout": "5m"
"inhibit_rules":
- "equal":
- "namespace"
- "alertname"
"source_match":
"severity": "critical"
"target_match_re":
"severity": "warning|info"
- "equal":
- "namespace"
- "alertname"
"source_match":
"severity": "warning"
"target_match_re":
"severity": "info"
"receivers":
- "name": "Default"
- "name": "Watchdog"
- "name": "Critical"
- "name": "Webhook" #新增的webhook
"webhook_configs":
- "url": "http://webhook-dingtalk/dingtalk/send/" #配置前面部署服务webhook-dingtalk
"send_resolved": true
"route":
"group_by":
- "namespace"
"group_interval": "5m"
"group_wait": "30s"
"receiver": "Webhook" #默认Webhook
"repeat_interval": "12h"
"routes":
- "match":
"alertname": "Watchdog"
"receiver": "Watchdog"
- "match":
"severity": "critical"
"receiver": "Critical"
```
4.4 更新配置
```bash
$ kubectl edit secret alertmanager-main -n monitoring #把上面更新的内容base64加密替换旧的内容即可
```
4.5 重新部署Alertmanager
#### 五、问题:
`a.查看node-exporter的pod为pengding状态?`
_解决方法: 查看日志提示spec.template.spec.containers[1].ports[1].name: Duplicate value: "https",端口冲突,9100更改端口19100_
```bash
$ sed -i s/9100/19100/g manifests/node-exporter-daemonset.yaml
$ kubectl delete -f node-exporter-daemonset.yaml
$ kubectl create -f node-exporter-daemonset.yaml
```