Environment Status Monitoring
bash-5.0# cat qbert.yml
# Prometheus alerting rules for the qbert metrics.
# All alerts carry the labels type=pf9 and severity=warning.
groups:
  - name: qbert
    rules:
      # Fires when qbert_cluster_status_ok (job "qbert") has been 0 for 10 minutes.
      - alert: ClusterStatusNotOK
        expr: 'qbert_cluster_status_ok{job="qbert"} == 0'
        for: 10m
        labels:
          type: pf9
          severity: warning
        annotations:
          summary: 'Cluster status is not ok!'
          description: 'Cluster status of {{ $labels.cluster_name }} of type {{ $labels.cloud_provider_type }} is not ok'

      # Fires when qbert_kube_node_ready has been 0 for 10 minutes on a series
      # whose node_status is not "Ready" and whose cluster_name is not "undefined".
      - alert: NodeNotReady
        expr: 'qbert_kube_node_ready{node_status!="Ready", cluster_name!="undefined"} == 0'
        for: 10m
        labels:
          type: pf9
          severity: warning
        annotations:
          summary: 'Node {{ $labels.node_name }} is not ready!'
          description: 'Node {{ $labels.node_name }} cluster {{ $labels.cluster_name }} role {{ $labels.node_role }} is in state {{ $labels.node_status }}'

      # Fires when qbert_node_status_ok has been 0 for 10 minutes on a node
      # with node_role="master".
      - alert: K8sApiNotResponding
        expr: 'qbert_node_status_ok{node_role="master"} == 0'
        for: 10m
        labels:
          type: pf9
          severity: warning
        annotations:
          summary: 'K8s api of cluster {{ $labels.cluster_name }} is not responding!'
          description: 'K8s api is not responding on cluster {{ $labels.cluster_name }} Node {{ $labels.node_hostname }} role {{ $labels.node_role }}'

      # Fires when qbert_node_status_ok has been 0 for 15 minutes on a node
      # with node_role="worker" (note the longer hold time than the other alerts).
      - alert: WorkerNodeNotResponding
        expr: 'qbert_node_status_ok{node_role="worker"} == 0'
        for: 15m
        labels:
          type: pf9
          severity: warning
        annotations:
          summary: 'Worker node not responding!'
          description: 'Worker node not responding on cluster {{ $labels.cluster_name }} Node {{ $labels.node_hostname }} role {{ $labels.node_role }}'
bash-5.0#
Last updated
Was this helpful?
