--- # Source: kube-prometheus-stack/charts/grafana/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm name: prometheus-grafana namespace: monitoring --- # Source: kube-prometheus-stack/charts/kube-state-metrics/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus name: prometheus-kube-state-metrics namespace: monitoring imagePullSecrets: [] --- # Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: prometheus-prometheus-node-exporter namespace: monitoring labels: helm.sh/chart: prometheus-node-exporter-4.8.1 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: prometheus-node-exporter app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "1.5.0" jobLabel: node-exporter release: prometheus --- # Source: kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: prometheus-kube-prometheus-alertmanager namespace: monitoring labels: app: kube-prometheus-stack-alertmanager app.kubernetes.io/name: kube-prometheus-stack-alertmanager app.kubernetes.io/component: alertmanager app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" --- # Source: kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: prometheus-kube-prometheus-operator namespace: monitoring labels: app: kube-prometheus-stack-operator app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator app.kubernetes.io/component: prometheus-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" --- # Source: kube-prometheus-stack/templates/prometheus/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: prometheus-kube-prometheus-prometheus namespace: monitoring labels: app: kube-prometheus-stack-prometheus app.kubernetes.io/name: kube-prometheus-stack-prometheus app.kubernetes.io/component: prometheus app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" --- # Source: kube-prometheus-stack/charts/grafana/templates/secret.yaml apiVersion: v1 kind: Secret metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm type: Opaque data: admin-user: "YWRtaW4=" admin-password: "cHJvbS1vcGVyYXRvcg==" ldap-toml: "" --- # Source: kube-prometheus-stack/templates/alertmanager/secret.yaml apiVersion: v1 kind: Secret metadata: name: alertmanager-prometheus-kube-prometheus-alertmanager namespace: monitoring labels: app: kube-prometheus-stack-alertmanager app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: alertmanager.yaml: "Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0KaW5oaWJpdF9ydWxlczoKLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIC0gYWxlcnRuYW1lCiAgc291cmNlX21hdGNoZXJzOgogIC0gc2V2ZXJpdHkgPSBjcml0aWNhbAogIHRhcmdldF9tYXRjaGVyczoKICAtIHNldmVyaXR5ID1+IHdhcm5pbmd8aW5mbwotIGVxdWFsOgogIC0gbmFtZXNwYWNlCiAgLSBhbGVydG5hbWUKICBzb3VyY2VfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IHdhcm5pbmcKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KLSBlcXVhbDoKICAtIG5hbWVzcGFjZQogIHNvdXJjZV9tYXRjaGVyczoKICAtIGFsZXJ0bmFtZSA9IEluZm9JbmhpYml0b3IKICB0YXJnZXRfbWF0Y2hlcnM6CiAgLSBzZXZlcml0eSA9IGluZm8KcmVjZWl2ZXJzOgotIG5hbWU6ICJudWxsIgpyb3V0ZToKICBncm91cF9ieToKICAtIG5hbWVzcGFjZQogIGdyb3VwX2ludGVydmFsOiA1bQogIGdyb3VwX3dhaXQ6IDMwcwogIHJlY2VpdmVyOiAibnVsbCIKICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJvdXRlczoKICAtIG1hdGNoZXJzOgogICAgLSBhbGVydG5hbWUgPX4gIkluZm9JbmhpYml0b3J8V2F0Y2hkb2ciCiAgICByZWNlaXZlcjogIm51bGwiCnRlbXBsYXRlczoKLSAvZXRjL2FsZXJ0bWFuYWdlci9jb25maWcvKi50bXBs" --- # Source: kube-prometheus-stack/charts/grafana/templates/configmap-dashboard-provider.yaml apiVersion: v1 kind: ConfigMap metadata: labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm name: prometheus-grafana-config-dashboards namespace: monitoring data: provider.yaml: |- apiVersion: 1 providers: - name: 'sidecarProvider' orgId: 1 folder: '' type: file disableDeletion: false allowUiUpdates: false updateIntervalSeconds: 30 options: foldersFromFilesStructure: false path: /tmp/dashboards --- # Source: kube-prometheus-stack/charts/grafana/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm data: grafana.ini: | [analytics] check_for_updates = true [grafana_net] url = https://grafana.net [log] mode = console [paths] data = /var/lib/grafana/ logs = /var/log/grafana plugins = /var/lib/grafana/plugins provisioning = /etc/grafana/provisioning [server] domain = '' --- # Source: kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml apiVersion: v1 kind: ConfigMap metadata: name: prometheus-kube-prometheus-grafana-datasource namespace: monitoring labels: grafana_datasource: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: datasource.yaml: |- apiVersion: 1 datasources: - name: Prometheus type: prometheus uid: prometheus url: http://prometheus-kube-prometheus-prometheus.monitoring:9090/ access: proxy isDefault: true jsonData: timeInterval: 30s --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-alertmanager-overview annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: alertmanager-overview.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 1, "hideControls": false, "id": null, "links": [ ], "refresh": "30s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "current set of alerts stored in the Alertmanager", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 2, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Alerts", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "none", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "rate of successful and invalid alerts received by the Alertmanager", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 3, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Received", "refId": "A" }, { "expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Invalid", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Alerts receive rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Alerts", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "rate of successful and invalid notifications sent by the Alertmanager", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": "integration", "seriesOverrides": [ ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Total", "refId": "A" }, { "expr": "sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Failed", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "$integration: Notifications Send Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "latency of notifications sent by the Alertmanager", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 5, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": "integration", "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} 99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Median", "refId": "B" }, { "expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Average", "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "$integration: Notification Duration", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Notifications", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "alertmanager-mixin" ], "templating": { "list": [ { "current": { "text": "Prometheus", "value": "Prometheus" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "namespace", "multi": false, "name": "namespace", "options": [ ], "query": "label_values(alertmanager_alerts, namespace)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "service", "multi": false, "name": "service", "options": [ ], "query": "label_values(alertmanager_alerts, service)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "all", "value": "$__all" }, "datasource": "$datasource", "hide": 2, "includeAll": true, "label": null, "multi": false, "name": "integration", "options": [ ], "query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Alertmanager / Overview", "uid": "alertmanager-overview", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-apiserver annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: apiserver.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", "datasource": null, "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", "gridPos": { "h": 2, "w": 24, "x": 0, "y": 0 }, "id": 2, "mode": "markdown", "span": 12, "title": "Notice", "type": "text" } ], "refresh": "10s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 3, "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 4, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Availability (30d) > 99.000%", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "decimals": 3, "description": "How much error budget is left looking at our 0.990% availability guarantees?", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 8, "stack": false, "steppedLine": false, "targets": [ { "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)", "format": "time_series", "intervalFactor": 2, "legendFormat": "errorbudget", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "ErrorBudget (30d) > 99.000%", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "decimals": 3, "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "decimals": 3, "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 3, "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Read Availability (30d)", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "How many read requests (LIST,GET) per second do the apiservers get by code?", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/2../i", "color": "#56A64B" }, { "alias": "/3../i", "color": "#F2CC0C" }, { "alias": "/4../i", "color": "#3274D9" }, { "alias": "/5../i", "color": "#E02F44" } ], "spaceLength": 10, "span": 3, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ code }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Read SLI - Requests", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "reqps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "reqps", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ resource }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Read SLI - Errors", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ resource }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Read SLI - Duration", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 3, "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Write Availability (30d)", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/2../i", "color": "#56A64B" }, { "alias": "/3../i", "color": "#F2CC0C" }, { "alias": "/4../i", "color": "#3274D9" }, { "alias": "/5../i", "color": "#E02F44" } ], "spaceLength": 10, "span": 3, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ code }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Write SLI - Requests", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "reqps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "reqps", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ resource }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Write SLI - Errors", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ resource }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Write SLI - Duration", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 13, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 14, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 15, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 16, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 17, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 18, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"apiserver\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "instance", "options": [ ], "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / API server", "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-cluster-total annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: cluster-total.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 2, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 1 }, "id": 3, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Received", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 1 }, "id": 4, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Transmitted", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "columns": [ { "text": "Time", "value": "Time" }, { "text": "Value #A", "value": "Value #A" }, { "text": "Value #B", "value": "Value #B" }, { "text": "Value #C", "value": "Value #C" }, { "text": "Value #D", "value": "Value #D" }, { "text": "Value #E", "value": "Value #E" }, { "text": "Value #F", "value": "Value #F" }, { "text": "Value #G", "value": "Value #G" }, { "text": "Value #H", "value": "Value #H" }, { "text": "namespace", "value": "namespace" } ], "datasource": "$datasource", "fill": 1, "fontSize": "90%", "gridPos": { "h": 9, "w": 24, "x": 0, "y": 10 }, "id": 5, "lines": true, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", "scroll": true, "showHeader": true, "sort": { "col": 0, "desc": false }, "spaceLength": 10, "span": 24, "styles": [ { "alias": "Time", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Time", "thresholds": [ ], "type": "hidden", "unit": "short" }, { "alias": "Current Bandwidth Received", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Current Bandwidth Transmitted", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Average Bandwidth Received", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Average Bandwidth Transmitted", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #H", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Namespace", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "Drill down", "linkUrl": "d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ ], "type": "number", "unit": "short" } ], "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "H", "step": 10 } ], "timeFrom": null, "timeShift": null, "title": "Current Status", "type": "table" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, "id": 6, "panels": [ { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 11 }, "id": 7, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Rate of Bytes Received", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 11 }, "id": 8, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Rate of Bytes Transmitted", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 11 }, "id": 9, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth History", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 12 }, "id": 10, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 21 }, "id": 11, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 }, "id": 12, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 31 }, "id": 13, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 40 }, "id": 14, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Packets", "titleSize": "h6", "type": "row" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 31 }, "id": 15, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 50 }, "id": 16, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 59 }, "id": 17, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 59 }, "id": 18, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ { "targetBlank": true, "title": "What is TCP Retransmit?", "url": "https://accedian.com/enterprises/blog/network-packet-loss-retransmissions-and-duplicate-acknowledgements/" } ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of TCP Retransmits out of all sent segments", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 59 }, "id": 19, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [ { "targetBlank": true, "title": "Why monitor SYN retransmits?", "url": "https://github.com/prometheus/node_exporter/issues/1023#issuecomment-408128365" } ], "minSpan": 24, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of TCP SYN Retransmits out of all retransmits", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Errors", "titleSize": "h6", "type": "row" } ], "refresh": "10s", "rows": [ ], "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "resolution", "options": [ { "selected": false, "text": "30s", "value": "30s" }, { "selected": true, "text": "5m", "value": "5m" }, { "selected": false, "text": "1h", "value": "1h" } ], "query": "30s,5m,1h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "interval", "options": [ { "selected": true, "text": "4h", "value": "4h" } ], "query": "4h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false }, { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Networking / Cluster", "uid": "ff635a025bcfea7bc3dd4f508990a3e9", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/controller-manager.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-controller-manager annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: controller-manager.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Up", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "min" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} {{name}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} {{name}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} {{name}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Kube API Request Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 8, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Post Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "instance", "options": [ ], "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Controller Manager", "uid": "72e0e05bef5099e5f049b05fdc429ed4", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-etcd annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: etcd.json: |- { "annotations": { "list": [] }, "description": "etcd sample Grafana dashboard with Prometheus", "editable": true, "gnetId": null, "hideControls": false, "links": [], "refresh": "10s", "rows": [ { "collapse": false, "editable": true, "height": "250px", "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "datasource": "$datasource", "editable": true, "error": false, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "id": 28, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "targets": [ { "expr": "sum(etcd_server_has_leader{job=\"$cluster\"})", "intervalFactor": 2, "legendFormat": "", "metric": "etcd_server_has_leader", "refId": "A", "step": 20 } ], "thresholds": "", "title": "Up", "type": "singlestat", "valueFontSize": "200%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "avg" }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 0, "id": 23, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 5, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(grpc_server_started_total{job=\"$cluster\",grpc_type=\"unary\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "RPC Rate", "metric": "grpc_server_started_total", "refId": "A", "step": 2 }, { "expr": "sum(rate(grpc_server_handled_total{job=\"$cluster\",grpc_type=\"unary\",grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "RPC Failed Rate", "metric": "grpc_server_handled_total", "refId": "B", "step": 2 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "RPC Rate", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 0, "id": 41, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 4, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(grpc_server_started_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})", "intervalFactor": 2, "legendFormat": "Watch Streams", "metric": "grpc_server_handled_total", "refId": "A", "step": 4 }, { "expr": "sum(grpc_server_started_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})", "intervalFactor": 2, "legendFormat": "Lease Streams", "metric": "grpc_server_handled_total", "refId": "B", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Active Streams", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "showTitle": false, "title": "Row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "$datasource", "decimals": null, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 1, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "etcd_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} DB Size", "metric": "", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "DB Size", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 0, "grid": {}, "id": 3, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 1, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 4, "stack": false, "steppedLine": true, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=\"$cluster\"}[$__rate_interval])) by (instance, le))", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}} WAL fsync", "metric": "etcd_disk_wal_fsync_duration_seconds_bucket", "refId": "A", "step": 4 }, { "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=\"$cluster\"}[$__rate_interval])) by (instance, le))", "intervalFactor": 2, "legendFormat": "{{instance}} DB fsync", "metric": "etcd_disk_backend_commit_duration_seconds_bucket", "refId": "B", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Disk Sync Duration", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 0, "id": 29, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "process_resident_memory_bytes{job=\"$cluster\"}", "intervalFactor": 2, "legendFormat": "{{instance}} Resident Memory", "metric": "process_resident_memory_bytes", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Memory", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 5, "id": 22, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 3, "stack": true, "steppedLine": false, "targets": [ { "expr": "rate(etcd_network_client_grpc_received_bytes_total{job=\"$cluster\"}[$__rate_interval])", "intervalFactor": 2, "legendFormat": "{{instance}} Client Traffic In", "metric": "etcd_network_client_grpc_received_bytes_total", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Client Traffic In", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 5, "id": 21, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 3, "stack": true, "steppedLine": false, "targets": [ { "expr": "rate(etcd_network_client_grpc_sent_bytes_total{job=\"$cluster\"}[$__rate_interval])", "intervalFactor": 2, "legendFormat": "{{instance}} Client Traffic Out", "metric": "etcd_network_client_grpc_sent_bytes_total", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Client Traffic Out", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 0, "id": 20, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(etcd_network_peer_received_bytes_total{job=\"$cluster\"}[$__rate_interval])) by (instance)", "intervalFactor": 2, "legendFormat": "{{instance}} Peer Traffic In", "metric": "etcd_network_peer_received_bytes_total", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Peer Traffic In", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "decimals": null, "editable": true, "error": false, "fill": 0, "grid": {}, "id": 16, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(etcd_network_peer_sent_bytes_total{job=\"$cluster\"}[$__rate_interval])) by (instance)", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} Peer Traffic Out", "metric": "etcd_network_peer_sent_bytes_total", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Peer Traffic Out", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": null, "show": true } ] } ], "title": "New row" }, { "collapse": false, "editable": true, "height": "250px", "panels": [ { "aliasColors": {}, "bars": false, "datasource": "$datasource", "editable": true, "error": false, "fill": 0, "id": 40, "isNew": true, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(etcd_server_proposals_failed_total{job=\"$cluster\"}[$__rate_interval]))", "intervalFactor": 2, "legendFormat": "Proposal Failure Rate", "metric": "etcd_server_proposals_failed_total", "refId": "A", "step": 2 }, { "expr": "sum(etcd_server_proposals_pending{job=\"$cluster\"})", "intervalFactor": 2, "legendFormat": "Proposal Pending Total", "metric": "etcd_server_proposals_pending", "refId": "B", "step": 2 }, { "expr": "sum(rate(etcd_server_proposals_committed_total{job=\"$cluster\"}[$__rate_interval]))", "intervalFactor": 2, "legendFormat": "Proposal Commit Rate", "metric": "etcd_server_proposals_committed_total", "refId": "C", "step": 2 }, { "expr": "sum(rate(etcd_server_proposals_applied_total{job=\"$cluster\"}[$__rate_interval]))", "intervalFactor": 2, "legendFormat": "Proposal Apply Rate", "refId": "D", "step": 2 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Raft Proposals", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "datasource": "$datasource", "decimals": 0, "editable": true, "error": false, "fill": 0, "id": 19, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "changes(etcd_server_leader_changes_seen_total{job=\"$cluster\"}[1d])", "intervalFactor": 2, "legendFormat": "{{instance}} Total Leader Elections Per Day", "metric": "etcd_server_leader_changes_seen_total", "refId": "A", "step": 2 } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Total Leader Elections Per Day", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "decimals": 0, "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 28 }, "hiddenSeries": false, "id": 42, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=\"$cluster\"}[$__rate_interval])))", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} Peer round trip time", "metric": "etcd_network_peer_round_trip_time_seconds_bucket", "refId": "A", "step": 2 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Peer round trip time", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:925", "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:926", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "title": "New row" } ], "schemaVersion": 13, "sharedCrosshair": false, "style": "dark", "tags": [ "etcd-mixin" ], "templating": { "list": [ { "current": { "text": "Prometheus", "value": "Prometheus" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "prod", "value": "prod" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [], "query": "label_values(etcd_server_has_leader, job)", "refresh": 2, "regex": "", "sort": 2, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-15m", "to": "now" }, "timepicker": { "now": true, "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "etcd", "uid": "c2f4e12cdf69feb95caa41a5a1b423d9", "version": 215 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-grafana-overview annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: grafana-overview.json: |- { "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [ ], "type": "dashboard" }, "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "id": 3085, "iteration": 1631554945276, "links": [ ], "panels": [ { "datasource": "$datasource", "fieldConfig": { "defaults": { "mappings": [ ], "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [ ] }, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 }, "id": 6, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "mean" ], "fields": "", "values": false }, "text": { }, "textMode": "auto" }, "pluginVersion": "8.1.3", "targets": [ { "expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}", "instant": true, "interval": "", "legendFormat": "", "refId": "A" } ], "timeFrom": null, "timeShift": null, "title": "Firing Alerts", "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [ ] }, "gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 }, "id": 8, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "mean" ], "fields": "", "values": false }, "text": { }, "textMode": "auto" }, "pluginVersion": "8.1.3", "targets": [ { "expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})", "interval": "", "legendFormat": "", "refId": "A" } ], "timeFrom": null, "timeShift": null, "title": "Dashboards", "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { "align": null, "displayMode": "auto" }, "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [ ] }, "gridPos": { "h": 5, "w": 12, "x": 12, "y": 0 }, "id": 10, "options": { "showHeader": true }, "pluginVersion": "8.1.3", "targets": [ { "expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}", "instant": true, "interval": "", "legendFormat": "", "refId": "A" } ], "timeFrom": null, "timeShift": null, "title": "Build Info", "transformations": [ { "id": "labelsToFields", "options": { } }, { "id": "organize", "options": { "excludeByName": { "Time": true, "Value": true, "branch": true, "container": true, "goversion": true, "namespace": true, "pod": true, "revision": true }, "indexByName": { "Time": 7, "Value": 11, "branch": 4, "container": 8, "edition": 2, "goversion": 6, "instance": 1, "job": 0, "namespace": 9, "pod": 10, "revision": 5, "version": 3 }, "renameByName": { } } } ], "type": "table" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ] }, "overrides": [ ] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 }, "hiddenSeries": false, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.1.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ", "interval": "", "legendFormat": "{{status_code}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, "title": "RPS", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "$$hashKey": "object:157", "format": "reqps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:158", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ] }, "overrides": [ ] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 }, "hiddenSeries": false, "id": 4, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.1.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, "expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", "interval": "", "legendFormat": "99th Percentile", "refId": "A" }, { "exemplar": true, "expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", "interval": "", "legendFormat": "50th Percentile", "refId": "B" }, { "exemplar": true, "expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))", "interval": "", "legendFormat": "Average", "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, "title": "Request Latency", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "$$hashKey": "object:210", "format": "ms", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "$$hashKey": "object:211", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "schemaVersion": 30, "style": "dark", "tags": [ ], "templating": { "list": [ { "current": { "selected": true, "text": "dev-cortex", "value": "dev-cortex" }, "description": null, "error": null, "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "datasource", "options": [ ], "query": "prometheus", "queryValue": "", "refresh": 1, "regex": "", "skipUrlSync": false, "type": "datasource" }, { "allValue": ".*", "current": { "selected": false, "text": [ "default/grafana" ], "value": [ "default/grafana" ] }, "datasource": "$datasource", "definition": "label_values(grafana_build_info, job)", "description": null, "error": null, "hide": 0, "includeAll": true, "label": null, "multi": true, "name": "job", "options": [ ], "query": { "query": "label_values(grafana_build_info, job)", "refId": "Billing Admin-job-Variable-Query" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".*", "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "$datasource", "definition": "label_values(grafana_build_info, instance)", "description": null, "error": null, "hide": 0, "includeAll": true, "label": null, "multi": true, "name": "instance", "options": [ ], "query": { "query": "label_values(grafana_build_info, instance)", "refId": "Billing Admin-instance-Variable-Query" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-6h", "to": "now" }, "timepicker": { "refresh_intervals": [ "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ] }, "timezone": "utc", "title": "Grafana Overview", "uid": "6be0s85Mk", "version": 2 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-coredns annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-coredns.json: |- { "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "A dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. Based on the CoreDNS dashboard by buhay.", "editable": true, "gnetId": 12539, "graphTooltip": 0, "iteration": 1603798405693, "links": [ { "icon": "external link", "tags": [], "targetBlank": true, "title": "CoreDNS.io", "type": "link", "url": "https://coredns.io" } ], "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 0 }, "hiddenSeries": false, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "yaxis": 2 } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(coredns_dns_request_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (proto) or\nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (proto)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Requests (total)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 8, "y": 0 }, "hiddenSeries": false, "id": 4, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "yaxis": 2 }, { "alias": "other", "yaxis": 2 } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(coredns_dns_request_type_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type) or \nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type)", "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Requests (by qtype)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 0 }, "hiddenSeries": false, "id": 6, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "yaxis": 2 } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(coredns_dns_request_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (zone) or\nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (zone)", "interval": "", "intervalFactor": 2, "legendFormat": "{{zone}}", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Requests (by zone)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, "hiddenSeries": false, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "yaxis": 2 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(coredns_dns_request_do_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_do_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m]))", "interval": "", "intervalFactor": 2, "legendFormat": "DO", "refId": "A", "step": 40 }, { "expr": "sum(rate(coredns_dns_request_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m]))", "interval": "", "intervalFactor": 2, "legendFormat": "total", "refId": "B", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Requests (DO bit)", "tooltip": { "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 6, "x": 12, "y": 7 }, "hiddenSeries": false, "id": 10, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "tcp:90", "yaxis": 2 }, { "alias": "tcp:99 ", "yaxis": 2 }, { "alias": "tcp:50", "yaxis": 2 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}:99 ", "refId": "A", "step": 60 }, { "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:90", "refId": "B", "step": 60 }, { "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:50", "refId": "C", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Requests (size, udp)", "tooltip": { "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 6, "x": 18, "y": 7 }, "hiddenSeries": false, "id": 12, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "tcp:90", "yaxis": 1 }, { "alias": "tcp:99 ", "yaxis": 1 }, { "alias": "tcp:50", "yaxis": 1 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}:99 ", "refId": "A", "step": 60 }, { "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}:90", "refId": "B", "step": 60 }, { "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}:50", "refId": "C", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Requests (size,tcp)", "tooltip": { "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, "hiddenSeries": false, "id": 14, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(coredns_dns_response_rcode_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (rcode) or\nsum(rate(coredns_dns_responses_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (rcode)", "interval": "", "intervalFactor": 2, "legendFormat": "{{rcode}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Responses (by rcode)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 }, "hiddenSeries": false, "id": 32, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job=\"coredns\",instance=~\"$instance\"}[5m])) by (le, job))", "format": "time_series", "intervalFactor": 2, "legendFormat": "99%", "refId": "A", "step": 40 }, { "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{job=\"coredns\",instance=~\"$instance\"}[5m])) by (le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "90%", "refId": "B", "step": 40 }, { "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{job=\"coredns\",instance=~\"$instance\"}[5m])) by (le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "50%", "refId": "C", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Responses (duration)", "tooltip": { "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 21 }, "hiddenSeries": false, "id": 18, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "udp:50%", "yaxis": 1 }, { "alias": "tcp:50%", "yaxis": 2 }, { "alias": "tcp:90%", "yaxis": 2 }, { "alias": "tcp:99%", "yaxis": 2 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}:99%", "refId": "A", "step": 40 }, { "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", "interval": "", "intervalFactor": 2, "legendFormat": "{{proto}}:90%", "refId": "B", "step": 40 }, { "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", "hide": false, "intervalFactor": 2, "legendFormat": "{{proto}}:50%", "metric": "", "refId": "C", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Responses (size, udp)", "tooltip": { "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 21 }, "hiddenSeries": false, "id": 20, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "udp:50%", "yaxis": 1 }, { "alias": "tcp:50%", "yaxis": 1 }, { "alias": "tcp:90%", "yaxis": 1 }, { "alias": "tcp:99%", "yaxis": 1 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{proto}}:99%", "refId": "A", "step": 40 }, { "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{proto}}:90%", "refId": "B", "step": 40 }, { "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{proto}}:50%", "metric": "", "refId": "C", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Responses (size, tcp)", "tooltip": { "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 28 }, "hiddenSeries": false, "id": 22, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(coredns_cache_size{job=\"coredns\",instance=~\"$instance\"}) by (type) or\nsum(coredns_cache_entries{job=\"coredns\",instance=~\"$instance\"}) by (type)", "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Cache (size)", "tooltip": { "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 28 }, "hiddenSeries": false, "id": 24, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.2.0", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "misses", "yaxis": 2 } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(coredns_cache_hits_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type)", "hide": false, "intervalFactor": 2, "legendFormat": "hits:{{type}}", "refId": "A", "step": 40 }, { "expr": "sum(rate(coredns_cache_misses_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type)", "hide": false, "intervalFactor": 2, "legendFormat": "misses", "refId": "B", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Cache (hitrate)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "logBase": 1, "max": null, "min": 0, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "refresh": "10s", "schemaVersion": 26, "style": "dark", "tags": [ "dns", "coredns" ], "templating": { "list": [ { "current": { "selected": true, "text": "default", "value": "default" }, "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "datasource", "options": [], "query": "prometheus", "queryValue": "", "refresh": 1, "regex": "", "skipUrlSync": false, "type": "datasource" }, { "allValue": ".*", "current": { "selected": true, "text": "All", "value": "$__all" }, "datasource": "$datasource", "definition": "label_values(up{job=\"coredns\"}, instance)", "hide": 0, "includeAll": true, "label": "Instance", "multi": false, "name": "instance", "options": [], "query": "label_values(up{job=\"coredns\"}, instance)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 3, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-3h", "to": "now" }, "timepicker": { "refresh_intervals": [ "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ] }, "timezone": "utc", "title": "CoreDNS", "uid": "vkQ0UHxik", "version": 2 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-resources-cluster annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-resources-cluster.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "height": "100px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 1, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 2, "stack": false, "steppedLine": false, "targets": [ { "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 2, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "CPU Requests Commitment", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 2, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "CPU Limits Commitment", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 2, "stack": false, "steppedLine": false, "targets": [ { "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 2, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "Memory Requests Commitment", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 2, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "Memory Limits Commitment", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Headlines", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Pods", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Workloads", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to workloads", "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "CPU Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Namespace", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Pods", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Workloads", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to workloads", "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Memory Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Namespace", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Requests by Namespace", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Requests", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Namespace", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Network Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 13, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 14, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Container Bandwidth by Namespace: Received", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 15, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Container Bandwidth by Namespace: Transmitted", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Average Container Bandwidth by Namespace", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 16, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 17, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 18, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 19, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "decimals": -1, "fill": 10, "id": 20, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 21, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 22, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "sort": { "col": 4, "desc": true }, "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "IOPS(Reads)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "IOPS(Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "IOPS(Reads + Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Throughput(Read)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Throughput(Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Throughput(Read + Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Namespace", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO - Distribution", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Compute Resources / Cluster", "uid": "efa86fd1d0c121a26444b636a3f509a8", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-resources-namespace annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-resources-namespace.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "height": "100px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 1, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "CPU Utilisation (from requests)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "CPU Utilisation (from limits)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "Memory Utilisation (from requests)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "format": "percentunit", "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "format": "time_series", "instant": true, "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "Memory Utilisation (from limits)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Headlines", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "quota - requests", "color": "#F2495C", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false }, { "alias": "quota - limits", "color": "#FF9830", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "CPU Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "quota - requests", "color": "#F2495C", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false }, { "alias": "quota - limits", "color": "#FF9830", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Memory Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Usage (Cache)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Usage (Swap)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #H", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 }, { "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "H", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Network Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 13, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 14, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 15, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "decimals": -1, "fill": 10, "id": 16, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 17, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 18, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "sort": { "col": 4, "desc": true }, "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "IOPS(Reads)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "IOPS(Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "IOPS(Reads + Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Throughput(Read)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Throughput(Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Throughput(Read + Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO - Distribution", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Compute Resources / Namespace (Pods)", "uid": "85a562078cdf77779eaa1add43ccec1e", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-resources-node annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-resources-node.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 1, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "max capacity", "color": "#F2495C", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "max capacity", "legendLink": null, "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "CPU Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "max capacity", "color": "#F2495C", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "max capacity", "legendLink": null, "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Memory Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Usage (Cache)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Usage (Swap)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #H", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "H", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Quota", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": true, "name": "node", "options": [ ], "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Compute Resources / Node (Pods)", "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-resources-pod annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-resources-pod.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 1, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "requests", "color": "#F2495C", "fill": 0, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false }, { "alias": "limits", "color": "#FF9830", "fill": 0, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null, "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "requests", "legendLink": null, "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "limits", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null, "step": 10 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.25, "yaxis": "left" } ], "timeFrom": null, "timeShift": null, "title": "CPU Throttling", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": 1, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Throttling", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "CPU Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Container", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "container", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "requests", "color": "#F2495C", "dashes": true, "fill": 0, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false }, { "alias": "limits", "color": "#FF9830", "dashes": true, "fill": 0, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null, "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "requests", "legendLink": null, "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "limits", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage (WSS)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Memory Usage (WSS)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Usage (Cache)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Usage (Swap)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #H", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Container", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "container", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 }, { "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "H", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "decimals": -1, "fill": 10, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Reads", "legendLink": null, "step": 10 }, { "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Writes", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "IOPS", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 13, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Reads", "legendLink": null, "step": 10 }, { "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Writes", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "ThroughPut", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO - Distribution(Pod - Read & Writes)", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "decimals": -1, "fill": 10, "id": 14, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 15, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO - Distribution(Containers)", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 16, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "sort": { "col": 4, "desc": true }, "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "IOPS(Reads)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "IOPS(Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "IOPS(Reads + Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Throughput(Read)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Throughput(Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Throughput(Read + Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Container", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "container", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage IO - Distribution", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "pod", "options": [ ], "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Compute Resources / Pod", "uid": "6581e46e4e5c7ba40a07646395ef7b23", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-resources-workload annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-resources-workload.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 1, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "CPU Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Memory Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Network Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Container Bandwidth by Pod: Received", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Container Bandwidth by Pod: Transmitted", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Average Container Bandwidth by Pod", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 13, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "type", "options": [ ], "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "workload", "options": [ ], "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Compute Resources / Workload", "uid": "a164a7f0339f99e89cea5cb47e9be617", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-k8s-resources-workloads-namespace annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: k8s-resources-workloads-namespace.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 1, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "quota - requests", "color": "#F2495C", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false }, { "alias": "quota - limits", "color": "#FF9830", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Running Pods", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "CPU Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Workload", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", "pattern": "workload", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Workload Type", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "workload_type", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "quota - requests", "color": "#F2495C", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false }, { "alias": "quota - limits", "color": "#FF9830", "dashes": true, "fill": 0, "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, "stack": false } ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null, "step": 10 }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Running Pods", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Memory Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Requests %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Memory Limits", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "bytes" }, { "alias": "Memory Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "percentunit" }, { "alias": "Workload", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", "pattern": "workload", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Workload Type", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "workload_type", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Quota", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory Quota", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Workload", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down to pods", "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", "pattern": "workload", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Workload Type", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "workload_type", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Network Usage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Container Bandwidth by Workload: Received", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Container Bandwidth by Workload: Transmitted", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Average Container Bandwidth by Workload", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 13, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "deployment", "value": "deployment" }, "datasource": "$datasource", "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "type", "options": [ ], "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Compute Resources / Namespace (Workloads)", "uid": "a87fb0d919ec0ea5f6543124e16c42a5", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-kubelet annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: kubelet.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ], "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ ] }, "unit": "none" } }, "gridPos": { "h": 7, "w": 4, "x": 0, "y": 0 }, "id": 2, "links": [ ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "textMode": "auto" }, "pluginVersion": "7", "targets": [ { "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "title": "Running Kubelets", "transparent": false, "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ], "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ ] }, "unit": "none" } }, "gridPos": { "h": 7, "w": 4, "x": 4, "y": 0 }, "id": 3, "links": [ ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "textMode": "auto" }, "pluginVersion": "7", "targets": [ { "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Running Pods", "transparent": false, "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ], "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ ] }, "unit": "none" } }, "gridPos": { "h": 7, "w": 4, "x": 8, "y": 0 }, "id": 4, "links": [ ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "textMode": "auto" }, "pluginVersion": "7", "targets": [ { "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Running Containers", "transparent": false, "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ], "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ ] }, "unit": "none" } }, "gridPos": { "h": 7, "w": 4, "x": 12, "y": 0 }, "id": 5, "links": [ ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "textMode": "auto" }, "pluginVersion": "7", "targets": [ { "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Actual Volume Count", "transparent": false, "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ], "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ ] }, "unit": "none" } }, "gridPos": { "h": 7, "w": 4, "x": 16, "y": 0 }, "id": 6, "links": [ ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "textMode": "auto" }, "pluginVersion": "7", "targets": [ { "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Desired Volume Count", "transparent": false, "type": "stat" }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "links": [ ], "mappings": [ ], "thresholds": { "mode": "absolute", "steps": [ ] }, "unit": "none" } }, "gridPos": { "h": 7, "w": 4, "x": 20, "y": 0 }, "id": 7, "links": [ ], "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "textMode": "auto" }, "pluginVersion": "7", "targets": [ { "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Config Error Count", "transparent": false, "type": "stat" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, "id": 8, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Operation Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 7 }, "id": 9, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Operation Error Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 14 }, "id": 10, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Operation duration 99th quantile", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 21 }, "id": 11, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} pod", "refId": "A" }, { "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} worker", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Pod Start Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 21 }, "id": 12, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} pod", "refId": "A" }, { "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} worker", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Pod Start Duration", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 28 }, "id": 13, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Storage Operation Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 28 }, "id": 14, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Storage Operation Error Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 35 }, "id": 15, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Storage Operation Duration 99th quantile", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 42 }, "id": 16, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{operation_type}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Cgroup manager operation rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 42 }, "id": 17, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Cgroup manager 99th quantile", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Pod lifecycle event generator", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 49 }, "id": 18, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "PLEG relist rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 49 }, "id": 19, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "PLEG relist interval", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 56 }, "id": 20, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "PLEG relist duration", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 63 }, "id": 21, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "RPC Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 70 }, "id": 22, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Request duration 99th quantile", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 77 }, "id": 23, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, "y": 77 }, "id": 24, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU usage", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 77 }, "id": 25, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Goroutines", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "refresh": "10s", "rows": [ ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": "instance", "multi": false, "name": "instance", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Kubelet", "uid": "3138fa155d5915769fbded898ac09fd9", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-namespace-by-pod annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: namespace-by-pod.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 2, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 1 }, "height": 9, "id": 3, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "minSpan": 12, "nullPointMode": "connected", "nullText": null, "options": { "fieldOptions": { "calcs": [ "last" ], "defaults": { "max": 10000000000, "min": 0, "title": "$namespace", "unit": "Bps" }, "mappings": [ ], "override": { }, "thresholds": [ { "color": "dark-green", "index": 0, "value": null }, { "color": "dark-yellow", "index": 1, "value": 5000000000 }, { "color": "dark-red", "index": 2, "value": 7000000000 } ], "values": false } }, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 12, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", "format": "time_series", "instant": null, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Received", "type": "gauge", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 1 }, "height": 9, "id": 4, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "minSpan": 12, "nullPointMode": "connected", "nullText": null, "options": { "fieldOptions": { "calcs": [ "last" ], "defaults": { "max": 10000000000, "min": 0, "title": "$namespace", "unit": "Bps" }, "mappings": [ ], "override": { }, "thresholds": [ { "color": "dark-green", "index": 0, "value": null }, { "color": "dark-yellow", "index": 1, "value": 5000000000 }, { "color": "dark-red", "index": 2, "value": 7000000000 } ], "values": false } }, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 12, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", "format": "time_series", "instant": null, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Transmitted", "type": "gauge", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "columns": [ { "text": "Time", "value": "Time" }, { "text": "Value #A", "value": "Value #A" }, { "text": "Value #B", "value": "Value #B" }, { "text": "Value #C", "value": "Value #C" }, { "text": "Value #D", "value": "Value #D" }, { "text": "Value #E", "value": "Value #E" }, { "text": "Value #F", "value": "Value #F" }, { "text": "pod", "value": "pod" } ], "datasource": "$datasource", "fill": 1, "fontSize": "100%", "gridPos": { "h": 9, "w": 24, "x": 0, "y": 10 }, "id": 5, "lines": true, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", "scroll": true, "showHeader": true, "sort": { "col": 0, "desc": false }, "spaceLength": 10, "span": 24, "styles": [ { "alias": "Time", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Time", "thresholds": [ ], "type": "hidden", "unit": "short" }, { "alias": "Bandwidth Received", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Bandwidth Transmitted", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "Drill down", "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ ], "type": "number", "unit": "short" } ], "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 } ], "timeFrom": null, "timeShift": null, "title": "Current Status", "type": "table" }, { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 19 }, "id": 6, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 20 }, "id": 7, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 20 }, "id": 8, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }, "id": 9, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 30 }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 30 }, "id": 11, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Packets", "titleSize": "h6", "type": "row" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 }, "id": 12, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 40 }, "id": 13, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 40 }, "id": 14, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Errors", "titleSize": "h6", "type": "row" } ], "refresh": "10s", "rows": [ ], "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "kube-system", "value": "kube-system" }, "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "resolution", "options": [ { "selected": false, "text": "30s", "value": "30s" }, { "selected": true, "text": "5m", "value": "5m" }, { "selected": false, "text": "1h", "value": "1h" } ], "query": "30s,5m,1h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "interval", "options": [ { "selected": true, "text": "4h", "value": "4h" } ], "query": "4h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Networking / Namespace (Pods)", "uid": "8b7a8b326d7a6f1f04244066368c67af", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-namespace-by-workload annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: namespace-by-workload.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 2, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 1 }, "id": 3, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ workload }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Received", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 1 }, "id": 4, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ workload }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Transmitted", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "columns": [ { "text": "Time", "value": "Time" }, { "text": "Value #A", "value": "Value #A" }, { "text": "Value #B", "value": "Value #B" }, { "text": "Value #C", "value": "Value #C" }, { "text": "Value #D", "value": "Value #D" }, { "text": "Value #E", "value": "Value #E" }, { "text": "Value #F", "value": "Value #F" }, { "text": "Value #G", "value": "Value #G" }, { "text": "Value #H", "value": "Value #H" }, { "text": "workload", "value": "workload" } ], "datasource": "$datasource", "fill": 1, "fontSize": "90%", "gridPos": { "h": 9, "w": 24, "x": 0, "y": 10 }, "id": 5, "lines": true, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", "scroll": true, "showHeader": true, "sort": { "col": 0, "desc": false }, "spaceLength": 10, "span": 24, "styles": [ { "alias": "Time", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Time", "thresholds": [ ], "type": "hidden", "unit": "short" }, { "alias": "Current Bandwidth Received", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Current Bandwidth Transmitted", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Average Bandwidth Received", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Average Bandwidth Transmitted", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", "thresholds": [ ], "type": "number", "unit": "Bps" }, { "alias": "Rate of Received Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #H", "thresholds": [ ], "type": "number", "unit": "pps" }, { "alias": "Workload", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "Drill down", "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", "pattern": "workload", "thresholds": [ ], "type": "number", "unit": "short" } ], "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 }, { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "C", "step": 10 }, { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "D", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 }, { "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "H", "step": 10 } ], "timeFrom": null, "timeShift": null, "title": "Current Status", "type": "table" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 19 }, "id": 6, "panels": [ { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 20 }, "id": 7, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ workload }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Rate of Bytes Received", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 20 }, "id": 8, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ workload }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Rate of Bytes Transmitted", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }, "id": 9, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth HIstory", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 38 }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{workload}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 38 }, "id": 11, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{workload}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 39 }, "id": 12, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 40 }, "id": 13, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{workload}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 40 }, "id": 14, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{workload}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Packets", "titleSize": "h6", "type": "row" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 40 }, "id": 15, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 41 }, "id": 16, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{workload}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 41 }, "id": 17, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{workload}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Errors", "titleSize": "h6", "type": "row" } ], "refresh": "10s", "rows": [ ], "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "kube-system", "value": "kube-system" }, "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "deployment", "value": "deployment" }, "datasource": "$datasource", "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "type", "options": [ ], "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "resolution", "options": [ { "selected": false, "text": "30s", "value": "30s" }, { "selected": true, "text": "5m", "value": "5m" }, { "selected": false, "text": "1h", "value": "1h" } ], "query": "30s,5m,1h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "interval", "options": [ { "selected": true, "text": "4h", "value": "4h" } ], "query": "4h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Networking / Namespace (Workload)", "uid": "bbb2a765a623ae38130206c7d94a160f", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-node-cluster-rsrc-use annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: node-cluster-rsrc-use.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 1, "hideControls": false, "id": null, "links": [ ], "refresh": "30s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 2, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ instance }}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 3, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Saturation (Load1 per CPU)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 4, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 5, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Saturation (Major Page Faults)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "rds", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "rds", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 6, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/Receive/", "stack": "A" }, { "alias": "/Transmit/", "stack": "B", "transform": "negative-Y" } ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Receive", "refId": "A" }, { "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Transmit", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Utilisation (Bytes Receive/Transmit)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 7, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/ Receive/", "stack": "A" }, { "alias": "/ Transmit/", "stack": "B", "transform": "negative-Y" } ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Receive", "refId": "A" }, { "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} Transmit", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Saturation (Drops Receive/Transmit)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Network", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 8, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} {{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk IO Saturation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Disk IO", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk Space Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Disk Space", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "node-exporter-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(node_time_seconds, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Node Exporter / USE Method / Cluster", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-node-rsrc-use annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: node-rsrc-use.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 1, "hideControls": false, "id": null, "links": [ ], "refresh": "30s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 2, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Utilisation", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 3, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Saturation", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Saturation (Load1 per CPU)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 4, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Utilisation", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 5, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Major page Faults", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Saturation (Major Page Faults)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "rds", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "rds", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 6, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/Receive/", "stack": "A" }, { "alias": "/Transmit/", "stack": "B", "transform": "negative-Y" } ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Receive", "refId": "A" }, { "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Transmit", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Utilisation (Bytes Receive/Transmit)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 7, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/ Receive/", "stack": "A" }, { "alias": "/ Transmit/", "stack": "B", "transform": "negative-Y" } ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Receive", "refId": "A" }, { "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "Transmit", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Saturation (Drops Receive/Transmit)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Network", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 8, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk IO Saturation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Disk IO", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "fillGradient": 0, "gridPos": { }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n ) != 0\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk Space Utilisation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Disk Space", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "node-exporter-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "text": "", "value": "" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(node_time_seconds, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "instance", "options": [ ], "query": "label_values(node_exporter_build_info{job=\"node-exporter\", cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Node Exporter / USE Method / Node", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-nodes-darwin annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: nodes-darwin.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 1, "hideControls": false, "id": null, "links": [ ], "refresh": "30s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 2, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", "format": "time_series", "intervalFactor": 5, "legendFormat": "{{cpu}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": 1, "min": 0, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": 1, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 3, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "1m load average", "refId": "A" }, { "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "5m load average", "refId": "B" }, { "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "15m load average", "refId": "C" }, { "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "logical cores", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Load Average", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 9, "stack": false, "steppedLine": false, "targets": [ { "expr": "node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Physical Memory", "refId": "A" }, { "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "Memory Used", "refId": "B" }, { "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "App Memory", "refId": "C" }, { "expr": "node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Wired Memory", "refId": "D" }, { "expr": "node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Compressed", "refId": "E" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 80 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 90 } ] }, "unit": "percent" } }, "gridPos": { }, "id": 5, "span": 3, "targets": [ { "expr": "(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"})\n)\n*\n100\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "" } ], "title": "Memory Usage", "transparent": false, "type": "gauge" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 6, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/ read| written/", "yaxis": 1 }, { "alias": "/ io time/", "yaxis": 2 } ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} read", "refId": "A" }, { "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} written", "refId": "B" }, { "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} io time", "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk I/O", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { }, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "yellow", "value": 0.8 }, { "color": "red", "value": 0.9 } ] }, "unit": "decbytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Mounted on" }, "properties": [ { "id": "custom.width", "value": 260 } ] }, { "matcher": { "id": "byName", "options": "Size" }, "properties": [ { "id": "custom.width", "value": 93 } ] }, { "matcher": { "id": "byName", "options": "Used" }, "properties": [ { "id": "custom.width", "value": 72 } ] }, { "matcher": { "id": "byName", "options": "Available" }, "properties": [ { "id": "custom.width", "value": 88 } ] }, { "matcher": { "id": "byName", "options": "Used, %" }, "properties": [ { "id": "unit", "value": "percentunit" }, { "id": "custom.displayMode", "value": "gradient-gauge" }, { "id": "max", "value": 1 }, { "id": "min", "value": 0 } ] } ] }, "gridPos": { }, "id": 7, "span": 6, "targets": [ { "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "" }, { "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "" } ], "title": "Disk Space Usage", "transformations": [ { "id": "groupBy", "options": { "fields": { "Value #A": { "aggregations": [ "lastNotNull" ], "operation": "aggregate" }, "Value #B": { "aggregations": [ "lastNotNull" ], "operation": "aggregate" }, "mountpoint": { "aggregations": [ ], "operation": "groupby" } } } }, { "id": "merge", "options": { } }, { "id": "calculateField", "options": { "alias": "Used", "binary": { "left": "Value #A (lastNotNull)", "operator": "-", "reducer": "sum", "right": "Value #B (lastNotNull)" }, "mode": "binary", "reduce": { "reducer": "sum" } } }, { "id": "calculateField", "options": { "alias": "Used, %", "binary": { "left": "Used", "operator": "/", "reducer": "sum", "right": "Value #A (lastNotNull)" }, "mode": "binary", "reduce": { "reducer": "sum" } } }, { "id": "organize", "options": { "excludeByName": { }, "indexByName": { }, "renameByName": { "Value #A (lastNotNull)": "Size", "Value #B (lastNotNull)": "Available", "mountpoint": "Mounted on" } } }, { "id": "sortBy", "options": { "fields": { }, "sort": [ { "field": "Mounted on" } ] } } ], "transparent": false, "type": "table" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Disk", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Network received (bits/s)", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 8, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Received", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Network transmitted (bits/s)", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Transmitted", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Network", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "node-exporter-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "Instance", "multi": false, "name": "instance", "options": [ ], "query": "label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, instance)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Node Exporter / MacOS", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-nodes annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: nodes.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 1, "hideControls": false, "id": null, "links": [ ], "refresh": "30s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 2, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", "format": "time_series", "intervalFactor": 5, "legendFormat": "{{cpu}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": 1, "min": 0, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": 1, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 3, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "1m load average", "refId": "A" }, { "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "5m load average", "refId": "B" }, { "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "15m load average", "refId": "C" }, { "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "logical cores", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Load Average", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "CPU", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 9, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory used", "refId": "A" }, { "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory buffers", "refId": "B" }, { "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory cached", "refId": "C" }, { "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory free", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 80 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 90 } ] }, "unit": "percent" } }, "gridPos": { }, "id": 5, "span": 3, "targets": [ { "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "" } ], "title": "Memory Usage", "transparent": false, "type": "gauge" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Memory", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 6, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "/ read| written/", "yaxis": 1 }, { "alias": "/ io time/", "yaxis": 2 } ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} read", "refId": "A" }, { "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} written", "refId": "B" }, { "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} io time", "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Disk I/O", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { }, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "yellow", "value": 0.8 }, { "color": "red", "value": 0.9 } ] }, "unit": "decbytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Mounted on" }, "properties": [ { "id": "custom.width", "value": 260 } ] }, { "matcher": { "id": "byName", "options": "Size" }, "properties": [ { "id": "custom.width", "value": 93 } ] }, { "matcher": { "id": "byName", "options": "Used" }, "properties": [ { "id": "custom.width", "value": 72 } ] }, { "matcher": { "id": "byName", "options": "Available" }, "properties": [ { "id": "custom.width", "value": 88 } ] }, { "matcher": { "id": "byName", "options": "Used, %" }, "properties": [ { "id": "unit", "value": "percentunit" }, { "id": "custom.displayMode", "value": "gradient-gauge" }, { "id": "max", "value": 1 }, { "id": "min", "value": 0 } ] } ] }, "gridPos": { }, "id": 7, "span": 6, "targets": [ { "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "" }, { "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "" } ], "title": "Disk Space Usage", "transformations": [ { "id": "groupBy", "options": { "fields": { "Value #A": { "aggregations": [ "lastNotNull" ], "operation": "aggregate" }, "Value #B": { "aggregations": [ "lastNotNull" ], "operation": "aggregate" }, "mountpoint": { "aggregations": [ ], "operation": "groupby" } } } }, { "id": "merge", "options": { } }, { "id": "calculateField", "options": { "alias": "Used", "binary": { "left": "Value #A (lastNotNull)", "operator": "-", "reducer": "sum", "right": "Value #B (lastNotNull)" }, "mode": "binary", "reduce": { "reducer": "sum" } } }, { "id": "calculateField", "options": { "alias": "Used, %", "binary": { "left": "Used", "operator": "/", "reducer": "sum", "right": "Value #A (lastNotNull)" }, "mode": "binary", "reduce": { "reducer": "sum" } } }, { "id": "organize", "options": { "excludeByName": { }, "indexByName": { }, "renameByName": { "Value #A (lastNotNull)": "Size", "Value #B (lastNotNull)": "Available", "mountpoint": "Mounted on" } } }, { "id": "sortBy", "options": { "fields": { }, "sort": [ { "field": "Mounted on" } ] } } ], "transparent": false, "type": "table" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Disk", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Network received (bits/s)", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 8, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Received", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Network transmitted (bits/s)", "fill": 0, "fillGradient": 0, "gridPos": { }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Transmitted", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Network", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "node-exporter-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "Instance", "multi": false, "name": "instance", "options": [ ], "query": "label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, instance)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Node Exporter / Nodes", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-persistentvolumesusage annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: persistentvolumesusage.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 9, "stack": true, "steppedLine": false, "targets": [ { "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used Space", "refId": "A" }, { "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Free Space", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Volume Space Usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "$datasource", "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "80, 90", "title": "Volume Space Usage", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": true, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 9, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used inodes", "refId": "A" }, { "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": " Free inodes", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Volume inodes Usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "none", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "$datasource", "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "80, 90", "title": "Volume inodes Usage", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [ ], "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "Namespace", "multi": false, "name": "namespace", "options": [ ], "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "PersistentVolumeClaim", "multi": false, "name": "volume", "options": [ ], "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-7d", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Persistent Volumes", "uid": "919b92a8e8041bd567af9edab12c840c", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-pod-total annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: pod-total.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 2, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 1 }, "height": 9, "id": 3, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "minSpan": 12, "nullPointMode": "connected", "nullText": null, "options": { "fieldOptions": { "calcs": [ "last" ], "defaults": { "max": 10000000000, "min": 0, "title": "$namespace: $pod", "unit": "Bps" }, "mappings": [ ], "override": { }, "thresholds": [ { "color": "dark-green", "index": 0, "value": null }, { "color": "dark-yellow", "index": 1, "value": 5000000000 }, { "color": "dark-red", "index": 2, "value": 7000000000 } ], "values": false } }, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 12, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", "format": "time_series", "instant": null, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Received", "type": "gauge", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 1 }, "height": 9, "id": 4, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "minSpan": 12, "nullPointMode": "connected", "nullText": null, "options": { "fieldOptions": { "calcs": [ "last" ], "defaults": { "max": 10000000000, "min": 0, "title": "$namespace: $pod", "unit": "Bps" }, "mappings": [ ], "override": { }, "thresholds": [ { "color": "dark-green", "index": 0, "value": null }, { "color": "dark-yellow", "index": 1, "value": 5000000000 }, { "color": "dark-red", "index": 2, "value": 7000000000 } ], "values": false } }, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 12, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", "format": "time_series", "instant": null, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Transmitted", "type": "gauge", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, "id": 5, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 11 }, "id": 6, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 11 }, "id": 7, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }, "id": 8, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 21 }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 21 }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Packets", "titleSize": "h6", "type": "row" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }, "id": 11, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 32 }, "id": 12, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 32 }, "id": 13, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Errors", "titleSize": "h6", "type": "row" } ], "refresh": "10s", "rows": [ ], "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "kube-system", "value": "kube-system" }, "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "", "value": "" }, "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "pod", "options": [ ], "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "resolution", "options": [ { "selected": false, "text": "30s", "value": "30s" }, { "selected": true, "text": "5m", "value": "5m" }, { "selected": false, "text": "1h", "value": "1h" } ], "query": "30s,5m,1h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "interval", "options": [ { "selected": true, "text": "4h", "value": "4h" } ], "query": "4h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Networking / Pod", "uid": "7a18067ce943a40ae25454675c19ff5c", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-prometheus annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: prometheus.json: |- { "annotations": { "list": [ ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "links": [ ], "refresh": "60s", "rows": [ { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 1, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "styles": [ { "alias": "Time", "dateFormat": "YYYY-MM-DD HH:mm:ss", "pattern": "Time", "type": "hidden" }, { "alias": "Count", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", "thresholds": [ ], "type": "hidden", "unit": "short" }, { "alias": "Uptime", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", "thresholds": [ ], "type": "number", "unit": "s" }, { "alias": "Instance", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "instance", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Job", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "job", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "Version", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "version", "thresholds": [ ], "type": "number", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "/.*/", "thresholds": [ ], "type": "string", "unit": "short" } ], "targets": [ { "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 10 }, { "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "B", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Prometheus Stats", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "transform": "table", "type": "table", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Prometheus Stats", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{scrape_job}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Target Sync", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ms", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 3, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Targets", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Targets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Discovery", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "id": 4, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{interval}} configured", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Scrape Interval Duration", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ms", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 5, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "exceeded body size limit: {{job}}", "legendLink": null, "step": 10 }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "exceeded sample limit: {{job}}", "legendLink": null, "step": 10 }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "duplicate timestamp: {{job}}", "legendLink": null, "step": 10 }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "out of bounds: {{job}}", "legendLink": null, "step": 10 }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "out of order: {{job}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Scrape failures", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 6, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Appended Samples", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Retrieval", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 7, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}} head series", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Head Series", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}} head chunks", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Head Chunks", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Storage", "titleSize": "h6" }, { "collapse": false, "height": "250px", "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 9, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Query Rate", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, "id": 10, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": true, "steppedLine": false, "targets": [ { "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{slice}}", "legendLink": null, "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Stage Duration", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ms", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Query", "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ "prometheus-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": ".+", "current": { "selected": true, "text": "All", "value": "$__all" }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": "job", "multi": true, "name": "job", "options": [ ], "query": "label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, job)", "refresh": 1, "regex": "", "sort": 2, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", "current": { "selected": true, "text": "All", "value": "$__all" }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": "instance", "multi": true, "name": "instance", "options": [ ], "query": "label_values(prometheus_build_info{job=~\"$job\"}, instance)", "refresh": 1, "regex": "", "sort": 2, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Prometheus / Overview", "uid": "", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-proxy annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: proxy.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Up", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "min" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 5, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "rate", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rules Sync Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 5, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rule Sync Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "rate", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Programming Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Programming Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Kube API Request Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 8, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Post Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 11, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 12, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-proxy\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "instance", "options": [ ], "query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Proxy", "uid": "632e265de029684c40b21cb76bca4f94", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/scheduler.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-scheduler annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: scheduler.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ ] }, "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "refresh": "10s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { }, "id": 2, "interval": "1m", "legend": { "alignAsTable": true, "rightSide": true }, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Up", "tooltip": { "shared": false }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "min" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 3, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 5, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} e2e", "refId": "A" }, { "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} binding", "refId": "B" }, { "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} scheduling algorithm", "refId": "C" }, { "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} volume", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Scheduling Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 4, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 5, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} e2e", "refId": "A" }, { "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} binding", "refId": "B" }, { "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} scheduling algorithm", "refId": "C" }, { "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}} {{instance}} volume", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Scheduling latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 5, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", "refId": "D" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Kube API Request Rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "ops", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 6, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 8, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Post Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 7, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "s", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 8, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 9, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { }, "id": 10, "interval": "1m", "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": false, "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [ ], "query": "label_values(up{job=\"kube-scheduler\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "instance", "options": [ ], "query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Scheduler", "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 } --- # Source: kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml apiVersion: v1 kind: ConfigMap metadata: namespace: monitoring name: prometheus-kube-prometheus-workload-total annotations: {} labels: grafana_dashboard: "1" app: kube-prometheus-stack-grafana app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" data: workload-total.json: |- { "__inputs": [ ], "__requires": [ ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 2, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 1 }, "id": 3, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Received", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 1 }, "id": 4, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Current Rate of Bytes Transmitted", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, "id": 5, "panels": [ { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 11 }, "id": 6, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Rate of Bytes Received", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 11 }, "id": 7, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [ ], "minSpan": 24, "nullPointMode": "null", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 24, "stack": false, "steppedLine": false, "targets": [ { "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Average Rate of Bytes Transmitted", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "series", "name": null, "show": false, "values": [ "current" ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 11 }, "id": 8, "panels": [ ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Bandwidth HIstory", "titleSize": "h6", "type": "row" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 12 }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Receive Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 12 }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Transmit Bandwidth", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }, "id": 11, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 22 }, "id": 12, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 22 }, "id": 13, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Packets", "titleSize": "h6", "type": "row" }, { "collapse": true, "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 }, "id": 14, "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 23 }, "id": 15, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 2, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 23 }, "id": 16, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [ ], "minSpan": 12, "nullPointMode": "connected", "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 12, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A", "step": 10 } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "pps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, "title": "Errors", "titleSize": "h6", "type": "row" } ], "refresh": "10s", "rows": [ ], "schemaVersion": 18, "style": "dark", "tags": [ "kubernetes-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "cluster", "options": [ ], "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "kube-system", "value": "kube-system" }, "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "namespace", "options": [ ], "query": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "", "value": "" }, "datasource": "$datasource", "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "workload", "options": [ ], "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "deployment", "value": "deployment" }, "datasource": "$datasource", "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "type", "options": [ ], "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, "multi": false, "name": "resolution", "options": [ { "selected": false, "text": "30s", "value": "30s" }, { "selected": true, "text": "5m", "value": "5m" }, { "selected": false, "text": "1h", "value": "1h" } ], "query": "30s,5m,1h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false }, { "allValue": null, "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "text": "5m", "value": "5m" }, "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, "multi": false, "name": "interval", "options": [ { "selected": true, "text": "4h", "value": "4h" } ], "query": "4h", "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "interval", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "utc", "title": "Kubernetes / Networking / Workload", "uid": "728bf77cc1166d2f3133bf25846876cc", "version": 0 } --- # Source: kube-prometheus-stack/charts/grafana/templates/clusterrole.yaml kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm name: prometheus-grafana-clusterrole rules: - apiGroups: [""] # "" indicates the core API group resources: ["configmaps", "secrets"] verbs: ["get", "watch", "list"] --- # Source: kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus name: prometheus-kube-state-metrics rules: - apiGroups: ["certificates.k8s.io"] resources: - certificatesigningrequests verbs: ["list", "watch"] - apiGroups: [""] resources: - configmaps verbs: ["list", "watch"] - apiGroups: ["batch"] resources: - cronjobs verbs: ["list", "watch"] - apiGroups: ["extensions", "apps"] resources: - daemonsets verbs: ["list", "watch"] - apiGroups: ["extensions", "apps"] resources: - deployments verbs: ["list", "watch"] - apiGroups: [""] resources: - endpoints verbs: ["list", "watch"] - apiGroups: ["autoscaling"] resources: - horizontalpodautoscalers verbs: ["list", "watch"] - apiGroups: ["extensions", "networking.k8s.io"] resources: - ingresses verbs: ["list", "watch"] - apiGroups: ["batch"] resources: - jobs verbs: ["list", "watch"] - apiGroups: ["coordination.k8s.io"] resources: - leases verbs: ["list", "watch"] - apiGroups: [""] resources: - limitranges verbs: ["list", "watch"] - apiGroups: ["admissionregistration.k8s.io"] resources: - mutatingwebhookconfigurations verbs: ["list", "watch"] - apiGroups: [""] resources: - namespaces verbs: ["list", "watch"] - apiGroups: ["networking.k8s.io"] resources: - networkpolicies verbs: ["list", "watch"] - apiGroups: [""] resources: - nodes verbs: ["list", "watch"] - apiGroups: [""] resources: - persistentvolumeclaims verbs: ["list", "watch"] - apiGroups: [""] resources: - persistentvolumes verbs: ["list", "watch"] - apiGroups: ["policy"] resources: - poddisruptionbudgets verbs: ["list", "watch"] - apiGroups: [""] resources: - pods verbs: ["list", "watch"] - apiGroups: ["extensions", "apps"] resources: - replicasets verbs: ["list", "watch"] - apiGroups: [""] resources: - replicationcontrollers verbs: ["list", "watch"] - apiGroups: [""] resources: - resourcequotas verbs: ["list", "watch"] - apiGroups: [""] resources: - secrets verbs: ["list", "watch"] - apiGroups: [""] resources: - services verbs: ["list", "watch"] - apiGroups: ["apps"] resources: - statefulsets verbs: ["list", "watch"] - apiGroups: ["storage.k8s.io"] resources: - storageclasses verbs: ["list", "watch"] - apiGroups: ["admissionregistration.k8s.io"] resources: - validatingwebhookconfigurations verbs: ["list", "watch"] - apiGroups: ["storage.k8s.io"] resources: - volumeattachments verbs: ["list", "watch"] --- # Source: kube-prometheus-stack/templates/prometheus-operator/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: prometheus-kube-prometheus-operator labels: app: kube-prometheus-stack-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" rules: - apiGroups: - monitoring.coreos.com resources: - alertmanagers - alertmanagers/finalizers - alertmanagerconfigs - prometheuses - prometheuses/status - prometheuses/finalizers - thanosrulers - thanosrulers/finalizers - servicemonitors - podmonitors - probes - prometheusrules verbs: - '*' - apiGroups: - apps resources: - statefulsets verbs: - '*' - apiGroups: - "" resources: - configmaps - secrets verbs: - '*' - apiGroups: - "" resources: - pods verbs: - list - delete - apiGroups: - "" resources: - services - services/finalizers - endpoints verbs: - get - create - update - delete - apiGroups: - "" resources: - nodes verbs: - list - watch - apiGroups: - "" resources: - namespaces verbs: - get - list - watch - apiGroups: - networking.k8s.io resources: - ingresses verbs: - get - list - watch --- # Source: kube-prometheus-stack/templates/prometheus/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: prometheus-kube-prometheus-prometheus labels: app: kube-prometheus-stack-prometheus app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" rules: # This permission are not in the kube-prometheus repo # they're grabbed from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml - apiGroups: [""] resources: - nodes - nodes/metrics - services - endpoints - pods verbs: ["get", "list", "watch"] - apiGroups: - "networking.k8s.io" resources: - ingresses verbs: ["get", "list", "watch"] - nonResourceURLs: ["/metrics", "/metrics/cadvisor"] verbs: ["get"] --- # Source: kube-prometheus-stack/charts/grafana/templates/clusterrolebinding.yaml kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: name: prometheus-grafana-clusterrolebinding labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm subjects: - kind: ServiceAccount name: prometheus-grafana namespace: monitoring roleRef: kind: ClusterRole name: prometheus-grafana-clusterrole apiGroup: rbac.authorization.k8s.io --- # Source: kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus name: prometheus-kube-state-metrics roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus-kube-state-metrics subjects: - kind: ServiceAccount name: prometheus-kube-state-metrics namespace: monitoring --- # Source: kube-prometheus-stack/templates/prometheus-operator/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: prometheus-kube-prometheus-operator labels: app: kube-prometheus-stack-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus-kube-prometheus-operator subjects: - kind: ServiceAccount name: prometheus-kube-prometheus-operator namespace: monitoring --- # Source: kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: prometheus-kube-prometheus-prometheus labels: app: kube-prometheus-stack-prometheus app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus-kube-prometheus-prometheus subjects: - kind: ServiceAccount name: prometheus-kube-prometheus-prometheus namespace: monitoring --- # Source: kube-prometheus-stack/charts/grafana/templates/role.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm rules: [] --- # Source: kube-prometheus-stack/charts/grafana/templates/rolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm roleRef: apiGroup: rbac.authorization.k8s.io kind: Role name: prometheus-grafana subjects: - kind: ServiceAccount name: prometheus-grafana namespace: monitoring --- # Source: kube-prometheus-stack/charts/grafana/templates/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP ports: - name: http-web port: 80 protocol: TCP targetPort: 3000 selector: app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus --- # Source: kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-state-metrics namespace: monitoring labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus annotations: prometheus.io/scrape: 'true' spec: type: "ClusterIP" ports: - name: "http" protocol: TCP port: 8080 targetPort: 8080 selector: app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus --- # Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-prometheus-node-exporter namespace: monitoring labels: helm.sh/chart: prometheus-node-exporter-4.8.1 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: prometheus-node-exporter app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "1.5.0" jobLabel: node-exporter release: prometheus annotations: prometheus.io/scrape: "true" spec: type: ClusterIP ports: - port: 9100 targetPort: 9100 protocol: TCP name: http-metrics selector: app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus --- # Source: kube-prometheus-stack/templates/alertmanager/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-alertmanager namespace: monitoring labels: app: kube-prometheus-stack-alertmanager self-monitor: "true" app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: ports: - name: http-web port: 9093 targetPort: 9093 protocol: TCP selector: app.kubernetes.io/name: alertmanager alertmanager: prometheus-kube-prometheus-alertmanager type: "ClusterIP" --- # Source: kube-prometheus-stack/templates/exporters/core-dns/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-coredns labels: app: kube-prometheus-stack-coredns jobLabel: coredns app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" namespace: kube-system spec: clusterIP: None ports: - name: http-metrics port: 9153 protocol: TCP targetPort: 9153 selector: k8s-app: kube-dns --- # Source: kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-kube-controller-manager labels: app: kube-prometheus-stack-kube-controller-manager jobLabel: kube-controller-manager app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" namespace: kube-system spec: clusterIP: None ports: - name: http-metrics port: 10252 protocol: TCP targetPort: 10252 selector: component: kube-controller-manager type: ClusterIP --- # Source: kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-kube-etcd labels: app: kube-prometheus-stack-kube-etcd jobLabel: kube-etcd app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" namespace: kube-system spec: clusterIP: None ports: - name: http-metrics port: 2381 protocol: TCP targetPort: 2381 selector: component: etcd type: ClusterIP --- # Source: kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-kube-proxy labels: app: kube-prometheus-stack-kube-proxy jobLabel: kube-proxy app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" namespace: kube-system spec: clusterIP: None ports: - name: http-metrics port: 10249 protocol: TCP targetPort: 10249 selector: k8s-app: kube-proxy type: ClusterIP --- # Source: kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-kube-scheduler labels: app: kube-prometheus-stack-kube-scheduler jobLabel: kube-scheduler app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" namespace: kube-system spec: clusterIP: None ports: - name: http-metrics port: 10251 protocol: TCP targetPort: 10251 selector: component: kube-scheduler type: ClusterIP --- # Source: kube-prometheus-stack/templates/prometheus-operator/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-operator namespace: monitoring labels: app: kube-prometheus-stack-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: ports: - name: https port: 443 targetPort: https selector: app: kube-prometheus-stack-operator release: "prometheus" type: "ClusterIP" --- # Source: kube-prometheus-stack/templates/prometheus/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus-kube-prometheus-prometheus namespace: monitoring labels: app: kube-prometheus-stack-prometheus self-monitor: "true" app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: ports: - name: http-web port: 9090 targetPort: 9090 publishNotReadyAddresses: false selector: app.kubernetes.io/name: prometheus prometheus: prometheus-kube-prometheus-prometheus type: "ClusterIP" --- # Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml apiVersion: apps/v1 kind: DaemonSet metadata: name: prometheus-prometheus-node-exporter namespace: monitoring labels: helm.sh/chart: prometheus-node-exporter-4.8.1 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: prometheus-node-exporter app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "1.5.0" jobLabel: node-exporter release: prometheus spec: selector: matchLabels: app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus updateStrategy: rollingUpdate: maxUnavailable: 1 type: RollingUpdate template: metadata: annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "true" labels: helm.sh/chart: prometheus-node-exporter-4.8.1 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: prometheus-node-exporter app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "1.5.0" jobLabel: node-exporter release: prometheus spec: automountServiceAccountToken: false securityContext: fsGroup: 65534 runAsGroup: 65534 runAsNonRoot: true runAsUser: 65534 serviceAccountName: prometheus-prometheus-node-exporter containers: - name: node-exporter image: quay.io/prometheus/node-exporter:v1.5.0 imagePullPolicy: IfNotPresent args: - --path.procfs=/host/proc - --path.sysfs=/host/sys - --path.rootfs=/host/root - --web.listen-address=[$(HOST_IP)]:9100 - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ env: - name: HOST_IP value: 0.0.0.0 ports: - name: http-metrics containerPort: 9100 protocol: TCP livenessProbe: failureThreshold: 3 httpGet: httpHeaders: path: / port: 9100 scheme: HTTP initialDelaySeconds: 0 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 readinessProbe: failureThreshold: 3 httpGet: httpHeaders: path: / port: 9100 scheme: HTTP initialDelaySeconds: 0 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 volumeMounts: - name: proc mountPath: /host/proc readOnly: true - name: sys mountPath: /host/sys readOnly: true - name: root mountPath: /host/root mountPropagation: HostToContainer readOnly: true hostNetwork: true hostPID: true tolerations: - effect: NoSchedule operator: Exists volumes: - name: proc hostPath: path: /proc - name: sys hostPath: path: /sys - name: root hostPath: path: / --- # Source: kube-prometheus-stack/charts/grafana/templates/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm spec: replicas: 1 revisionHistoryLimit: 10 selector: matchLabels: app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus strategy: type: RollingUpdate template: metadata: labels: app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus annotations: checksum/config: 13a89fd7932a141123505f22d32ef51a2e7bc268374f1a13cbc632fb278a16cb checksum/dashboards-json-config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b checksum/sc-dashboard-provider-config: 65162b99e6bdb0f96ee95353a76effc7f575a61ee91fd9932ce83588c9add479 checksum/secret: 25451f0c2dfac29475034add0097960ac2e5e7e95d2b9997f437dafef739bf4f spec: serviceAccountName: prometheus-grafana automountServiceAccountToken: true securityContext: fsGroup: 472 runAsGroup: 472 runAsUser: 472 enableServiceLinks: true containers: - name: grafana-sc-dashboard image: "quay.io/kiwigrid/k8s-sidecar:1.22.0" imagePullPolicy: IfNotPresent env: - name: METHOD value: WATCH - name: LABEL value: "grafana_dashboard" - name: LABEL_VALUE value: "1" - name: FOLDER value: "/tmp/dashboards" - name: RESOURCE value: "both" - name: REQ_USERNAME valueFrom: secretKeyRef: name: prometheus-grafana key: admin-user - name: REQ_PASSWORD valueFrom: secretKeyRef: name: prometheus-grafana key: admin-password - name: REQ_URL value: http://localhost:3000/api/admin/provisioning/dashboards/reload - name: REQ_METHOD value: POST volumeMounts: - name: sc-dashboard-volume mountPath: "/tmp/dashboards" - name: grafana-sc-datasources image: "quay.io/kiwigrid/k8s-sidecar:1.22.0" imagePullPolicy: IfNotPresent env: - name: METHOD value: WATCH - name: LABEL value: "grafana_datasource" - name: LABEL_VALUE value: "1" - name: FOLDER value: "/etc/grafana/provisioning/datasources" - name: RESOURCE value: "both" - name: REQ_USERNAME valueFrom: secretKeyRef: name: prometheus-grafana key: admin-user - name: REQ_PASSWORD valueFrom: secretKeyRef: name: prometheus-grafana key: admin-password - name: REQ_URL value: http://localhost:3000/api/admin/provisioning/datasources/reload - name: REQ_METHOD value: POST volumeMounts: - name: sc-datasources-volume mountPath: "/etc/grafana/provisioning/datasources" - name: grafana image: "grafana/grafana:9.3.1" imagePullPolicy: IfNotPresent volumeMounts: - name: config mountPath: "/etc/grafana/grafana.ini" subPath: grafana.ini - name: storage mountPath: "/var/lib/grafana" - name: sc-dashboard-volume mountPath: "/tmp/dashboards" - name: sc-dashboard-provider mountPath: "/etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml" subPath: provider.yaml - name: sc-datasources-volume mountPath: "/etc/grafana/provisioning/datasources" ports: - name: grafana containerPort: 3000 protocol: TCP env: - name: GF_SECURITY_ADMIN_USER valueFrom: secretKeyRef: name: prometheus-grafana key: admin-user - name: GF_SECURITY_ADMIN_PASSWORD valueFrom: secretKeyRef: name: prometheus-grafana key: admin-password - name: GF_PATHS_DATA value: /var/lib/grafana/ - name: GF_PATHS_LOGS value: /var/log/grafana - name: GF_PATHS_PLUGINS value: /var/lib/grafana/plugins - name: GF_PATHS_PROVISIONING value: /etc/grafana/provisioning livenessProbe: failureThreshold: 10 httpGet: path: /api/health port: 3000 initialDelaySeconds: 60 timeoutSeconds: 30 readinessProbe: httpGet: path: /api/health port: 3000 volumes: - name: config configMap: name: prometheus-grafana - name: storage emptyDir: {} - name: sc-dashboard-volume emptyDir: {} - name: sc-dashboard-provider configMap: name: prometheus-grafana-config-dashboards - name: sc-datasources-volume emptyDir: {} --- # Source: kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: prometheus-kube-state-metrics namespace: monitoring labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus spec: selector: matchLabels: app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus replicas: 1 template: metadata: labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus spec: hostNetwork: false serviceAccountName: prometheus-kube-state-metrics securityContext: fsGroup: 65534 runAsGroup: 65534 runAsUser: 65534 containers: - name: kube-state-metrics args: - --port=8080 - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments imagePullPolicy: IfNotPresent image: "registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0" ports: - containerPort: 8080 name: "http" livenessProbe: httpGet: path: /healthz port: 8080 initialDelaySeconds: 5 timeoutSeconds: 5 readinessProbe: httpGet: path: / port: 8080 initialDelaySeconds: 5 timeoutSeconds: 5 --- # Source: kube-prometheus-stack/templates/prometheus-operator/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: prometheus-kube-prometheus-operator namespace: monitoring labels: app: kube-prometheus-stack-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: replicas: 1 selector: matchLabels: app: kube-prometheus-stack-operator release: "prometheus" template: metadata: labels: app: kube-prometheus-stack-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: containers: - name: kube-prometheus-stack image: "quay.io/prometheus-operator/prometheus-operator:v0.62.0" imagePullPolicy: "IfNotPresent" args: - --kubelet-service=kube-system/prometheus-kube-prometheus-kubelet - --localhost=127.0.0.1 - --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.62.0 - --config-reloader-cpu-request=200m - --config-reloader-cpu-limit=200m - --config-reloader-memory-request=50Mi - --config-reloader-memory-limit=50Mi - --thanos-default-base-image=quay.io/thanos/thanos:v0.30.1 - --web.enable-tls=true - --web.cert-file=/cert/cert - --web.key-file=/cert/key - --web.listen-address=:10250 - --web.tls-min-version=VersionTLS13 ports: - containerPort: 10250 name: https resources: {} securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true volumeMounts: - name: tls-secret mountPath: /cert readOnly: true volumes: - name: tls-secret secret: defaultMode: 420 secretName: prometheus-kube-prometheus-admission securityContext: fsGroup: 65534 runAsGroup: 65534 runAsNonRoot: true runAsUser: 65534 serviceAccountName: prometheus-kube-prometheus-operator --- # Source: kube-prometheus-stack/templates/alertmanager/alertmanager.yaml apiVersion: monitoring.coreos.com/v1 kind: Alertmanager metadata: name: prometheus-kube-prometheus-alertmanager namespace: monitoring labels: app: kube-prometheus-stack-alertmanager app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: image: "quay.io/prometheus/alertmanager:v0.25.0" version: v0.25.0 replicas: 1 listenLocal: false serviceAccountName: prometheus-kube-prometheus-alertmanager externalUrl: http://prometheus-kube-prometheus-alertmanager.monitoring:9093 paused: false logFormat: "logfmt" logLevel: "info" retention: "120h" alertmanagerConfigSelector: {} alertmanagerConfigNamespaceSelector: {} routePrefix: "/" securityContext: fsGroup: 2000 runAsGroup: 2000 runAsNonRoot: true runAsUser: 1000 portName: http-web --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: name: prometheus-kube-prometheus-admission labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" webhooks: - name: prometheusrulemutate.monitoring.coreos.com failurePolicy: Ignore rules: - apiGroups: - monitoring.coreos.com apiVersions: - "*" resources: - prometheusrules operations: - CREATE - UPDATE clientConfig: service: namespace: monitoring name: prometheus-kube-prometheus-operator path: /admission-prometheusrules/mutate timeoutSeconds: 10 admissionReviewVersions: ["v1", "v1beta1"] sideEffects: None --- # Source: kube-prometheus-stack/templates/prometheus/prometheus.yaml apiVersion: monitoring.coreos.com/v1 kind: Prometheus metadata: name: prometheus-kube-prometheus-prometheus namespace: monitoring labels: app: kube-prometheus-stack-prometheus app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: alerting: alertmanagers: - namespace: monitoring name: prometheus-kube-prometheus-alertmanager port: http-web pathPrefix: "/" apiVersion: v2 image: "quay.io/prometheus/prometheus:v2.41.0" version: v2.41.0 externalUrl: http://prometheus-kube-prometheus-prometheus.monitoring:9090 paused: false replicas: 1 shards: 1 logLevel: info logFormat: logfmt listenLocal: false enableAdminAPI: false retention: "10d" walCompression: true routePrefix: "/" serviceAccountName: prometheus-kube-prometheus-prometheus serviceMonitorSelector: matchLabels: release: "prometheus" serviceMonitorNamespaceSelector: {} podMonitorSelector: matchLabels: release: "prometheus" podMonitorNamespaceSelector: {} probeSelector: matchLabels: release: "prometheus" probeNamespaceSelector: {} securityContext: fsGroup: 2000 runAsGroup: 2000 runAsNonRoot: true runAsUser: 1000 ruleNamespaceSelector: {} ruleSelector: matchLabels: release: "prometheus" portName: http-web hostNetwork: false --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-alertmanager.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: alertmanager.rules rules: - alert: AlertmanagerFailedReload annotations: description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload summary: Reloading an Alertmanager configuration has failed. expr: |- # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. max_over_time(alertmanager_config_last_reload_successful{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[5m]) == 0 for: 10m labels: severity: critical - alert: AlertmanagerMembersInconsistent annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent summary: A member of an Alertmanager cluster has not found all other cluster members. expr: |- # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. max_over_time(alertmanager_cluster_members{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[5m]) < on (namespace,service) group_left count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[5m])) for: 15m labels: severity: critical - alert: AlertmanagerFailedToSendAlerts annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts summary: An Alertmanager instance failed to send notifications. expr: |- ( rate(alertmanager_notifications_failed_total{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[5m]) / rate(alertmanager_notifications_total{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[5m]) ) > 0.01 for: 5m labels: severity: warning - alert: AlertmanagerClusterFailedToSendAlerts annotations: description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. expr: |- min by (namespace,service, integration) ( rate(alertmanager_notifications_failed_total{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring", integration=~`.*`}[5m]) / rate(alertmanager_notifications_total{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring", integration=~`.*`}[5m]) ) > 0.01 for: 5m labels: severity: critical - alert: AlertmanagerClusterFailedToSendAlerts annotations: description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. expr: |- min by (namespace,service, integration) ( rate(alertmanager_notifications_failed_total{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring", integration!~`.*`}[5m]) / rate(alertmanager_notifications_total{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring", integration!~`.*`}[5m]) ) > 0.01 for: 5m labels: severity: warning - alert: AlertmanagerConfigInconsistent annotations: description: Alertmanager instances within the {{$labels.job}} cluster have different configurations. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent summary: Alertmanager instances within the same cluster have different configurations. expr: |- count by (namespace,service) ( count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}) ) != 1 for: 20m labels: severity: critical - alert: AlertmanagerClusterDown annotations: description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown summary: Half or more of the Alertmanager instances within the same cluster are down. expr: |- ( count by (namespace,service) ( avg_over_time(up{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[5m]) < 0.5 ) / count by (namespace,service) ( up{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"} ) ) >= 0.5 for: 5m labels: severity: critical - alert: AlertmanagerClusterCrashlooping annotations: description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. expr: |- ( count by (namespace,service) ( changes(process_start_time_seconds{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"}[10m]) > 4 ) / count by (namespace,service) ( up{job="prometheus-kube-prometheus-alertmanager",namespace="monitoring"} ) ) >= 0.5 for: 5m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-config-reloaders namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: config-reloaders rules: - alert: ConfigReloaderSidecarErrors annotations: description: 'Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace. As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors summary: config-reloader sidecar has not had a successful reload for 10m expr: max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0 for: 10m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-etcd namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: etcd rules: - alert: etcdMembersDown annotations: description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).' summary: etcd cluster members are down. expr: |- max without (endpoint) ( sum without (instance) (up{job=~".*etcd.*"} == bool 0) or count without (To) ( sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 ) ) > 0 for: 10m labels: severity: critical - alert: etcdInsufficientMembers annotations: description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).' summary: etcd cluster has insufficient number of members. expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) for: 3m labels: severity: critical - alert: etcdNoLeader annotations: description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.' summary: etcd cluster has no leader. expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 for: 1m labels: severity: critical - alert: etcdHighNumberOfLeaderChanges annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' summary: etcd cluster has high number of leader changes. expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 for: 5m labels: severity: warning - alert: etcdHighNumberOfFailedGRPCRequests annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' summary: etcd cluster has high number of failed grpc requests. expr: |- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) / sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) > 1 for: 10m labels: severity: warning - alert: etcdHighNumberOfFailedGRPCRequests annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' summary: etcd cluster has high number of failed grpc requests. expr: |- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) / sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) > 5 for: 5m labels: severity: critical - alert: etcdGRPCRequestsSlow annotations: description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method.' summary: etcd grpc requests are slow expr: |- histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) > 0.15 for: 10m labels: severity: critical - alert: etcdMemberCommunicationSlow annotations: description: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.' summary: etcd cluster member communication is slow. expr: |- histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) > 0.15 for: 10m labels: severity: warning - alert: etcdHighNumberOfFailedProposals annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.' summary: etcd cluster has high number of proposal failures. expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 for: 15m labels: severity: warning - alert: etcdHighFsyncDurations annotations: description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' summary: etcd cluster 99th percentile fsync durations are too high. expr: |- histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) > 0.5 for: 10m labels: severity: warning - alert: etcdHighFsyncDurations annotations: description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' summary: etcd cluster 99th percentile fsync durations are too high. expr: |- histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) > 1 for: 10m labels: severity: critical - alert: etcdHighCommitDurations annotations: description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.' summary: etcd cluster 99th percentile commit durations are too high. expr: |- histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) > 0.25 for: 10m labels: severity: warning - alert: etcdDatabaseQuotaLowSpace annotations: description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' summary: etcd cluster database is running full. expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95 for: 10m labels: severity: critical - alert: etcdExcessiveDatabaseGrowth annotations: description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.' summary: etcd cluster database growing very fast. expr: predict_linear(etcd_mvcc_db_total_size_in_bytes[4h], 4*60*60) > etcd_server_quota_backend_bytes for: 10m labels: severity: warning - alert: etcdDatabaseHighFragmentationRatio annotations: description: 'etcd cluster "{{ $labels.job }}": database size in use on instance {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation summary: etcd database size in use is less than 50% of the actual allocated storage. expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m])) < 0.5 for: 10m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-general.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: general.rules rules: - alert: TargetDown annotations: description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown summary: One or more targets are unreachable. expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 for: 10m labels: severity: warning - alert: Watchdog annotations: description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty. ' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog summary: An alert that should always be firing to certify that Alertmanager is working properly. expr: vector(1) labels: severity: none - alert: InfoInhibitor annotations: description: 'This is an alert that is used to inhibit info alerts. By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with other alerts. This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a severity of ''warning'' or ''critical'' starts firing on the same namespace. This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". ' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor summary: Info-level alert inhibition. expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 labels: severity: none --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-k8s.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: k8s.rules rules: - expr: |- sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate - expr: |- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_working_set_bytes - expr: |- container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_rss - expr: |- container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_cache - expr: |- container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_swap - expr: |- kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests - expr: |- sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) record: namespace_memory:kube_pod_container_resource_requests:sum - expr: |- kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests - expr: |- sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) record: namespace_cpu:kube_pod_container_resource_requests:sum - expr: |- kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits - expr: |- sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) record: namespace_memory:kube_pod_container_resource_limits:sum - expr: |- kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits - expr: |- sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ) ) record: namespace_cpu:kube_pod_container_resource_limits:sum - expr: |- max by (cluster, namespace, workload, pod) ( label_replace( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, "replicaset", "$1", "owner_name", "(.*)" ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( 1, max by (replicaset, namespace, owner_name) ( kube_replicaset_owner{job="kube-state-metrics"} ) ), "workload", "$1", "owner_name", "(.*)" ) ) labels: workload_type: deployment record: namespace_workload_pod:kube_pod_owner:relabel - expr: |- max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, "workload", "$1", "owner_name", "(.*)" ) ) labels: workload_type: daemonset record: namespace_workload_pod:kube_pod_owner:relabel - expr: |- max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, "workload", "$1", "owner_name", "(.*)" ) ) labels: workload_type: statefulset record: namespace_workload_pod:kube_pod_owner:relabel - expr: |- max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, "workload", "$1", "owner_name", "(.*)" ) ) labels: workload_type: job record: namespace_workload_pod:kube_pod_owner:relabel --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-apiserver-availability.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - interval: 3m name: kube-apiserver-availability.rules rules: - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 record: code_verb:apiserver_request_total:increase30d - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: verb: read record: code:apiserver_request_total:increase30d - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) labels: verb: write record: code:apiserver_request_total:increase30d - expr: sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h])) record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h - expr: sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30) record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d - expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h])) record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h - expr: sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30) record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d - expr: |- 1 - ( ( # write too slow sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) ) + ( # read too slow sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - ( ( sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) or vector(0) ) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) ) ) + # errors sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) ) / sum by (cluster) (code:apiserver_request_total:increase30d) labels: verb: all record: apiserver_request:availability30d - expr: |- 1 - ( sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - ( # too slow ( sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) or vector(0) ) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) ) + # errors sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) ) / sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"}) labels: verb: read record: apiserver_request:availability30d - expr: |- 1 - ( ( # too slow sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) ) + # errors sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) ) / sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"}) labels: verb: write record: apiserver_request:availability30d - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read record: code_resource:apiserver_request_total:rate5m - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write record: code_resource:apiserver_request_total:rate5m - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-apiserver-burnrate.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-apiserver-burnrate.rules rules: - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) labels: verb: read record: apiserver_request:burnrate1d - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) labels: verb: read record: apiserver_request:burnrate1h - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) labels: verb: read record: apiserver_request:burnrate2h - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) labels: verb: read record: apiserver_request:burnrate30m - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) labels: verb: read record: apiserver_request:burnrate3d - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read record: apiserver_request:burnrate5m - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - ( ( sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) or vector(0) ) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) + sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) ) ) + # errors sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) labels: verb: read record: apiserver_request:burnrate6h - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) labels: verb: write record: apiserver_request:burnrate1d - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) labels: verb: write record: apiserver_request:burnrate1h - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) labels: verb: write record: apiserver_request:burnrate2h - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) labels: verb: write record: apiserver_request:burnrate30m - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) labels: verb: write record: apiserver_request:burnrate3d - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write record: apiserver_request:burnrate5m - expr: |- ( ( # too slow sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) ) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) ) / sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) labels: verb: write record: apiserver_request:burnrate6h --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-apiserver-histogram.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-apiserver-histogram.rules rules: - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 labels: quantile: '0.99' verb: read record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 labels: quantile: '0.99' verb: write record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-apiserver-slos namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-apiserver-slos rules: - alert: KubeAPIErrorBudgetBurn annotations: description: The API server is burning too much error budget. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: |- sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) and sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) for: 2m labels: long: 1h severity: critical short: 5m - alert: KubeAPIErrorBudgetBurn annotations: description: The API server is burning too much error budget. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: |- sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) and sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) for: 15m labels: long: 6h severity: critical short: 30m - alert: KubeAPIErrorBudgetBurn annotations: description: The API server is burning too much error budget. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: |- sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) and sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) for: 1h labels: long: 1d severity: warning short: 2h - alert: KubeAPIErrorBudgetBurn annotations: description: The API server is burning too much error budget. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn summary: The API server is burning too much error budget. expr: |- sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) and sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) for: 3h labels: long: 3d severity: warning short: 6h --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-prometheus-general.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-prometheus-general.rules rules: - expr: count without(instance, pod, node) (up == 1) record: count:up1 - expr: count without(instance, pod, node) (up == 0) record: count:up0 --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-prometheus-node-recording.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-prometheus-node-recording.rules rules: - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) record: instance:node_cpu:rate:sum - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) record: instance:node_network_receive_bytes:rate:sum - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) record: instance:node_network_transmit_bytes:rate:sum - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) record: instance:node_cpu:ratio - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) record: cluster:node_cpu:sum_rate5m - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) record: cluster:node_cpu:ratio --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-scheduler.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-scheduler.rules rules: - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.99' record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.99' record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.99' record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.9' record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.9' record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.9' record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.5' record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.5' record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: '0.5' record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kube-state-metrics namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kube-state-metrics rules: - alert: KubeStateMetricsListErrors annotations: description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors summary: kube-state-metrics is experiencing errors in list operations. expr: |- (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) / sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m]))) > 0.01 for: 15m labels: severity: critical - alert: KubeStateMetricsWatchErrors annotations: description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors summary: kube-state-metrics is experiencing errors in watch operations. expr: |- (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) / sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m]))) > 0.01 for: 15m labels: severity: critical - alert: KubeStateMetricsShardingMismatch annotations: description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch summary: kube-state-metrics sharding is misconfigured. expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0 for: 15m labels: severity: critical - alert: KubeStateMetricsShardsMissing annotations: description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing summary: kube-state-metrics shards are missing. expr: |- 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1 - sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) != 0 for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubelet.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubelet.rules rules: - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) labels: quantile: '0.99' record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) labels: quantile: '0.9' record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) labels: quantile: '0.5' record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-apps namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-apps rules: - alert: KubePodCrashLooping annotations: description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff").' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping summary: Pod is crash looping. expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~".*"}[5m]) >= 1 for: 15m labels: severity: warning - alert: KubePodNotReady annotations: description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready summary: Pod has been in a non-ready state for more than 15 minutes. expr: |- sum by (namespace, pod, cluster) ( max by(namespace, pod, cluster) ( kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"} ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) ( 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"}) ) ) > 0 for: 15m labels: severity: warning - alert: KubeDeploymentGenerationMismatch annotations: description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch summary: Deployment generation mismatch due to possible roll-back expr: |- kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"} != kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"} for: 15m labels: severity: warning - alert: KubeDeploymentReplicasMismatch annotations: description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch summary: Deployment has not matched the expected number of replicas. expr: |- ( kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"} > kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"} ) and ( changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) == 0 ) for: 15m labels: severity: warning - alert: KubeStatefulSetReplicasMismatch annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch summary: Deployment has not matched the expected number of replicas. expr: |- ( kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"} != kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"} ) and ( changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) == 0 ) for: 15m labels: severity: warning - alert: KubeStatefulSetGenerationMismatch annotations: description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch summary: StatefulSet generation mismatch due to possible roll-back expr: |- kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"} != kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"} for: 15m labels: severity: warning - alert: KubeStatefulSetUpdateNotRolledOut annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout summary: StatefulSet update has not been rolled out. expr: |- ( max without (revision) ( kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"} unless kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"} ) * ( kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"} != kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"} ) ) and ( changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m]) == 0 ) for: 15m labels: severity: warning - alert: KubeDaemonSetRolloutStuck annotations: description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck summary: DaemonSet rollout is stuck. expr: |- ( ( kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} ) or ( kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} != 0 ) or ( kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"} != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} ) or ( kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"} != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} ) ) and ( changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m]) == 0 ) for: 15m labels: severity: warning - alert: KubeContainerWaiting annotations: description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting summary: Pod container waiting longer than 1 hour expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~".*"}) > 0 for: 1h labels: severity: warning - alert: KubeDaemonSetNotScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled summary: DaemonSet pods are not scheduled. expr: |- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0 for: 10m labels: severity: warning - alert: KubeDaemonSetMisScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled summary: DaemonSet pods are misscheduled. expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} > 0 for: 15m labels: severity: warning - alert: KubeJobNotCompleted annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" | humanizeDuration }} to complete. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted summary: Job did not complete in time expr: |- time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"} and kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200 labels: severity: warning - alert: KubeJobFailed annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed summary: Job failed to complete. expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"} > 0 for: 15m labels: severity: warning - alert: KubeHpaReplicasMismatch annotations: description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch summary: HPA has not matched desired number of replicas. expr: |- (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"} != kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}) and (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} > kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"}) and (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} < kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}) and changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0 for: 15m labels: severity: warning - alert: KubeHpaMaxedOut annotations: description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout summary: HPA is running at max replicas expr: |- kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} == kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"} for: 15m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-resources namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-resources rules: - alert: KubeCPUOvercommit annotations: description: Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit summary: Cluster has overcommitted CPU resource requests. expr: |- sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 and (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 for: 10m labels: severity: warning - alert: KubeMemoryOvercommit annotations: description: Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit summary: Cluster has overcommitted memory resource requests. expr: |- sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 and (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 for: 10m labels: severity: warning - alert: KubeCPUQuotaOvercommit annotations: description: Cluster has overcommitted CPU resource requests for Namespaces. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit summary: Cluster has overcommitted CPU resource requests. expr: |- sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) / sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) > 1.5 for: 5m labels: severity: warning - alert: KubeMemoryQuotaOvercommit annotations: description: Cluster has overcommitted memory resource requests for Namespaces. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit summary: Cluster has overcommitted memory resource requests. expr: |- sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) / sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) > 1.5 for: 5m labels: severity: warning - alert: KubeQuotaAlmostFull annotations: description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull summary: Namespace quota is going to be full. expr: |- kube_resourcequota{job="kube-state-metrics", type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) > 0.9 < 1 for: 15m labels: severity: info - alert: KubeQuotaFullyUsed annotations: description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused summary: Namespace quota is fully used. expr: |- kube_resourcequota{job="kube-state-metrics", type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) == 1 for: 15m labels: severity: info - alert: KubeQuotaExceeded annotations: description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded summary: Namespace quota has exceeded the limits. expr: |- kube_resourcequota{job="kube-state-metrics", type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) > 1 for: 15m labels: severity: warning - alert: CPUThrottlingHigh annotations: description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh summary: Processes experience elevated CPU throttling. expr: |- sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace) / sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace) > ( 25 / 100 ) for: 15m labels: severity: info --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-storage namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-storage rules: - alert: KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup summary: PersistentVolume is filling up. expr: |- ( kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} / kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} ) < 0.03 and kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1m labels: severity: critical - alert: KubePersistentVolumeFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup summary: PersistentVolume is filling up. expr: |- ( kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} / kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} ) < 0.15 and kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1h labels: severity: warning - alert: KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup summary: PersistentVolumeInodes are filling up. expr: |- ( kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} / kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} ) < 0.03 and kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1m labels: severity: critical - alert: KubePersistentVolumeInodesFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup summary: PersistentVolumeInodes are filling up. expr: |- ( kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} / kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} ) < 0.15 and kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1h labels: severity: warning - alert: KubePersistentVolumeErrors annotations: description: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors summary: PersistentVolume is having issues with provisioning. expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 for: 5m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-system-apiserver namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-system-apiserver rules: - alert: KubeClientCertificateExpiration annotations: description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 for: 5m labels: severity: warning - alert: KubeClientCertificateExpiration annotations: description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 for: 5m labels: severity: critical - alert: KubeAggregatedAPIErrors annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors summary: Kubernetes aggregated API has reported errors. expr: sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total[10m])) > 4 labels: severity: warning - alert: KubeAggregatedAPIDown annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown summary: Kubernetes aggregated API is down. expr: (1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85 for: 5m labels: severity: warning - alert: KubeAPIDown annotations: description: KubeAPI has disappeared from Prometheus target discovery. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapidown summary: Target disappeared from Prometheus target discovery. expr: absent(up{job="apiserver"} == 1) for: 15m labels: severity: critical - alert: KubeAPITerminatedRequests annotations: description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests. expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 for: 5m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-system-controller-manager namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-system-controller-manager rules: - alert: KubeControllerManagerDown annotations: description: KubeControllerManager has disappeared from Prometheus target discovery. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown summary: Target disappeared from Prometheus target discovery. expr: absent(up{job="kube-controller-manager"} == 1) for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-system-kube-proxy namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-system-kube-proxy rules: - alert: KubeProxyDown annotations: description: KubeProxy has disappeared from Prometheus target discovery. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown summary: Target disappeared from Prometheus target discovery. expr: absent(up{job="kube-proxy"} == 1) for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-system-kubelet namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-system-kubelet rules: - alert: KubeNodeNotReady annotations: description: '{{ $labels.node }} has been unready for more than 15 minutes.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready summary: Node is not ready. expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 for: 15m labels: severity: warning - alert: KubeNodeUnreachable annotations: description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable summary: Node is unreachable. expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 for: 15m labels: severity: warning - alert: KubeletTooManyPods annotations: description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods summary: Kubelet is running at capacity. expr: |- count by(cluster, node) ( (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"}) ) / max by(cluster, node) ( kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 ) > 0.95 for: 15m labels: severity: info - alert: KubeNodeReadinessFlapping annotations: description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping summary: Node readiness status is flapping. expr: sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (cluster, node) > 2 for: 15m labels: severity: warning - alert: KubeletPlegDurationHigh annotations: description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 for: 5m labels: severity: warning - alert: KubeletPodStartUpLatencyHigh annotations: description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh summary: Kubelet Pod startup latency is too high. expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 for: 15m labels: severity: warning - alert: KubeletClientCertificateExpiration annotations: description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration summary: Kubelet client certificate is about to expire. expr: kubelet_certificate_manager_client_ttl_seconds < 604800 labels: severity: warning - alert: KubeletClientCertificateExpiration annotations: description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration summary: Kubelet client certificate is about to expire. expr: kubelet_certificate_manager_client_ttl_seconds < 86400 labels: severity: critical - alert: KubeletServerCertificateExpiration annotations: description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration summary: Kubelet server certificate is about to expire. expr: kubelet_certificate_manager_server_ttl_seconds < 604800 labels: severity: warning - alert: KubeletServerCertificateExpiration annotations: description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration summary: Kubelet server certificate is about to expire. expr: kubelet_certificate_manager_server_ttl_seconds < 86400 labels: severity: critical - alert: KubeletClientCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes). runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors summary: Kubelet has failed to renew its client certificate. expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 for: 15m labels: severity: warning - alert: KubeletServerCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes). runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors summary: Kubelet has failed to renew its server certificate. expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 for: 15m labels: severity: warning - alert: KubeletDown annotations: description: Kubelet has disappeared from Prometheus target discovery. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletdown summary: Target disappeared from Prometheus target discovery. expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1) for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-system-scheduler namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-system-scheduler rules: - alert: KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeschedulerdown summary: Target disappeared from Prometheus target discovery. expr: absent(up{job="kube-scheduler"} == 1) for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-kubernetes-system namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: kubernetes-system rules: - alert: KubeVersionMismatch annotations: description: There are {{ $value }} different semantic versions of Kubernetes components running. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch summary: Different semantic versions of Kubernetes components running. expr: count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 for: 15m labels: severity: warning - alert: KubeClientErrors annotations: description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors summary: Kubernetes API server client is experiencing errors. expr: |- (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (cluster, instance, job, namespace) / sum(rate(rest_client_requests_total[5m])) by (cluster, instance, job, namespace)) > 0.01 for: 15m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-node-exporter.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: node-exporter.rules rules: - expr: |- count without (cpu, mode) ( node_cpu_seconds_total{job="node-exporter",mode="idle"} ) record: instance:node_num_cpu:sum - expr: |- 1 - avg without (cpu) ( sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) ) record: instance:node_cpu_utilisation:rate5m - expr: |- ( node_load1{job="node-exporter"} / instance:node_num_cpu:sum{job="node-exporter"} ) record: instance:node_load1_per_cpu:ratio - expr: |- 1 - ( ( node_memory_MemAvailable_bytes{job="node-exporter"} or ( node_memory_Buffers_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Slab_bytes{job="node-exporter"} ) ) / node_memory_MemTotal_bytes{job="node-exporter"} ) record: instance:node_memory_utilisation:ratio - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) record: instance:node_vmstat_pgmajfault:rate5m - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) record: instance_device:node_disk_io_time_seconds:rate5m - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) record: instance_device:node_disk_io_time_weighted_seconds:rate5m - expr: |- sum without (device) ( rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) ) record: instance:node_network_receive_bytes_excluding_lo:rate5m - expr: |- sum without (device) ( rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) ) record: instance:node_network_transmit_bytes_excluding_lo:rate5m - expr: |- sum without (device) ( rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) ) record: instance:node_network_receive_drop_excluding_lo:rate5m - expr: |- sum without (device) ( rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) ) record: instance:node_network_transmit_drop_excluding_lo:rate5m --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-node-exporter namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: node-exporter rules: - alert: NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup summary: Filesystem is predicted to run out of space within the next 24 hours. expr: |- ( node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 and predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: severity: warning - alert: NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup summary: Filesystem is predicted to run out of space within the next 4 hours. expr: |- ( node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 and predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: severity: critical - alert: NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace summary: Filesystem has less than 5% space left. expr: |- ( node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 30m labels: severity: warning - alert: NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace summary: Filesystem has less than 3% space left. expr: |- ( node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 30m labels: severity: critical - alert: NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup summary: Filesystem is predicted to run out of inodes within the next 24 hours. expr: |- ( node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 and predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: severity: warning - alert: NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup summary: Filesystem is predicted to run out of inodes within the next 4 hours. expr: |- ( node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 and predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: severity: critical - alert: NodeFilesystemAlmostOutOfFiles annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles summary: Filesystem has less than 5% inodes left. expr: |- ( node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: severity: warning - alert: NodeFilesystemAlmostOutOfFiles annotations: description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles summary: Filesystem has less than 3% inodes left. expr: |- ( node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 and node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: severity: critical - alert: NodeNetworkReceiveErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs summary: Network interface is reporting many receive errors. expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01 for: 1h labels: severity: warning - alert: NodeNetworkTransmitErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs summary: Network interface is reporting many transmit errors. expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01 for: 1h labels: severity: warning - alert: NodeHighNumberConntrackEntriesUsed annotations: description: '{{ $value | humanizePercentage }} of conntrack entries are used.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused summary: Number of conntrack are getting close to the limit. expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 labels: severity: warning - alert: NodeTextFileCollectorScrapeError annotations: description: Node Exporter text file collector failed to scrape. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror summary: Node Exporter text file collector failed to scrape. expr: node_textfile_scrape_error{job="node-exporter"} == 1 labels: severity: warning - alert: NodeClockSkewDetected annotations: description: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected summary: Clock skew detected. expr: |- ( node_timex_offset_seconds{job="node-exporter"} > 0.05 and deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 ) or ( node_timex_offset_seconds{job="node-exporter"} < -0.05 and deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 ) for: 10m labels: severity: warning - alert: NodeClockNotSynchronising annotations: description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising summary: Clock not synchronising. expr: |- min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 and node_timex_maxerror_seconds{job="node-exporter"} >= 16 for: 10m labels: severity: warning - alert: NodeRAIDDegraded annotations: description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded summary: RAID Array is degraded expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 for: 15m labels: severity: critical - alert: NodeRAIDDiskFailure annotations: description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure summary: Failed device in RAID array expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 labels: severity: warning - alert: NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit summary: Kernel is predicted to exhaust file descriptors limit soon. expr: |- ( node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 ) for: 15m labels: severity: warning - alert: NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit summary: Kernel is predicted to exhaust file descriptors limit soon. expr: |- ( node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 ) for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-node-network namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: node-network rules: - alert: NodeNetworkInterfaceFlapping annotations: description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }} runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping summary: Network interface is often changing its status expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 for: 2m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-node.rules namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: node.rules rules: - expr: |- topk by(cluster, namespace, pod) (1, max by (cluster, node, namespace, pod) ( label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") )) record: 'node_namespace_pod:kube_pod_info:' - expr: |- count by (cluster, node) ( node_cpu_seconds_total{mode="idle",job="node-exporter"} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:) ) record: node:node_num_cpu:sum - expr: |- sum( node_memory_MemAvailable_bytes{job="node-exporter"} or ( node_memory_Buffers_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Slab_bytes{job="node-exporter"} ) ) by (cluster) record: :node_memory_MemAvailable_bytes:sum - expr: |- avg by (cluster, node) ( sum without (mode) ( rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) ) ) record: node:node_cpu_utilization:ratio_rate5m - expr: |- avg by (cluster) ( node:node_cpu_utilization:ratio_rate5m ) record: cluster:node_cpu:ratio_rate5m --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus-operator.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-prometheus-operator namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: prometheus-operator rules: - alert: PrometheusOperatorListErrors annotations: description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors summary: Errors while performing list operations in controller. expr: (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 for: 15m labels: severity: warning - alert: PrometheusOperatorWatchErrors annotations: description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors summary: Errors while performing watch operations in controller. expr: (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]))) > 0.4 for: 15m labels: severity: warning - alert: PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorsyncfailed summary: Last controller reconciliation failed expr: min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]) > 0 for: 10m labels: severity: warning - alert: PrometheusOperatorReconcileErrors annotations: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorreconcileerrors summary: Errors while reconciling controller. expr: (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]))) > 0.1 for: 10m labels: severity: warning - alert: PrometheusOperatorNodeLookupErrors annotations: description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornodelookuperrors summary: Errors while reconciling Prometheus. expr: rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]) > 0.1 for: 10m labels: severity: warning - alert: PrometheusOperatorNotReady annotations: description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready summary: Prometheus operator not ready expr: min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]) == 0) for: 5m labels: severity: warning - alert: PrometheusOperatorRejectedResources annotations: description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorrejectedresources summary: Resources rejected by Prometheus operator expr: min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-kube-prometheus-operator",namespace="monitoring"}[5m]) > 0 for: 5m labels: severity: warning --- # Source: kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus-kube-prometheus-prometheus namespace: monitoring labels: app: kube-prometheus-stack app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: groups: - name: prometheus rules: - alert: PrometheusBadConfig annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig summary: Failed Prometheus configuration reload. expr: |- # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. max_over_time(prometheus_config_last_reload_successful{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) == 0 for: 10m labels: severity: critical - alert: PrometheusNotificationQueueRunningFull annotations: description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull summary: Prometheus alert notification queue predicted to run full in less than 30m. expr: |- # Without min_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. ( predict_linear(prometheus_notifications_queue_length{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m], 60 * 30) > min_over_time(prometheus_notifications_queue_capacity{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) ) for: 15m labels: severity: warning - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers annotations: description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. expr: |- ( rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) ) * 100 > 1 for: 15m labels: severity: warning - alert: PrometheusNotConnectedToAlertmanagers annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers summary: Prometheus is not connected to any Alertmanagers. expr: |- # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) < 1 for: 10m labels: severity: warning - alert: PrometheusTSDBReloadsFailing annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing summary: Prometheus has issues reloading blocks from disk. expr: increase(prometheus_tsdb_reloads_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[3h]) > 0 for: 4h labels: severity: warning - alert: PrometheusTSDBCompactionsFailing annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing summary: Prometheus has issues compacting blocks. expr: increase(prometheus_tsdb_compactions_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[3h]) > 0 for: 4h labels: severity: warning - alert: PrometheusNotIngestingSamples annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples summary: Prometheus is not ingesting samples. expr: |- ( rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) <= 0 and ( sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}) > 0 or sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}) > 0 ) ) for: 10m labels: severity: warning - alert: PrometheusDuplicateTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps summary: Prometheus is dropping samples with duplicate timestamps. expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 10m labels: severity: warning - alert: PrometheusOutOfOrderTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps summary: Prometheus drops samples with out-of-order timestamps. expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 10m labels: severity: warning - alert: PrometheusRemoteStorageFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures summary: Prometheus fails to send samples to remote storage. expr: |- ( (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m])) / ( (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m])) + (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m])) ) ) * 100 > 1 for: 15m labels: severity: critical - alert: PrometheusRemoteWriteBehind annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind summary: Prometheus remote write is behind. expr: |- # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. ( max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) - ignoring(remote_name, url) group_right max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) ) > 120 for: 15m labels: severity: critical - alert: PrometheusRemoteWriteDesiredShards annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}` $labels.instance | query | first | value }}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. expr: |- # Without max_over_time, failed scrapes could create false negatives, see # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. ( max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > max_over_time(prometheus_remote_storage_shards_max{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) ) for: 15m labels: severity: warning - alert: PrometheusRuleFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures summary: Prometheus is failing rule evaluations. expr: increase(prometheus_rule_evaluation_failures_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 15m labels: severity: critical - alert: PrometheusMissingRuleEvaluations annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations summary: Prometheus is missing rule evaluations due to slow rule group evaluation. expr: increase(prometheus_rule_group_iterations_missed_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 15m labels: severity: warning - alert: PrometheusTargetLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 15m labels: severity: warning - alert: PrometheusLabelLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 15m labels: severity: warning - alert: PrometheusScrapeBodySizeLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit summary: Prometheus has dropped some targets that exceeded body size limit. expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 15m labels: severity: warning - alert: PrometheusScrapeSampleLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit summary: Prometheus has failed scrapes that have exceeded the configured sample limit. expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0 for: 15m labels: severity: warning - alert: PrometheusTargetSyncFailure annotations: description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure summary: Prometheus has failed to sync targets. expr: increase(prometheus_target_sync_failed_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[30m]) > 0 for: 5m labels: severity: critical - alert: PrometheusHighQueryLoad annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload summary: Prometheus is reaching its maximum capacity serving concurrent requests. expr: avg_over_time(prometheus_engine_queries{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-kube-prometheus-prometheus",namespace="monitoring"}[5m]) > 0.8 for: 15m labels: severity: warning - alert: PrometheusErrorSendingAlertsToAnyAlertmanager annotations: description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. expr: |- min without (alertmanager) ( rate(prometheus_notifications_errors_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring",alertmanager!~``}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-kube-prometheus-prometheus",namespace="monitoring",alertmanager!~``}[5m]) ) * 100 > 3 for: 15m labels: severity: critical --- # Source: kube-prometheus-stack/charts/grafana/templates/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-grafana namespace: monitoring labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm spec: endpoints: - port: http-web scrapeTimeout: 30s honorLabels: true path: /metrics scheme: http jobLabel: "prometheus" selector: matchLabels: app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus namespaceSelector: matchNames: - monitoring --- # Source: kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-state-metrics namespace: monitoring labels: helm.sh/chart: kube-state-metrics-4.24.0 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: kube-state-metrics app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "2.7.0" release: prometheus spec: jobLabel: app.kubernetes.io/name selector: matchLabels: app.kubernetes.io/name: kube-state-metrics app.kubernetes.io/instance: prometheus endpoints: - port: http honorLabels: true --- # Source: kube-prometheus-stack/charts/prometheus-node-exporter/templates/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-prometheus-node-exporter namespace: monitoring labels: helm.sh/chart: prometheus-node-exporter-4.8.1 app.kubernetes.io/managed-by: Helm app.kubernetes.io/component: metrics app.kubernetes.io/part-of: prometheus-node-exporter app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "1.5.0" jobLabel: node-exporter release: prometheus spec: jobLabel: jobLabel selector: matchLabels: app.kubernetes.io/name: prometheus-node-exporter app.kubernetes.io/instance: prometheus endpoints: - port: http-metrics scheme: http --- # Source: kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-alertmanager namespace: monitoring labels: app: kube-prometheus-stack-alertmanager app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: selector: matchLabels: app: kube-prometheus-stack-alertmanager release: "prometheus" self-monitor: "true" namespaceSelector: matchNames: - "monitoring" endpoints: - port: http-web enableHttp2: true path: "/metrics" --- # Source: kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-coredns namespace: monitoring labels: app: kube-prometheus-stack-coredns app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: jobLabel: jobLabel selector: matchLabels: app: kube-prometheus-stack-coredns release: "prometheus" namespaceSelector: matchNames: - "kube-system" endpoints: - port: http-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token --- # Source: kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-apiserver namespace: monitoring labels: app: kube-prometheus-stack-apiserver app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token port: https scheme: https metricRelabelings: - action: drop regex: apiserver_request_duration_seconds_bucket;(0.15|0.2|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2|3|3.5|4|4.5|6|7|8|9|15|25|40|50) sourceLabels: - __name__ - le tlsConfig: caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt serverName: kubernetes insecureSkipVerify: false jobLabel: component namespaceSelector: matchNames: - default selector: matchLabels: component: apiserver provider: kubernetes --- # Source: kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-kube-controller-manager namespace: monitoring labels: app: kube-prometheus-stack-kube-controller-manager app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: jobLabel: jobLabel selector: matchLabels: app: kube-prometheus-stack-kube-controller-manager release: "prometheus" namespaceSelector: matchNames: - "kube-system" endpoints: - port: http-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token --- # Source: kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-kube-etcd namespace: monitoring labels: app: kube-prometheus-stack-kube-etcd app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: jobLabel: jobLabel selector: matchLabels: app: kube-prometheus-stack-kube-etcd release: "prometheus" namespaceSelector: matchNames: - "kube-system" endpoints: - port: http-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token --- # Source: kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-kube-proxy namespace: monitoring labels: app: kube-prometheus-stack-kube-proxy app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: jobLabel: jobLabel selector: matchLabels: app: kube-prometheus-stack-kube-proxy release: "prometheus" namespaceSelector: matchNames: - "kube-system" endpoints: - port: http-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token --- # Source: kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-kube-scheduler namespace: monitoring labels: app: kube-prometheus-stack-kube-scheduler app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: jobLabel: jobLabel selector: matchLabels: app: kube-prometheus-stack-kube-scheduler release: "prometheus" namespaceSelector: matchNames: - "kube-system" endpoints: - port: http-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token --- # Source: kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-kubelet namespace: monitoring labels: app: kube-prometheus-stack-kubelet app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: endpoints: - port: https-metrics scheme: https tlsConfig: caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecureSkipVerify: true bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true relabelings: - action: replace sourceLabels: - __metrics_path__ targetLabel: metrics_path - port: https-metrics scheme: https path: /metrics/cadvisor honorLabels: true tlsConfig: caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecureSkipVerify: true bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token metricRelabelings: - action: drop regex: container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total) sourceLabels: - __name__ - action: drop regex: container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total) sourceLabels: - __name__ - action: drop regex: container_memory_(mapped_file|swap) sourceLabels: - __name__ - action: drop regex: container_(file_descriptors|tasks_state|threads_max) sourceLabels: - __name__ - action: drop regex: container_spec.* sourceLabels: - __name__ - action: drop regex: .+; sourceLabels: - id - pod relabelings: - action: replace sourceLabels: - __metrics_path__ targetLabel: metrics_path - port: https-metrics scheme: https path: /metrics/probes honorLabels: true tlsConfig: caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecureSkipVerify: true bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token relabelings: - action: replace sourceLabels: - __metrics_path__ targetLabel: metrics_path jobLabel: k8s-app namespaceSelector: matchNames: - kube-system selector: matchLabels: app.kubernetes.io/name: kubelet k8s-app: kubelet --- # Source: kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-operator namespace: monitoring labels: app: kube-prometheus-stack-operator app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: endpoints: - port: https scheme: https tlsConfig: serverName: prometheus-kube-prometheus-operator ca: secret: name: prometheus-kube-prometheus-admission key: ca optional: false honorLabels: true selector: matchLabels: app: kube-prometheus-stack-operator release: "prometheus" namespaceSelector: matchNames: - "monitoring" --- # Source: kube-prometheus-stack/templates/prometheus/servicemonitor.yaml apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus-kube-prometheus-prometheus namespace: monitoring labels: app: kube-prometheus-stack-prometheus app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: selector: matchLabels: app: kube-prometheus-stack-prometheus release: "prometheus" self-monitor: "true" namespaceSelector: matchNames: - "monitoring" endpoints: - port: http-web path: "/metrics" --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: prometheus-kube-prometheus-admission labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" webhooks: - name: prometheusrulemutate.monitoring.coreos.com failurePolicy: Ignore rules: - apiGroups: - monitoring.coreos.com apiVersions: - "*" resources: - prometheusrules operations: - CREATE - UPDATE clientConfig: service: namespace: monitoring name: prometheus-kube-prometheus-operator path: /admission-prometheusrules/validate timeoutSeconds: 10 admissionReviewVersions: ["v1", "v1beta1"] sideEffects: None --- # Source: kube-prometheus-stack/charts/grafana/templates/tests/test-serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm name: prometheus-grafana-test namespace: monitoring annotations: "helm.sh/hook": test-success "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: prometheus-kube-prometheus-admission namespace: monitoring annotations: "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" --- # Source: kube-prometheus-stack/charts/grafana/templates/tests/test-configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: prometheus-grafana-test namespace: monitoring annotations: "helm.sh/hook": test-success "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm data: run.sh: |- @test "Test Health" { url="http://prometheus-grafana/api/health" code=$(wget --server-response --spider --timeout 90 --tries 10 ${url} 2>&1 | awk '/^ HTTP/{print $2}') [ "$code" == "200" ] } --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: prometheus-kube-prometheus-admission annotations: "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" rules: - apiGroups: - admissionregistration.k8s.io resources: - validatingwebhookconfigurations - mutatingwebhookconfigurations verbs: - get - update --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: prometheus-kube-prometheus-admission annotations: "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus-kube-prometheus-admission subjects: - kind: ServiceAccount name: prometheus-kube-prometheus-admission namespace: monitoring --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: prometheus-kube-prometheus-admission namespace: monitoring annotations: "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" rules: - apiGroups: - "" resources: - secrets verbs: - get - create --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: prometheus-kube-prometheus-admission namespace: monitoring annotations: "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" roleRef: apiGroup: rbac.authorization.k8s.io kind: Role name: prometheus-kube-prometheus-admission subjects: - kind: ServiceAccount name: prometheus-kube-prometheus-admission namespace: monitoring --- # Source: kube-prometheus-stack/charts/grafana/templates/tests/test.yaml apiVersion: v1 kind: Pod metadata: name: prometheus-grafana-test labels: helm.sh/chart: grafana-6.50.2 app.kubernetes.io/name: grafana app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "9.3.1" app.kubernetes.io/managed-by: Helm annotations: "helm.sh/hook": test-success "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" namespace: monitoring spec: serviceAccountName: prometheus-grafana-test containers: - name: prometheus-test image: "bats/bats:v1.4.1" imagePullPolicy: "IfNotPresent" command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"] volumeMounts: - mountPath: /tests name: tests readOnly: true volumes: - name: tests configMap: name: prometheus-grafana-test restartPolicy: Never --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml apiVersion: batch/v1 kind: Job metadata: name: prometheus-kube-prometheus-admission-create namespace: monitoring annotations: "helm.sh/hook": pre-install,pre-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission-create app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: template: metadata: name: prometheus-kube-prometheus-admission-create labels: app: kube-prometheus-stack-admission-create app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: containers: - name: create image: registry.k8s.io/ingress-nginx/kube-webhook-certgen:v1.3.0 imagePullPolicy: IfNotPresent args: - create - --host=prometheus-kube-prometheus-operator,prometheus-kube-prometheus-operator.monitoring.svc - --namespace=monitoring - --secret-name=prometheus-kube-prometheus-admission securityContext: {} resources: {} restartPolicy: OnFailure serviceAccountName: prometheus-kube-prometheus-admission securityContext: runAsGroup: 2000 runAsNonRoot: true runAsUser: 2000 --- # Source: kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml apiVersion: batch/v1 kind: Job metadata: name: prometheus-kube-prometheus-admission-patch namespace: monitoring annotations: "helm.sh/hook": post-install,post-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded labels: app: kube-prometheus-stack-admission-patch app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: template: metadata: name: prometheus-kube-prometheus-admission-patch labels: app: kube-prometheus-stack-admission-patch app.kubernetes.io/managed-by: Helm app.kubernetes.io/instance: prometheus app.kubernetes.io/version: "44.3.0" app.kubernetes.io/part-of: kube-prometheus-stack chart: kube-prometheus-stack-44.3.0 release: "prometheus" heritage: "Helm" spec: containers: - name: patch image: registry.k8s.io/ingress-nginx/kube-webhook-certgen:v1.3.0 imagePullPolicy: IfNotPresent args: - patch - --webhook-name=prometheus-kube-prometheus-admission - --namespace=monitoring - --secret-name=prometheus-kube-prometheus-admission - --patch-failure-policy= securityContext: {} resources: {} restartPolicy: OnFailure serviceAccountName: prometheus-kube-prometheus-admission securityContext: runAsGroup: 2000 runAsNonRoot: true runAsUser: 2000