2178 lines
55 KiB
JSON
2178 lines
55 KiB
JSON
{
|
|
"__inputs": [
|
|
{
|
|
"name": "DS_PROMETHEUS",
|
|
"label": "Prometheus",
|
|
"description": "",
|
|
"type": "datasource",
|
|
"pluginId": "prometheus",
|
|
"pluginName": "Prometheus"
|
|
}
|
|
],
|
|
"__elements": {},
|
|
"__requires": [
|
|
{
|
|
"type": "panel",
|
|
"id": "bargauge",
|
|
"name": "Bar gauge",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "gauge",
|
|
"name": "Gauge",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "grafana",
|
|
"id": "grafana",
|
|
"name": "Grafana",
|
|
"version": "11.5.2"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "heatmap",
|
|
"name": "Heatmap",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "datasource",
|
|
"id": "prometheus",
|
|
"name": "Prometheus",
|
|
"version": "1.0.0"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "stat",
|
|
"name": "Stat",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "timeseries",
|
|
"name": "Time series",
|
|
"version": ""
|
|
}
|
|
],
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": null,
|
|
"links": [],
|
|
"liveNow": true,
|
|
"panels": [
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 15,
|
|
"panels": [],
|
|
"title": "Runner Performance",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Heat map showing the typical time before a job starts and whether the number of jobs in that time bucket are increasing or decreasing.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"custom": {
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
}
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 7,
|
|
"x": 0,
|
|
"y": 1
|
|
},
|
|
"id": 7,
|
|
"options": {
|
|
"calculate": false,
|
|
"cellGap": 1,
|
|
"color": {
|
|
"exponent": 0.5,
|
|
"fill": "dark-orange",
|
|
"mode": "scheme",
|
|
"reverse": false,
|
|
"scale": "exponential",
|
|
"scheme": "Turbo",
|
|
"steps": 64
|
|
},
|
|
"exemplars": {
|
|
"color": "rgba(255,0,255,0.7)"
|
|
},
|
|
"filterValues": {
|
|
"le": 1e-9
|
|
},
|
|
"legend": {
|
|
"show": true
|
|
},
|
|
"rowsFrame": {
|
|
"layout": "auto"
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"showColorScale": false,
|
|
"yHistogram": false
|
|
},
|
|
"yAxis": {
|
|
"axisLabel": "Wait Time",
|
|
"axisPlacement": "left",
|
|
"reverse": false,
|
|
"unit": "s"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "sum by(le) (increase(gha_job_startup_duration_seconds_bucket{actions_github_com_scale_set_name=~\"$Scaleset\", actions_github_com_scale_set_namespace=~\"$RunnerNamespace\"}[$__rate_interval]))",
|
|
"format": "heatmap",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"interval": "",
|
|
"legendFormat": "{{le}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Startup Duration",
|
|
"type": "heatmap"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Heat map showing the typical time to complete a job and whether the number of jobs in that time bucket are increasing or decreasing.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"custom": {
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
}
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 7,
|
|
"y": 1
|
|
},
|
|
"id": 6,
|
|
"options": {
|
|
"calculate": false,
|
|
"cellGap": 1,
|
|
"color": {
|
|
"exponent": 0.5,
|
|
"fill": "dark-orange",
|
|
"mode": "scheme",
|
|
"reverse": false,
|
|
"scale": "exponential",
|
|
"scheme": "Spectral",
|
|
"steps": 64
|
|
},
|
|
"exemplars": {
|
|
"color": "rgba(255,0,255,0.7)"
|
|
},
|
|
"filterValues": {
|
|
"le": 1e-9
|
|
},
|
|
"legend": {
|
|
"show": true
|
|
},
|
|
"rowsFrame": {
|
|
"layout": "auto"
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"showColorScale": false,
|
|
"yHistogram": false
|
|
},
|
|
"yAxis": {
|
|
"axisLabel": "Time",
|
|
"axisPlacement": "left",
|
|
"reverse": false,
|
|
"unit": "s"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "sum by(le) (increase(gha_job_execution_duration_seconds_bucket{actions_github_com_scale_set_name=~\"$Scaleset\", actions_github_com_scale_set_namespace=~\"$RunnerNamespace\"}[$__rate_interval]))",
|
|
"format": "heatmap",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": false,
|
|
"instant": false,
|
|
"legendFormat": "{{le}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Job Execution",
|
|
"type": "heatmap"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of jobs assigned to the scale set. The threshold is triggered with the number of assigned jobs exceeds the number of desired runners. This indicates that not all jobs will have an available runner.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"min": 0,
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"id": 9,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_assigned_jobs{namespace=~\"$RunnerNamespace\", actions_github_com_scale_set_name=~\"$Scaleset\"})",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_desired_runners{namespace=~\"$RunnerNamespace\", actions_github_com_scale_set_name=~\"$Scaleset\"}) + 1",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "DesiredRunners"
|
|
}
|
|
],
|
|
"title": "Assigned Jobs",
|
|
"transformations": [
|
|
{
|
|
"id": "configFromData",
|
|
"options": {
|
|
"configRefId": "DesiredRunners",
|
|
"mappings": [
|
|
{
|
|
"fieldName": "Time",
|
|
"handlerKey": "__ignore"
|
|
},
|
|
{
|
|
"fieldName": "sum(gha_desired_runners{namespace=~\"(arc-runners|arc-runners-dind|arc-runners-k8s)\", actions_github_com_scale_set_name=~\"(arc-runner-set|dind-runner-set|k8s-runner-set)\"}) + 1",
|
|
"handlerKey": "threshold1"
|
|
}
|
|
]
|
|
}
|
|
}
|
|
],
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Number of runners desired by the scale set",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 8
|
|
},
|
|
"id": 4,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_desired_runners{actions_github_com_scale_set_name=~\"$Scaleset\", namespace=~\"$RunnerNamespace\"})",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Desired Runners",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Number of registered runners that do not have assigned jobs.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 8
|
|
},
|
|
"id": 2,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_idle_runners{actions_github_com_scale_set_name=~\"$Scaleset\", namespace=~\"$RunnerNamespace\"})",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Idle Runners",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of workflow jobs currently executing",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 9,
|
|
"y": 8
|
|
},
|
|
"id": 10,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum (gha_running_jobs{actions_github_com_scale_set_name=~\"$Scaleset\", actions_github_com_scale_set_namespace=~\"$RunnerNamespace\"})",
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Running Jobs",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of runners in a failed state. These runners are typically misconfigured and count against the scale set's maximum limit.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"id": 26,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_controller_failed_ephemeral_runners{name=~\"$Scaleset\", namespace=~\"$RunnerNamespace\"})",
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Failed Runners",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of active scale set listeners",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"fieldMinMax": true,
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 13
|
|
},
|
|
"id": 5,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_controller_running_listeners{namespace=~\"$SystemNamespace\"})",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Listeners",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Number of runner pods that are waiting to be created. When this number exceeds the number of pods Kubernetes reports as Waiting, it indicate cluster performance issues.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 13
|
|
},
|
|
"id": 3,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_controller_pending_ephemeral_runners{name=~\"$Scaleset\", namespace=~\"$RunnerNamespace\"})",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(kube_pod_container_status_waiting{namespace=~\"$RunnerNamespace\"}) != 0 or vector(0)",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "Waiting"
|
|
}
|
|
],
|
|
"title": "Pending Runners",
|
|
"transformations": [
|
|
{
|
|
"id": "configFromData",
|
|
"options": {
|
|
"configRefId": "Waiting",
|
|
"mappings": [
|
|
{
|
|
"fieldName": "Time",
|
|
"handlerKey": "__ignore"
|
|
},
|
|
{
|
|
"fieldName": "sum(kube_pod_container_status_waiting{namespace=~\"(arc-runners|arc-runners-dind|arc-runners-k8s)\"}) != 0 or vector(0)",
|
|
"handlerKey": "threshold1"
|
|
}
|
|
]
|
|
}
|
|
}
|
|
],
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of runners registered for processing queued jobs",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 13
|
|
},
|
|
"id": 8,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_registered_runners{actions_github_com_scale_set_name=~\"$Scaleset\", namespace=~\"$RunnerNamespace\"})",
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Registered Runners",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Number of runner pods in a running state",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 9,
|
|
"y": 13
|
|
},
|
|
"id": 1,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"expr": "max(gha_controller_running_ephemeral_runners{name=~\"$Scaleset\", namespace=~\"$RunnerNamespace\"})",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Active Runners",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of containers that are reporting that they were terminated by an out-of-memory condition (OOMK.iller)",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"noValue": "No issues detected",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "semi-dark-red",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 12,
|
|
"y": 13
|
|
},
|
|
"id": 23,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\",namespace=~\"$RunnerNamespace\"}) by (namespace)",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Out of Memory",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The peak memory used by a container in a given scale set's namespace.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"fixedColor": "semi-dark-green",
|
|
"mode": "shades"
|
|
},
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "decbytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 5,
|
|
"x": 0,
|
|
"y": 18
|
|
},
|
|
"id": 12,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": false,
|
|
"sizing": "auto"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "max(container_memory_working_set_bytes{namespace=~\"$RunnerNamespace\"}) by (namespace)",
|
|
"format": "time_series",
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Peak Container Memory",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The sum of the reads and writes occurring within the runner namespace.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Bytes",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 54,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "smooth",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"log": 2,
|
|
"type": "log"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "decbytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 6,
|
|
"x": 5,
|
|
"y": 18
|
|
},
|
|
"id": 13,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(rate(container_fs_writes_bytes_total{namespace=~\"$RunnerNamespace\"}[$__rate_interval])) > 0 or vector(0)",
|
|
"instant": false,
|
|
"legendFormat": "Write",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(rate(container_fs_reads_bytes_total{namespace=~\"$RunnerNamespace\"}[$__rate_interval])) > 0 or vector(0)",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "Read",
|
|
"range": true,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Container I/O",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The Kubernetes-reported pod status.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"min": 0,
|
|
"noValue": "No active pods",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "yellow",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 11,
|
|
"y": 18
|
|
},
|
|
"id": 11,
|
|
"options": {
|
|
"displayMode": "lcd",
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": false
|
|
},
|
|
"maxVizHeight": 300,
|
|
"minVizHeight": 10,
|
|
"minVizWidth": 0,
|
|
"namePlacement": "auto",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showUnfilled": true,
|
|
"sizing": "auto",
|
|
"valueMode": "color"
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "sum(kube_pod_container_status_ready{namespace=~\"$RunnerNamespace\"}) != 0 or vector(0)",
|
|
"format": "time_series",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"interval": "",
|
|
"legendFormat": "Ready",
|
|
"range": true,
|
|
"refId": "B",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(kube_pod_container_status_waiting{namespace=~\"$RunnerNamespace\"}) != 0 or vector(0)",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "Waiting",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(kube_pod_container_status_terminated_reason{namespace=~\"$RunnerNamespace\", reason=\"Completed\"}) != 0 or vector(0)",
|
|
"hide": false,
|
|
"instant": false,
|
|
"interval": "",
|
|
"legendFormat": "Completed",
|
|
"range": true,
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(kube_pod_container_status_terminated_reason{namespace=~\"$RunnerNamespace\", reason=\"Error\"}) != 0 or vector(0)",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "Error",
|
|
"range": true,
|
|
"refId": "E"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "code",
|
|
"expr": "sum(gha_desired_runners)+1",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "DesiredRunners",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Container Pod Status",
|
|
"transformations": [
|
|
{
|
|
"id": "configFromData",
|
|
"options": {
|
|
"applyTo": {
|
|
"id": "byName",
|
|
"options": "Ready"
|
|
},
|
|
"configRefId": "DesiredRunners",
|
|
"mappings": [
|
|
{
|
|
"fieldName": "Time",
|
|
"handlerKey": "__ignore"
|
|
},
|
|
{
|
|
"fieldName": "sum(gha_desired_runners) + 1",
|
|
"handlerKey": "threshold1"
|
|
},
|
|
{
|
|
"fieldName": "sum(gha_desired_runners) -5",
|
|
"handlerKey": "threshold1"
|
|
}
|
|
]
|
|
}
|
|
}
|
|
],
|
|
"type": "bargauge"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"id": 16,
|
|
"panels": [],
|
|
"title": "Controller Performance",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The average time required for a reconciliation request to be processed. This reflects the time required for the controller to process a single request to modify a Kubernetes resource.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 33,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "smooth",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 25
|
|
},
|
|
"id": 17,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(controller_runtime_reconcile_time_seconds_sum{namespace=\"$SystemNamespace\"}[$__rate_interval])",
|
|
"interval": "",
|
|
"legendFormat": "{{controller}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Reconcile Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The average time a queued reconciliation request spends waiting to be processed.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 27,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 25
|
|
},
|
|
"id": 18,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(workqueue_queue_duration_seconds_sum{namespace=\"$SystemNamespace\"}[$__rate_interval])",
|
|
"legendFormat": "{{controller}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Workqueue Queue Duration",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Errors indicate that controller has not achieved a desired state and is requesting Kubernetes to queue another request for reconciliation. Ideally, this number remains close to zero. An increasing number can indicate resource contention or delays processing API server requests.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 33,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "smooth",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 25
|
|
},
|
|
"id": 27,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(controller_runtime_reconcile_errors_total{namespace=\"$SystemNamespace\"}[$__rate_interval])",
|
|
"interval": "",
|
|
"legendFormat": "{{controller}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Reconciliation Errors",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of reconcile requests that are waiting to be processed by the controller. A growing queue depth can indicate that the Kubernetes API Server or the controller does not have enough resources. This can lead to pods taking longer to be deleted or started. ",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"fieldMinMax": false,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 100
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 33
|
|
},
|
|
"id": 20,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"max"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum (workqueue_depth{namespace=\"$SystemNamespace\"}) by (name)",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Queue Depth",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of workers that are currently being used to process reconcile requests. Increasing this number can reduce the work queue duration, but each new worker adds a small amount of time due to context switching.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 33
|
|
},
|
|
"id": 21,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"max"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum by (controller) (controller_runtime_active_workers)",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Active Workers",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The number of calls to the API server",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 27,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "smooth",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 33
|
|
},
|
|
"id": 19,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum by (method, code) (rate(rest_client_requests_total{namespace=\"$SystemNamespace\"}[$__rate_interval]))",
|
|
"format": "time_series",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "API Calls",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 41
|
|
},
|
|
"id": 25,
|
|
"panels": [],
|
|
"title": "Metrics",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "The time required by Prometheus to read and process metrics. Long scrape times can delay metrics updates or lead to metrics loss. Increasing time often indicates issues with metrics cardinality or cluster resources.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineStyle": {
|
|
"fill": "solid"
|
|
},
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "never",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 18,
|
|
"x": 0,
|
|
"y": 42
|
|
},
|
|
"id": 24,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.5.2",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "scrape_duration_seconds",
|
|
"instant": false,
|
|
"legendFormat": "{{job}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
}
|
|
}
|
|
],
|
|
"title": "Scrape Duration",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"refresh": "5s",
|
|
"schemaVersion": 40,
|
|
"tags": [],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"definition": "label_values(gha_controller_running_listeners,namespace)",
|
|
"description": "The ARC system namespace",
|
|
"includeAll": true,
|
|
"label": "ARC System Namespace",
|
|
"multi": true,
|
|
"name": "SystemNamespace",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(gha_controller_running_listeners,namespace)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"type": "query"
|
|
},
|
|
{
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"definition": "label_values(gha_desired_runners,actions_github_com_scale_set_name)",
|
|
"description": "The name of the runner scale set",
|
|
"includeAll": true,
|
|
"label": "Scale Set",
|
|
"multi": true,
|
|
"name": "Scaleset",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(gha_desired_runners,actions_github_com_scale_set_name)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 2,
|
|
"regex": "",
|
|
"type": "query"
|
|
},
|
|
{
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"definition": "label_values(gha_desired_runners{actions_github_com_scale_set_name=~\"$Scaleset\"},namespace)",
|
|
"description": "Namespace containing the runners",
|
|
"includeAll": true,
|
|
"label": "Runner Namespace",
|
|
"multi": true,
|
|
"name": "RunnerNamespace",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(gha_desired_runners{actions_github_com_scale_set_name=~\"$Scaleset\"},namespace)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 2,
|
|
"regex": "",
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-15m",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {
|
|
"refresh_intervals": [
|
|
"5s",
|
|
"10s",
|
|
"30s",
|
|
"1m",
|
|
"5m",
|
|
"15m",
|
|
"30m",
|
|
"1h",
|
|
"2h",
|
|
"1d",
|
|
"7d"
|
|
]
|
|
},
|
|
"timezone": "",
|
|
"title": "ARC Autoscaling Runner Set Monitoring",
|
|
"uid": "af21e938-2151-4bf2-b798-8cf9232f947a",
|
|
"version": 1,
|
|
"weekStart": ""
|
|
}
|