-
Notifications
You must be signed in to change notification settings - Fork 36
/
AddonPolicyMetricsProfile.rules.json
551 lines (551 loc) · 29.7 KB
/
AddonPolicyMetricsProfile.rules.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
{
"if": {
"field": "type",
"equals": "Microsoft.ContainerService/managedClusters"
},
"then": {
"effect": "deployIfNotExists",
"details": {
"type": "Microsoft.ContainerService/managedClusters",
"name": "[field('name')]",
"roleDefinitionIds": [
"/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c"
],
"existenceCondition": {
"field": "Microsoft.ContainerService/managedClusters/azureMonitorProfile.metrics.enabled",
"equals": "true"
},
"deployment": {
"properties": {
"mode": "incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"azureMonitorWorkspaceResourceId": {
"type": "string"
},
"azureMonitorWorkspaceLocation": {
"type": "string",
"defaultValue": ""
},
"metricLabelsAllowlist": {
"type": "string",
"defaultValue": ""
},
"metricAnnotationsAllowList": {
"type": "string",
"defaultValue": ""
},
"enableWindowsRecordingRules": {
"type": "bool"
},
"clusterResourceId": {
"type": "string"
},
"clusterLocation": {
"type": "string"
}
},
"variables": {
"clusterSubscriptionId": "[split(parameters('clusterResourceId'),'/')[2]]",
"clusterResourceGroup": "[split(parameters('clusterResourceId'),'/')[4]]",
"clusterName": "[split(parameters('clusterResourceId'),'/')[8]]",
"dceName": "[substring(concat('MSProm', '-', parameters('azureMonitorWorkspaceLocation'), '-', variables('clusterName')), 0, min(44, length(concat('MSProm', '-', parameters('azureMonitorWorkspaceLocation'), '-', variables('clusterName')))))]",
"dcrName": "[substring(concat('MSProm', '-', parameters('azureMonitorWorkspaceLocation'), '-', variables('clusterName')), 0, min(64, length(concat('MSProm', '-', parameters('azureMonitorWorkspaceLocation'), '-', variables('clusterName')))))]",
"dcraName": "[Concat('MSProm', '-', parameters('clusterLocation'), '-', variables('clusterName'))]",
"nodeRecordingRuleGroup": "NodeRecordingRulesRuleGroup-",
"nodeRecordingRuleGroupName": "[concat(variables('nodeRecordingRuleGroup'), variables('clusterName'))]",
"nodeRecordingRuleGroupDescription": "Node Recording Rules RuleGroup",
"kubernetesRecordingRuleGroup": "KubernetesRecordingRulesRuleGroup-",
"kubernetesRecordingRuleGroupName": "[concat(variables('kubernetesRecordingRuleGroup'), variables('clusterName'))]",
"kubernetesRecordingRuleGroupDescription": "Kubernetes Recording Rules RuleGroup",
"nodeRecordingRuleGroupWin": "NodeRecordingRulesRuleGroup-Win-",
"nodeAndKubernetesRecordingRuleGroupWin": "NodeAndKubernetesRecordingRulesRuleGroup-Win-",
"nodeRecordingRuleGroupNameWin": "[concat(variables('nodeRecordingRuleGroupWin'), variables('clusterName'))]",
"nodeAndKubernetesRecordingRuleGroupNameWin": "[concat(variables('nodeAndKubernetesRecordingRuleGroupWin'), variables('clusterName'))]",
"RecordingRuleGroupDescriptionWin": "Kubernetes Recording Rules RuleGroup for Win",
"version": " - 0.1"
},
"resources": [
{
  "type": "Microsoft.Insights/dataCollectionEndpoints",
  "apiVersion": "2022-06-01",
  "name": "[variables('dceName')]",
  "location": "[parameters('azureMonitorWorkspaceLocation')]",
  "kind": "Linux",
  "properties": {}
},
{
  "type": "Microsoft.Insights/dataCollectionRules",
  "apiVersion": "2022-06-01",
  "name": "[variables('dcrName')]",
  "location": "[parameters('azureMonitorWorkspaceLocation')]",
  "kind": "Linux",
  "properties": {
    "dataCollectionEndpointId": "[resourceId('Microsoft.Insights/dataCollectionEndpoints', variables('dceName'))]",
    "dataFlows": [
      {
        "destinations": [
          "MonitoringAccount1"
        ],
        "streams": [
          "Microsoft-PrometheusMetrics"
        ]
      }
    ],
    "dataSources": {
      "prometheusForwarder": [
        {
          "name": "PrometheusDataSource",
          "streams": [
            "Microsoft-PrometheusMetrics"
          ],
          "labelIncludeFilter": {}
        }
      ]
    },
    "description": "DCR for Azure Monitor Metrics Profile (Managed Prometheus)",
    "destinations": {
      "monitoringAccounts": [
        {
          "accountResourceId": "[parameters('azureMonitorWorkspaceResourceId')]",
          "name": "MonitoringAccount1"
        }
      ]
    }
  },
  "dependsOn": [
    "[resourceId('Microsoft.Insights/dataCollectionEndpoints', variables('dceName'))]"
  ]
},
{
  "type": "Microsoft.Resources/deployments",
  "name": "[Concat('azuremonitormetrics-dcra', '-', uniqueString(parameters('clusterResourceId')))]",
  "apiVersion": "2017-05-10",
  "subscriptionId": "[variables('clusterSubscriptionId')]",
  "resourceGroup": "[variables('clusterResourceGroup')]",
  "dependsOn": [
    "[resourceId('Microsoft.Insights/dataCollectionEndpoints', variables('dceName'))]",
    "[resourceId('Microsoft.Insights/dataCollectionRules', variables('dcrName'))]"
  ],
  "properties": {
    "mode": "Incremental",
    "template": {
      "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
      "contentVersion": "1.0.0.0",
      "parameters": {},
      "variables": {},
      "resources": [
        {
          "type": "Microsoft.ContainerService/managedClusters/providers/dataCollectionRuleAssociations",
          "name": "[concat(variables('clusterName'),'/microsoft.insights/', variables('dcraName'))]",
          "apiVersion": "2022-06-01",
          "location": "[parameters('clusterLocation')]",
          "properties": {
            "description": "Association of data collection rule. Deleting this association will break the data collection for this AKS Cluster.",
            "dataCollectionRuleId": "[resourceId('Microsoft.Insights/dataCollectionRules', variables('dcrName'))]"
          }
        }
      ]
    },
    "parameters": {}
  }
},
{
  "type": "Microsoft.Resources/deployments",
  "name": "[Concat('azuremonitormetrics-profile', '-', uniqueString(parameters('clusterResourceId')))]",
  "apiVersion": "2017-05-10",
  "subscriptionId": "[variables('clusterSubscriptionId')]",
  "resourceGroup": "[variables('clusterResourceGroup')]",
  "dependsOn": [
    "[Concat('azuremonitormetrics-dcra', '-', uniqueString(parameters('clusterResourceId')))]"
  ],
  "properties": {
    "mode": "Incremental",
    "template": {
      "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
      "contentVersion": "1.0.0.0",
      "parameters": {},
      "variables": {},
      "resources": [
        {
          "name": "[variables('clusterName')]",
          "type": "Microsoft.ContainerService/managedClusters",
          "location": "[parameters('clusterLocation')]",
          "apiVersion": "2023-01-01",
          "properties": {
            "mode": "Incremental",
            "id": "[parameters('clusterResourceId')]",
            "azureMonitorProfile": {
              "metrics": {
                "enabled": true,
                "kubeStateMetrics": {
                  "metricLabelsAllowlist": "[parameters('metricLabelsAllowlist')]",
                  "metricAnnotationsAllowList": "[parameters('metricAnnotationsAllowList')]"
                }
              }
            }
          }
        }
      ]
    },
    "parameters": {}
  }
},
{
  "name": "[variables('nodeRecordingRuleGroupName')]",
  "type": "Microsoft.AlertsManagement/prometheusRuleGroups",
  "apiVersion": "2023-03-01",
  "location": "[parameters('azureMonitorWorkspaceLocation')]",
  "properties": {
    "description": "[concat(variables('nodeRecordingRuleGroupDescription'), variables('version'))]",
    "scopes": [
      "[parameters('azureMonitorWorkspaceResourceId')]"
    ],
    "clusterName": "[variables('clusterName')]",
    "interval": "PT1M",
    "rules": [
      {
        "record": "instance:node_num_cpu:sum",
        "expression": "count without (cpu, mode) ( node_cpu_seconds_total{job=\"node\",mode=\"idle\"})"
      },
      {
        "record": "instance:node_cpu_utilisation:rate5m",
        "expression": "1 - avg without (cpu) ( sum without (mode) (rate(node_cpu_seconds_total{job=\"node\", mode=~\"idle|iowait|steal\"}[5m])))"
      },
      {
        "record": "instance:node_load1_per_cpu:ratio",
        "expression": "( node_load1{job=\"node\"}/ instance:node_num_cpu:sum{job=\"node\"})"
      },
      {
        "record": "instance:node_memory_utilisation:ratio",
        "expression": "1 - ( ( node_memory_MemAvailable_bytes{job=\"node\"} or ( node_memory_Buffers_bytes{job=\"node\"} + node_memory_Cached_bytes{job=\"node\"} + node_memory_MemFree_bytes{job=\"node\"} + node_memory_Slab_bytes{job=\"node\"} ) )/ node_memory_MemTotal_bytes{job=\"node\"})"
      },
      {
        "record": "instance:node_vmstat_pgmajfault:rate5m",
        "expression": "rate(node_vmstat_pgmajfault{job=\"node\"}[5m])"
      },
      {
        "record": "instance_device:node_disk_io_time_seconds:rate5m",
        "expression": "rate(node_disk_io_time_seconds_total{job=\"node\", device!=\"\"}[5m])"
      },
      {
        "record": "instance_device:node_disk_io_time_weighted_seconds:rate5m",
        "expression": "rate(node_disk_io_time_weighted_seconds_total{job=\"node\", device!=\"\"}[5m])"
      },
      {
        "record": "instance:node_network_receive_bytes_excluding_lo:rate5m",
        "expression": "sum without (device) ( rate(node_network_receive_bytes_total{job=\"node\", device!=\"lo\"}[5m]))"
      },
      {
        "record": "instance:node_network_transmit_bytes_excluding_lo:rate5m",
        "expression": "sum without (device) ( rate(node_network_transmit_bytes_total{job=\"node\", device!=\"lo\"}[5m]))"
      },
      {
        "record": "instance:node_network_receive_drop_excluding_lo:rate5m",
        "expression": "sum without (device) ( rate(node_network_receive_drop_total{job=\"node\", device!=\"lo\"}[5m]))"
      },
      {
        "record": "instance:node_network_transmit_drop_excluding_lo:rate5m",
        "expression": "sum without (device) ( rate(node_network_transmit_drop_total{job=\"node\", device!=\"lo\"}[5m]))"
      }
    ]
  }
},
{
  "name": "[variables('kubernetesRecordingRuleGroupName')]",
  "type": "Microsoft.AlertsManagement/prometheusRuleGroups",
  "apiVersion": "2023-03-01",
  "location": "[parameters('azureMonitorWorkspaceLocation')]",
  "properties": {
    "description": "[concat(variables('kubernetesRecordingRuleGroupDescription'), variables('version'))]",
    "scopes": [
      "[parameters('azureMonitorWorkspaceResourceId')]"
    ],
    "clusterName": "[variables('clusterName')]",
    "interval": "PT1M",
    "rules": [
      {
        "record": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate",
        "expression": "sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))"
      },
      {
        "record": "node_namespace_pod_container:container_memory_working_set_bytes",
        "expression": "container_memory_working_set_bytes{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
      },
      {
        "record": "node_namespace_pod_container:container_memory_rss",
        "expression": "container_memory_rss{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
      },
      {
        "record": "node_namespace_pod_container:container_memory_cache",
        "expression": "container_memory_cache{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
      },
      {
        "record": "node_namespace_pod_container:container_memory_swap",
        "expression": "container_memory_swap{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
      },
      {
        "record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests",
        "expression": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))"
      },
      {
        "record": "namespace_memory:kube_pod_container_resource_requests:sum",
        "expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
      },
      {
        "record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests",
        "expression": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))"
      },
      {
        "record": "namespace_cpu:kube_pod_container_resource_requests:sum",
        "expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
      },
      {
        "record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits",
        "expression": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))"
      },
      {
        "record": "namespace_memory:kube_pod_container_resource_limits:sum",
        "expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
      },
      {
        "record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits",
        "expression": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1) )"
      },
      {
        "record": "namespace_cpu:kube_pod_container_resource_limits:sum",
        "expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
      },
      {
        "record": "namespace_workload_pod:kube_pod_owner:relabel",
        "expression": "max by (cluster, namespace, workload, pod) ( label_replace( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.*)\" ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( 1, max by (replicaset, namespace, owner_name) ( kube_replicaset_owner{job=\"kube-state-metrics\"} ) ), \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
        "labels": {
          "workload_type": "deployment"
        }
      },
      {
        "record": "namespace_workload_pod:kube_pod_owner:relabel",
        "expression": "max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
        "labels": {
          "workload_type": "daemonset"
        }
      },
      {
        "record": "namespace_workload_pod:kube_pod_owner:relabel",
        "expression": "max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
        "labels": {
          "workload_type": "statefulset"
        }
      },
      {
        "record": "namespace_workload_pod:kube_pod_owner:relabel",
        "expression": "max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
        "labels": {
          "workload_type": "job"
        }
      },
      {
        "record": ":node_memory_MemAvailable_bytes:sum",
        "expression": "sum( node_memory_MemAvailable_bytes{job=\"node\"} or ( node_memory_Buffers_bytes{job=\"node\"} + node_memory_Cached_bytes{job=\"node\"} + node_memory_MemFree_bytes{job=\"node\"} + node_memory_Slab_bytes{job=\"node\"} )) by (cluster)"
      },
      {
        "record": "cluster:node_cpu:ratio_rate5m",
        "expression": "sum(rate(node_cpu_seconds_total{job=\"node\",mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])) by (cluster) /count(sum(node_cpu_seconds_total{job=\"node\"}) by (cluster, instance, cpu)) by (cluster)"
      }
    ]
  }
},
{
  "name": "[variables('nodeRecordingRuleGroupNameWin')]",
  "type": "Microsoft.AlertsManagement/prometheusRuleGroups",
  "apiVersion": "2023-03-01",
  "location": "[parameters('azureMonitorWorkspaceLocation')]",
  "properties": {
    "description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
    "scopes": [
      "[parameters('azureMonitorWorkspaceResourceId')]"
    ],
    "enabled": "[parameters('enableWindowsRecordingRules')]",
    "clusterName": "[variables('clusterName')]",
    "interval": "PT1M",
    "rules": [
      {
        "record": "node:windows_node:sum",
        "expression": "count (windows_system_system_up_time{job=\"windows-exporter\"})"
      },
      {
        "record": "node:windows_node_num_cpu:sum",
        "expression": "count by (instance) (sum by (instance, core) (windows_cpu_time_total{job=\"windows-exporter\"}))"
      },
      {
        "record": ":windows_node_cpu_utilisation:avg5m",
        "expression": "1 - avg(rate(windows_cpu_time_total{job=\"windows-exporter\",mode=\"idle\"}[5m]))"
      },
      {
        "record": "node:windows_node_cpu_utilisation:avg5m",
        "expression": "1 - avg by (instance) (rate(windows_cpu_time_total{job=\"windows-exporter\",mode=\"idle\"}[5m]))"
      },
      {
        "record": ":windows_node_memory_utilisation:",
        "expression": "1 -sum(windows_memory_available_bytes{job=\"windows-exporter\"})/sum(windows_os_visible_memory_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": ":windows_node_memory_MemFreeCached_bytes:sum",
        "expression": "sum(windows_memory_available_bytes{job=\"windows-exporter\"} + windows_memory_cache_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": "node:windows_node_memory_totalCached_bytes:sum",
        "expression": "(windows_memory_cache_bytes{job=\"windows-exporter\"} + windows_memory_modified_page_list_bytes{job=\"windows-exporter\"} + windows_memory_standby_cache_core_bytes{job=\"windows-exporter\"} + windows_memory_standby_cache_normal_priority_bytes{job=\"windows-exporter\"} + windows_memory_standby_cache_reserve_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": ":windows_node_memory_MemTotal_bytes:sum",
        "expression": "sum(windows_os_visible_memory_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": "node:windows_node_memory_bytes_available:sum",
        "expression": "sum by (instance) ((windows_memory_available_bytes{job=\"windows-exporter\"}))"
      },
      {
        "record": "node:windows_node_memory_bytes_total:sum",
        "expression": "sum by (instance) (windows_os_visible_memory_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": "node:windows_node_memory_utilisation:ratio",
        "expression": "(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum) / scalar(sum(node:windows_node_memory_bytes_total:sum))"
      },
      {
        "record": "node:windows_node_memory_utilisation:",
        "expression": "1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)"
      },
      {
        "record": "node:windows_node_memory_swap_io_pages:irate",
        "expression": "irate(windows_memory_swap_page_operations_total{job=\"windows-exporter\"}[5m])"
      },
      {
        "record": ":windows_node_disk_utilisation:avg_irate",
        "expression": "avg(irate(windows_logical_disk_read_seconds_total{job=\"windows-exporter\"}[5m]) + irate(windows_logical_disk_write_seconds_total{job=\"windows-exporter\"}[5m]))"
      },
      {
        "record": "node:windows_node_disk_utilisation:avg_irate",
        "expression": "avg by (instance) ((irate(windows_logical_disk_read_seconds_total{job=\"windows-exporter\"}[5m]) + irate(windows_logical_disk_write_seconds_total{job=\"windows-exporter\"}[5m])))"
      }
    ]
  }
},
{
  "name": "[variables('nodeAndKubernetesRecordingRuleGroupNameWin')]",
  "type": "Microsoft.AlertsManagement/prometheusRuleGroups",
  "apiVersion": "2023-03-01",
  "location": "[parameters('azureMonitorWorkspaceLocation')]",
  "properties": {
    "description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
    "scopes": [
      "[parameters('azureMonitorWorkspaceResourceId')]"
    ],
    "enabled": "[parameters('enableWindowsRecordingRules')]",
    "clusterName": "[variables('clusterName')]",
    "interval": "PT1M",
    "rules": [
      {
        "record": "node:windows_node_filesystem_usage:",
        "expression": "max by (instance,volume)((windows_logical_disk_size_bytes{job=\"windows-exporter\"} - windows_logical_disk_free_bytes{job=\"windows-exporter\"}) / windows_logical_disk_size_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": "node:windows_node_filesystem_avail:",
        "expression": "max by (instance, volume) (windows_logical_disk_free_bytes{job=\"windows-exporter\"} / windows_logical_disk_size_bytes{job=\"windows-exporter\"})"
      },
      {
        "record": ":windows_node_net_utilisation:sum_irate",
        "expression": "sum(irate(windows_net_bytes_total{job=\"windows-exporter\"}[5m]))"
      },
      {
        "record": "node:windows_node_net_utilisation:sum_irate",
        "expression": "sum by (instance) ((irate(windows_net_bytes_total{job=\"windows-exporter\"}[5m])))"
      },
      {
        "record": ":windows_node_net_saturation:sum_irate",
        "expression": "sum(irate(windows_net_packets_received_discarded_total{job=\"windows-exporter\"}[5m])) + sum(irate(windows_net_packets_outbound_discarded_total{job=\"windows-exporter\"}[5m]))"
      },
      {
        "record": "node:windows_node_net_saturation:sum_irate",
        "expression": "sum by (instance) ((irate(windows_net_packets_received_discarded_total{job=\"windows-exporter\"}[5m]) + irate(windows_net_packets_outbound_discarded_total{job=\"windows-exporter\"}[5m])))"
      },
      {
        "record": "windows_pod_container_available",
        "expression": "windows_container_available{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
      },
      {
        "record": "windows_container_total_runtime",
        "expression": "windows_container_cpu_usage_seconds_total{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
      },
      {
        "record": "windows_container_memory_usage",
        "expression": "windows_container_memory_usage_commit_bytes{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
      },
      {
        "record": "windows_container_private_working_set_usage",
        "expression": "windows_container_memory_usage_private_working_set_bytes{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
      },
      {
        "record": "windows_container_network_received_bytes_total",
        "expression": "windows_container_network_receive_bytes_total{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
      },
      {
        "record": "windows_container_network_transmitted_bytes_total",
        "expression": "windows_container_network_transmit_bytes_total{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
      },
      {
        "record": "kube_pod_windows_container_resource_memory_request",
        "expression": "max by (namespace, pod, container) (kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}) * on(container,pod,namespace) (windows_pod_container_available)"
      },
      {
        "record": "kube_pod_windows_container_resource_memory_limit",
        "expression": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on(container,pod,namespace) (windows_pod_container_available)"
      },
      {
        "record": "kube_pod_windows_container_resource_cpu_cores_request",
        "expression": "max by (namespace, pod, container) ( kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}) * on(container,pod,namespace) (windows_pod_container_available)"
      },
      {
        "record": "kube_pod_windows_container_resource_cpu_cores_limit",
        "expression": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on(container,pod,namespace) (windows_pod_container_available)"
      },
      {
        "record": "namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate",
        "expression": "sum by (namespace, pod, container) (rate(windows_container_total_runtime{}[5m]))"
      }
    ]
  }
}
]
},
"parameters": {
"azureMonitorWorkspaceResourceId": {
"value": "[parameters('azureMonitorWorkspaceResourceId')]"
},
"azureMonitorWorkspaceLocation": {
"value": "[parameters('azureMonitorWorkspaceLocation')]"
},
"metricLabelsAllowlist": {
"value": "[parameters('metricLabelsAllowlist')]"
},
"metricAnnotationsAllowList": {
"value": "[parameters('metricAnnotationsAllowList')]"
},
"enableWindowsRecordingRules": {
"value": "[parameters('enableWindowsRecordingRules')]"
},
"clusterResourceId": {
"value": "[field('id')]"
},
"clusterLocation": {
"value": "[field('location')]"
}
}
}
}
}
}
}