From b7b65a03c295b2dc30ae3b7e50778ac7e0ea3c6a Mon Sep 17 00:00:00 2001 From: Kochetkov S Date: Wed, 17 Jun 2026 19:35:39 +0300 Subject: [PATCH] Add observability stack for brusnika prod --- .../clusterissuer-letsencrypt.yaml | 4 + .../istio-dashboard-compat-vmrule.yaml | 18 ++ .../istio-gateway-stats-scrape.yaml | 31 ++ .../kafka-exporter-yc-dashboard.yaml | 12 + .../kafka-exporter-yc-rules.yaml | 53 ++++ .../infrastructure/kafka-exporter-yc.yaml | 184 +++++++++++ .../infrastructure/kustomization.yaml | 78 +++++ .../node-exporter-vmnodescrape.yaml | 20 ++ .../infrastructure/patches/goalert.yaml | 35 +++ .../infrastructure/patches/istio-config.yaml | 88 +++++- .../patches/kafka-exporter.yaml | 51 +++ .../infrastructure/patches/minio.yaml | 34 ++ .../infrastructure/patches/openobserve.yaml | 101 ++++++ .../patches/opentelemetry-collector.yaml | 92 ++++++ .../patches/opentelemetry-operator.yaml | 19 ++ .../patches/postgres-exporter.yaml | 73 +++++ .../patches/prometheus-stack.yaml | 292 ++++++++++++++++++ .../infrastructure/patches/vmstack.yaml | 106 +++++++ 18 files changed, 1289 insertions(+), 2 deletions(-) create mode 100644 clusters/brusnika-prod/infrastructure/istio-dashboard-compat-vmrule.yaml create mode 100644 clusters/brusnika-prod/infrastructure/istio-gateway-stats-scrape.yaml create mode 100644 clusters/brusnika-prod/infrastructure/kafka-exporter-yc-dashboard.yaml create mode 100644 clusters/brusnika-prod/infrastructure/kafka-exporter-yc-rules.yaml create mode 100644 clusters/brusnika-prod/infrastructure/kafka-exporter-yc.yaml create mode 100644 clusters/brusnika-prod/infrastructure/node-exporter-vmnodescrape.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/goalert.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/kafka-exporter.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/minio.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/openobserve.yaml create mode 100644 
clusters/brusnika-prod/infrastructure/patches/opentelemetry-collector.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/opentelemetry-operator.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/postgres-exporter.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/prometheus-stack.yaml create mode 100644 clusters/brusnika-prod/infrastructure/patches/vmstack.yaml diff --git a/clusters/brusnika-prod/infrastructure/clusterissuer-letsencrypt.yaml b/clusters/brusnika-prod/infrastructure/clusterissuer-letsencrypt.yaml index 82b34bb..cf48472 100644 --- a/clusters/brusnika-prod/infrastructure/clusterissuer-letsencrypt.yaml +++ b/clusters/brusnika-prod/infrastructure/clusterissuer-letsencrypt.yaml @@ -11,6 +11,10 @@ spec: solvers: - selector: dnsNames: + - grafana.brusnika.onprem.sarex.io + - minio.brusnika.onprem.sarex.io + - openobserve.brusnika.onprem.sarex.io + - vmalert.brusnika.onprem.sarex.io - zitadel.brusnika.onprem.sarex.io http01: ingress: diff --git a/clusters/brusnika-prod/infrastructure/istio-dashboard-compat-vmrule.yaml b/clusters/brusnika-prod/infrastructure/istio-dashboard-compat-vmrule.yaml new file mode 100644 index 0000000..a310122 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/istio-dashboard-compat-vmrule.yaml @@ -0,0 +1,18 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: istio-dashboard-compat + namespace: vmstack +spec: + groups: + - name: istio-dashboard-compat.rules + rules: + - record: kube_deployment_status_replicas_available + expr: | + label_replace( + kube_deployment_status_replicas_available{deployment="istio-ingressgateway", namespace="ingress-nginx"}, + "namespace", + "istio-system", + "namespace", + ".*" + ) diff --git a/clusters/brusnika-prod/infrastructure/istio-gateway-stats-scrape.yaml b/clusters/brusnika-prod/infrastructure/istio-gateway-stats-scrape.yaml new file mode 100644 index 0000000..a0f3295 --- /dev/null +++ 
b/clusters/brusnika-prod/infrastructure/istio-gateway-stats-scrape.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Service +metadata: + name: istio-ingressgateway-stats + namespace: ingress-nginx + labels: + istio: ingressgateway-stats +spec: + selector: + istio: ingressgateway + ports: + - name: stats + port: 15020 + targetPort: 15020 +--- +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: istio-ingressgateway-stats + namespace: prometheus-stack +spec: + namespaceSelector: + matchNames: + - ingress-nginx + selector: + matchLabels: + istio: ingressgateway-stats + endpoints: + - port: stats + path: /stats/prometheus + interval: 15s diff --git a/clusters/brusnika-prod/infrastructure/kafka-exporter-yc-dashboard.yaml b/clusters/brusnika-prod/infrastructure/kafka-exporter-yc-dashboard.yaml new file mode 100644 index 0000000..b0ffb1b --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/kafka-exporter-yc-dashboard.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kafka-exporter-yc-dashboard + namespace: prometheus-stack + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: Kafka +data: + kafka-exporter-yc.json: |- + {"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana --"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"links":[],"panels":[{"id":1,"type":"row","title":"YC Kafka exporter","collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"panels":[]},{"id":2,"type":"stat","title":"Exporter 
up","gridPos":{"h":4,"w":6,"x":0,"y":1},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"max(up{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\"})","refId":"A","range":false,"instant":true}],"fieldConfig":{"defaults":{"unit":"short","mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"red"},{"color":"green","value":1}]}},"overrides":[]},"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"}},{"id":3,"type":"stat","title":"Topics without new messages for 12h","gridPos":{"h":4,"w":9,"x":6,"y":1},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"count(((sum by (topic) (max_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}[12h]) - min_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}[12h])) == 0) and on(topic) (min by (topic) (count_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}[12h])) >= 1400) and on(topic) (sum by (topic) (kafka_topic_partitions{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}) > 
0)))","refId":"A","range":false,"instant":true}],"fieldConfig":{"defaults":{"unit":"short","mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":1},{"color":"red","value":5}]}},"overrides":[]},"options":{"colorMode":"background","graphMode":"area","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"}},{"id":4,"type":"stat","title":"Known topics","gridPos":{"h":4,"w":9,"x":15,"y":1},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"count(count by (topic) (kafka_topic_partitions{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}))","refId":"A","range":false,"instant":true}],"fieldConfig":{"defaults":{"unit":"short","mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"colorMode":"value","graphMode":"none","justifyMode":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"textMode":"auto"}},{"id":5,"type":"table","title":"Topics with no offset growth for 12h","gridPos":{"h":8,"w":24,"x":0,"y":5},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"((sum by (topic, kafka_instance, cluster) (max_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}[12h]) - min_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}[12h])) == 0) and on(topic) (min by (topic) (count_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}[12h])) >= 1400) and on(topic) (sum by (topic) 
(kafka_topic_partitions{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"}) > 0))","refId":"A","range":false,"instant":true}],"fieldConfig":{"defaults":{"custom":{"align":"auto","cellOptions":{"type":"auto"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"showHeader":true,"cellHeight":"sm"},"transformations":[{"id":"labelsToFields","options":{"mode":"columns"}}]},{"id":10,"type":"row","title":"Topic traffic","collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":13},"panels":[]},{"id":11,"type":"timeseries","title":"Topic offset delta by $__rate_interval","gridPos":{"h":8,"w":12,"x":0,"y":14},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"sum by (topic) (clamp_min(delta(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"$topic\"}[$__rate_interval]), 0))","refId":"A","range":true,"legendFormat":"{{topic}}"}],"fieldConfig":{"defaults":{"unit":"short","custom":{"drawStyle":"line","lineInterpolation":"linear","lineWidth":1,"fillOpacity":10,"spanNulls":false,"showPoints":"auto","axisPlacement":"auto","scaleDistribution":{"type":"linear"},"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}}},{"id":12,"type":"timeseries","title":"Topic offset delta over 12h","gridPos":{"h":8,"w":12,"x":12,"y":14},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"sum by (topic) (max_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"$topic\"}[12h]) - 
min_over_time(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"$topic\"}[12h]))","refId":"A","range":true,"legendFormat":"{{topic}}"}],"fieldConfig":{"defaults":{"unit":"short","custom":{"drawStyle":"line","lineInterpolation":"linear","lineWidth":1,"fillOpacity":10,"spanNulls":false,"showPoints":"auto","axisPlacement":"auto","scaleDistribution":{"type":"linear"},"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}}},{"id":13,"type":"timeseries","title":"Current topic offset","gridPos":{"h":8,"w":12,"x":0,"y":22},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"sum by (topic) (kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"$topic\"})","refId":"A","range":true,"legendFormat":"{{topic}}"}],"fieldConfig":{"defaults":{"unit":"short","custom":{"drawStyle":"line","lineInterpolation":"linear","lineWidth":1,"fillOpacity":10,"spanNulls":false,"showPoints":"auto","axisPlacement":"auto","scaleDistribution":{"type":"linear"},"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}}},{"id":14,"type":"bargauge","title":"Partitions by topic","gridPos":{"h":8,"w":12,"x":12,"y":22},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"sum by (topic) 
(kafka_topic_partitions{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"$topic\"})","refId":"A","range":false,"legendFormat":"{{topic}}","instant":true}],"fieldConfig":{"defaults":{"unit":"short","mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"displayMode":"gradient","legend":{"displayMode":"list","placement":"bottom","showLegend":false},"orientation":"horizontal","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true}},{"id":20,"type":"row","title":"Consumer groups","collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":30},"panels":[]},{"id":21,"type":"timeseries","title":"Consumer lag by group/topic","gridPos":{"h":8,"w":24,"x":0,"y":31},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"sum by (consumergroup, topic) (kafka_consumergroup_lag_sum{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"$topic\"})","refId":"A","range":true,"legendFormat":"{{consumergroup}} / {{topic}}"}],"fieldConfig":{"defaults":{"unit":"short","custom":{"drawStyle":"line","lineInterpolation":"linear","lineWidth":1,"fillOpacity":10,"spanNulls":false,"showPoints":"auto","axisPlacement":"auto","scaleDistribution":{"type":"linear"},"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}}}],"preload":false,"refresh":"30s","schemaVersion":41,"tags":["kafka","kafka-exporter","brusnika-prod"],"templating":{"list":[{"current":{"text":"VictoriaMetrics","value":"vm"},"includeAll":false,"label":"Data 
Source","name":"ds_datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"current":{"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"definition":"label_values(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"},topic)","allValue":".*","includeAll":true,"label":"Topic","name":"topic","options":[],"query":{"qryType":1,"query":"label_values(kafka_topic_partition_current_offset{kafka_instance=\"yc-kafka\",cluster=\"brusnika-prod\",topic=~\"^(prod|system_log)\"},topic)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","type":"query"}]},"time":{"from":"now-12h","to":"now"},"timepicker":{},"timezone":"","title":"Kafka Exporter / YC Kafka","uid":"kafka-exporter-yc","version":1} diff --git a/clusters/brusnika-prod/infrastructure/kafka-exporter-yc-rules.yaml b/clusters/brusnika-prod/infrastructure/kafka-exporter-yc-rules.yaml new file mode 100644 index 0000000..95dd180 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/kafka-exporter-yc-rules.yaml @@ -0,0 +1,53 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: kafka-exporter-yc-rules + namespace: vmstack +spec: + groups: + - name: kafka-exporter-yc.rules + interval: 5m + rules: + - alert: KafkaExporterYcDown + expr: absent(up{kafka_instance="yc-kafka", cluster="brusnika-prod"} == 1) + for: 10m + labels: + severity: critical + team: infra + cluster: brusnika-prod + kafka_instance: yc-kafka + source_cluster: yc-kafka + annotations: + summary: YC Kafka exporter is down in brusnika-prod + description: No healthy kafka-exporter-yc target is scraped for 10 minutes. 
+ - alert: KafkaTopicNoMessagesFor12h + expr: | + ( + sum by (topic, kafka_instance, source_cluster, cluster) ( + max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h]) + - + min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h]) + ) == 0 + ) + and on (topic, kafka_instance, source_cluster, cluster) + ( + min by (topic, kafka_instance, source_cluster, cluster) ( + count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h]) + ) >= 1400 + ) + and on (topic, kafka_instance, source_cluster, cluster) + ( + sum by (topic, kafka_instance, source_cluster, cluster) ( + kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"} + ) > 0 + ) + for: 5m + labels: + severity: warning + team: infra + cluster: brusnika-prod + kafka_instance: yc-kafka + source_cluster: yc-kafka + annotations: + summary: No new messages in Kafka topic for 12h + description: Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours. 
diff --git a/clusters/brusnika-prod/infrastructure/kafka-exporter-yc.yaml b/clusters/brusnika-prod/infrastructure/kafka-exporter-yc.yaml new file mode 100644 index 0000000..a983fac --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/kafka-exporter-yc.yaml @@ -0,0 +1,184 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kafka-exporter-yc + namespace: kafka-exporter +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: kafka-exporter-yc + namespace: kafka-exporter +spec: + dependsOn: + - name: prometheus-stack + namespace: prometheus-stack + interval: 5m + timeout: 10m + chart: + spec: + chart: kafka-exporter-prod + version: "0.27.0" + sourceRef: + kind: HelmRepository + name: yc-oci-charts + namespace: flux-system + interval: 10m + install: + remediation: + retries: 3 + upgrade: + remediation: + retries: 3 + postRenderers: + - kustomize: + patches: + - target: + group: apps + version: v1 + kind: Deployment + name: kafka-exporter-yc + patch: |- + - op: add + path: /spec/template/metadata/annotations + value: + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/auth-path: auth/kubernetes + vault.hashicorp.com/role: kafka-exporter-yc + vault.hashicorp.com/agent-inject-secret-kafka-bootstrap: secrets/data/vault/apps/kafka-exporter-yc + vault.hashicorp.com/agent-inject-template-kafka-bootstrap: |- + {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}} + {{ index .Data.data "KAFKA_BOOTSTRAP" }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-kafka-user: secrets/data/vault/apps/kafka-exporter-yc + vault.hashicorp.com/agent-inject-template-kafka-user: |- + {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}} + {{ index .Data.data "KAFKA_USER" }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-kafka-password: secrets/data/vault/apps/kafka-exporter-yc + 
vault.hashicorp.com/agent-inject-template-kafka-password: |- + {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}} + {{ index .Data.data "KAFKA_PASSWORD" }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-kafka-ca.pem: secrets/data/vault/apps/kafka-exporter-yc + vault.hashicorp.com/agent-inject-template-kafka-ca.pem: |- + {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}} + {{ index .Data.data "KAFKA_CA_PEM" }} + {{- end -}} + - op: add + path: /spec/template/spec/serviceAccountName + value: kafka-exporter-yc + - op: add + path: /spec/template/spec/imagePullSecrets + value: + - name: regcred + - op: add + path: /spec/template/spec/containers/0/command + value: + - /bin/sh + - -ec + - op: replace + path: /spec/template/spec/containers/0/args + value: + - |- + KAFKA_BOOTSTRAP="$(cat /vault/secrets/kafka-bootstrap)" + KAFKA_USER="$(cat /vault/secrets/kafka-user)" + KAFKA_PASSWORD="$(cat /vault/secrets/kafka-password)" + if command -v kafka_exporter >/dev/null 2>&1; then + KAFKA_EXPORTER_BIN="$(command -v kafka_exporter)" + else + KAFKA_EXPORTER_BIN=/bin/kafka_exporter + fi + OLD_IFS="${IFS}" + IFS=, + set -- + for broker in ${KAFKA_BOOTSTRAP}; do + broker="$(printf '%s' "${broker}" | tr -d '[:space:]')" + if [ -n "${broker}" ]; then + set -- "$@" --kafka.server="${broker}" + fi + done + IFS="${OLD_IFS}" + exec "${KAFKA_EXPORTER_BIN}" \ + "$@" \ + --sasl.enabled \ + --sasl.username="${KAFKA_USER}" \ + --sasl.password="${KAFKA_PASSWORD}" \ + --sasl.mechanism=scram-sha512 \ + --tls.enabled \ + --tls.ca-file=/vault/secrets/kafka-ca.pem \ + --kafka.labels=yc-kafka \ + --topic.exclude='^__.*' \ + --verbosity=0 + - op: replace + path: /spec/template/spec/containers/0/livenessProbe/initialDelaySeconds + value: 60 + - op: replace + path: /spec/template/spec/containers/0/livenessProbe/failureThreshold + value: 6 + - op: replace + path: /spec/template/spec/containers/0/readinessProbe/initialDelaySeconds + value: 30 + - op: replace + path: 
/spec/template/spec/containers/0/readinessProbe/failureThreshold + value: 6 + - target: + group: monitoring.coreos.com + version: v1 + kind: ServiceMonitor + name: kafka-exporter-yc + patch: |- + - op: add + path: /spec/selector/matchLabels/app.kubernetes.io~1instance + value: kafka-exporter-yc + - op: add + path: /spec/endpoints/0/relabelings + value: + - action: replace + targetLabel: kafka_instance + replacement: yc-kafka + - action: replace + targetLabel: source_cluster + replacement: yc-kafka + - action: replace + targetLabel: monitored_cluster + replacement: yc-kafka + - action: replace + targetLabel: cluster + replacement: brusnika-prod + values: + fullnameOverride: kafka-exporter-yc + image: + repository: danielqsj/kafka-exporter + tag: latest + pullPolicy: IfNotPresent + kafkaExporter: + kafka: + servers: + - kafka-bootstrap.from-vault.invalid:9091 + sasl: + enabled: false + tls: + enabled: false + prometheus: + serviceMonitor: + enabled: true + namespace: kafka-exporter + interval: 30s + additionalLabels: + app: kafka-exporter-yc + metricRelabelings: + - action: replace + targetLabel: kafka_instance + replacement: yc-kafka + - action: replace + targetLabel: source_cluster + replacement: yc-kafka + - action: replace + targetLabel: monitored_cluster + replacement: yc-kafka + - action: replace + targetLabel: cluster + replacement: brusnika-prod diff --git a/clusters/brusnika-prod/infrastructure/kustomization.yaml b/clusters/brusnika-prod/infrastructure/kustomization.yaml index 6468db7..f39b6d2 100644 --- a/clusters/brusnika-prod/infrastructure/kustomization.yaml +++ b/clusters/brusnika-prod/infrastructure/kustomization.yaml @@ -7,8 +7,23 @@ resources: - ../../../infrastructure/istio-config - ../../../infrastructure/vault - ../../../infrastructure/zitadel + - ../../../infrastructure/minio + - ../../../infrastructure/openobserve + - ../../../infrastructure/vmstack + - ../../../infrastructure/prometheus-stack + - ../../../infrastructure/opentelemetry-operator + 
- ../../../infrastructure/opentelemetry-collector + - ../../../infrastructure/goalert + - ../../../infrastructure/kafka-exporter + - ../../../infrastructure/postgres-exporter - ./vault-ingress.yaml - ./clusterissuer-letsencrypt.yaml + - ./node-exporter-vmnodescrape.yaml + - ./istio-gateway-stats-scrape.yaml + - ./istio-dashboard-compat-vmrule.yaml + - ./kafka-exporter-yc.yaml + - ./kafka-exporter-yc-rules.yaml + - ./kafka-exporter-yc-dashboard.yaml - ../../../infrastructure/failed-pod-cleanup patches: - path: ./patches/istio-gateway.yaml @@ -39,6 +54,69 @@ patches: kind: HelmRelease name: zitadel namespace: zitadel + - path: ./patches/minio.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: minio + namespace: minio + - path: ./patches/openobserve.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: openobserve + namespace: openobserve + - path: ./patches/vmstack.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: vmstack + namespace: vmstack + - path: ./patches/prometheus-stack.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: prometheus-stack + namespace: prometheus-stack + - path: ./patches/opentelemetry-operator.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: opentelemetry-operator + namespace: opentelemetry-operator + - path: ./patches/opentelemetry-collector.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: opentelemetry-collector + namespace: opentelemetry-collector + - path: ./patches/goalert.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: goalert + namespace: goalert + - path: ./patches/kafka-exporter.yaml + target: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: kafka-exporter + namespace: kafka-exporter + - path: ./patches/postgres-exporter.yaml + target: + group: 
helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + name: postgres-exporter + namespace: postgres-exporter - path: ./patches/failed-pod-cleanup.yaml target: group: helm.toolkit.fluxcd.io diff --git a/clusters/brusnika-prod/infrastructure/node-exporter-vmnodescrape.yaml b/clusters/brusnika-prod/infrastructure/node-exporter-vmnodescrape.yaml new file mode 100644 index 0000000..b090585 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/node-exporter-vmnodescrape.yaml @@ -0,0 +1,20 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMNodeScrape +metadata: + name: vm-prod-node-exporter + namespace: vmstack +spec: + path: /metrics + port: "9100" + scheme: http + selector: {} + relabelConfigs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - action: replace + sourceLabels: + - __meta_kubernetes_node_name + targetLabel: node + - action: replace + replacement: vm-stack/vm-prod-node-exporter + targetLabel: job diff --git a/clusters/brusnika-prod/infrastructure/patches/goalert.yaml b/clusters/brusnika-prod/infrastructure/patches/goalert.yaml new file mode 100644 index 0000000..c02e833 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/goalert.yaml @@ -0,0 +1,35 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: goalert + namespace: goalert +spec: + interval: 5m + timeout: 10m + values: + image: + name: cr.yandex/crp3ccidau046kdj8g9q/goalert:0.32.0 + pullPolicy: IfNotPresent + imagePullSecrets: + - name: regcred + global: + imagePullSecrets: + - regcred + goalert: + existingSecret: + name: postgresql-secret + keys: + GOALERT_DB_URL: GOALERT_DB_URL + GOALERT_DATA_ENCRYPTION_KEY: GOALERT_DATA_ENCRYPTION_KEY + environment: + GOALERT_HTTP_PREFIX: "" + postgresql: + enabled: false + ingress: + enabled: false + className: nginx + hosts: + - host: vmalert.brusnika.onprem.sarex.io + paths: + - path: / + pathType: Prefix diff --git a/clusters/brusnika-prod/infrastructure/patches/istio-config.yaml 
b/clusters/brusnika-prod/infrastructure/patches/istio-config.yaml index 5c82895..e2a9e8a 100644 --- a/clusters/brusnika-prod/infrastructure/patches/istio-config.yaml +++ b/clusters/brusnika-prod/infrastructure/patches/istio-config.yaml @@ -131,6 +131,27 @@ spec: issuerRef: name: letsencrypt kind: ClusterIssuer + grafana-tls: + namespace: ingress-nginx + dnsNames: + - grafana.brusnika.onprem.sarex.io + issuerRef: + name: letsencrypt + kind: ClusterIssuer + openobserve-tls: + namespace: ingress-nginx + dnsNames: + - openobserve.brusnika.onprem.sarex.io + issuerRef: + name: letsencrypt + kind: ClusterIssuer + vmalert-tls: + namespace: ingress-nginx + dnsNames: + - vmalert.brusnika.onprem.sarex.io + issuerRef: + name: letsencrypt + kind: ClusterIssuer istio: envoyFilters: {} authorizationPolicies: {} @@ -297,6 +318,36 @@ spec: - zitadel.brusnika.onprem.sarex.io tls: credentialName: zitadel-tls + grafana: + name: grafana-gw + namespace: ingress-nginx + selector: + istio: ingressgateway + servers: + - hosts: + - grafana.brusnika.onprem.sarex.io + tls: + credentialName: grafana-tls + openobserve: + name: openobserve-gw + namespace: ingress-nginx + selector: + istio: ingressgateway + servers: + - hosts: + - openobserve.brusnika.onprem.sarex.io + tls: + credentialName: openobserve-tls + vmalert: + name: vmalert-gw + namespace: ingress-nginx + selector: + istio: ingressgateway + servers: + - hosts: + - vmalert.brusnika.onprem.sarex.io + tls: + credentialName: vmalert-tls rabbitmq: name: rabbitmq-gw namespace: ingress-nginx @@ -540,8 +591,8 @@ spec: redirectCode: 308 - path: prefix: / - service: minio-console-service.minio.svc.cluster.local - port: 80 + service: minio-console.minio.svc.cluster.local + port: 9001 sso-check-vs: namespace: sso-check hosts: @@ -610,6 +661,39 @@ spec: prefix: / service: zitadel-idp-contour.zitadel.svc.cluster.local port: 8080 + grafana-vs: + namespace: prometheus-stack + hosts: + - grafana.brusnika.onprem.sarex.io + gateways: + - 
ingress-nginx/grafana-gw + routes: + - path: + prefix: / + service: prometheus-stack-grafana.prometheus-stack.svc.cluster.local + port: 80 + openobserve-vs: + namespace: openobserve + hosts: + - openobserve.brusnika.onprem.sarex.io + gateways: + - ingress-nginx/openobserve-gw + routes: + - path: + prefix: / + service: openobserve-web.openobserve.svc.cluster.local + port: 5080 + vmalert-vs: + namespace: goalert + hosts: + - vmalert.brusnika.onprem.sarex.io + gateways: + - ingress-nginx/vmalert-gw + routes: + - path: + prefix: / + service: goalert.goalert.svc.cluster.local + port: 8081 rabbitmq-vs: namespace: workflow hosts: diff --git a/clusters/brusnika-prod/infrastructure/patches/kafka-exporter.yaml b/clusters/brusnika-prod/infrastructure/patches/kafka-exporter.yaml new file mode 100644 index 0000000..7f87c65 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/kafka-exporter.yaml @@ -0,0 +1,51 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: kafka-exporter + namespace: kafka-exporter +spec: + dependsOn: + - name: prometheus-stack + namespace: prometheus-stack + interval: 5m + timeout: 10m + postRenderers: + - kustomize: + patches: + - target: + group: apps + version: v1 + kind: Deployment + name: kafka-exporter-kafka-exporter-prod + patch: |- + - op: add + path: /spec/template/spec/imagePullSecrets + value: + - name: regcred + - target: + group: monitoring.coreos.com + version: v1 + kind: ServiceMonitor + name: kafka-exporter-kafka-exporter-prod + patch: |- + - op: add + path: /spec/selector/matchLabels/app.kubernetes.io~1instance + value: kafka-exporter + values: + image: + repository: danielqsj/kafka-exporter + tag: latest + pullPolicy: IfNotPresent + kafkaExporter: + kafka: + servers: + - brusnika-prod-kafka-bootstrap.kafka.svc.cluster.local:9092 + sasl: + enabled: false + tls: + enabled: false + prometheus: + serviceMonitor: + enabled: true + namespace: kafka-exporter + interval: 30s diff --git 
a/clusters/brusnika-prod/infrastructure/patches/minio.yaml b/clusters/brusnika-prod/infrastructure/patches/minio.yaml new file mode 100644 index 0000000..a3d132a --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/minio.yaml @@ -0,0 +1,34 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: minio + namespace: minio +spec: + interval: 5m + timeout: 10m + values: + nameOverride: "minio" + mode: standalone + environment: + MINIO_SERVER_URL: "https://minio.brusnika.onprem.sarex.io" + MINIO_BROWSER_REDIRECT_URL: "https://minio.brusnika.onprem.sarex.io/console/" + MINIO_API_CORS_ALLOW_ORIGIN: "https://minio.brusnika.onprem.sarex.io" + imagePullSecrets: + - name: regcred + vaultRoot: + enabled: true + role: minio + authPath: auth/kubernetes + secretPath: secrets/data/minio/admin + rootUserKey: rootUser + rootPasswordKey: rootPassword + drivesPerNode: 1 + replicas: 1 + nodeSelector: + type: storage + persistence: + storageClass: local-path + size: 100Gi + resources: + requests: + memory: 1Gi diff --git a/clusters/brusnika-prod/infrastructure/patches/openobserve.yaml b/clusters/brusnika-prod/infrastructure/patches/openobserve.yaml new file mode 100644 index 0000000..4b78630 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/openobserve.yaml @@ -0,0 +1,101 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: openobserve + namespace: openobserve +spec: + interval: 5m + timeout: 30m + postRenderers: + - kustomize: + patches: + - target: + group: apps + version: v1 + kind: Deployment + name: openobserve-web + patch: |- + - op: replace + path: /spec/strategy + value: + type: Recreate + - op: replace + path: /spec/template/spec/containers/0/command + value: + - /bin/sh + - -ec + - op: replace + path: /spec/template/spec/containers/0/args + value: + - | + set -a + . 
/vault/secrets/openobserve-env + set +a + exec /openobserve + - op: replace + path: /spec/template/spec/containers/0/livenessProbe/initialDelaySeconds + value: 300 + - op: replace + path: /spec/template/spec/containers/0/readinessProbe/initialDelaySeconds + value: 60 + values: + universal-chart: + services: + openobserve: + deployment: + replicaCount: + _default: 1 + envs: + - name: ZO_HTTP_PORT + value: + _default: "5080" + - name: ZO_LOCAL_MODE + value: + _default: "false" + - name: ZO_META_STORE + value: + _default: postgres + - name: ZO_CLUSTER_COORDINATOR + value: + _default: nats + - name: ZO_NATS_REPLICAS + value: + _default: "1" + - name: ZO_S3_PROVIDER + value: + _default: s3 + - name: ZO_S3_SERVER_URL + value: + _default: http://minio.minio.svc.cluster.local:9000 + - name: ZO_S3_BUCKET_NAME + value: + _default: open-observe + - name: ZO_S3_REGION_NAME + value: + _default: ru-central1 + - name: ZO_TELEMETRY + value: + _default: "false" + serviceAccount: + enabled: true + name: + _default: openobserve-vault + imagePullSecrets: + create: + _default: false + name: + _default: regcred + openobserve: + secret: + create: false + nats: + enabled: true + replicaCount: 1 + persistence: + enabled: true + size: 10Gi + storageClassName: csi-disk + otelCollector: + enabled: false + vault: + enabled: true diff --git a/clusters/brusnika-prod/infrastructure/patches/opentelemetry-collector.yaml b/clusters/brusnika-prod/infrastructure/patches/opentelemetry-collector.yaml new file mode 100644 index 0000000..3abc5e0 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/opentelemetry-collector.yaml @@ -0,0 +1,92 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: opentelemetry-collector + namespace: opentelemetry-collector +spec: + install: + disableWait: true + upgrade: + disableWait: true + dependsOn: + - name: prometheus-stack + namespace: prometheus-stack + - name: openobserve + namespace: openobserve + interval: 5m + timeout: 30m + 
values: + imagePullSecrets: + - name: regcred + podAnnotations: + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/auth-path: auth/kubernetes + vault.hashicorp.com/role: openobserve + vault.hashicorp.com/agent-inject-secret-openobserve-basic-auth: secrets/data/vault/apps/openobserve + vault.hashicorp.com/agent-inject-template-openobserve-basic-auth: |- + {{ "{{- with secret \"secrets/data/vault/apps/openobserve\" -}}" }} + Basic {{ "{{ index .Data.data \"OPENOBSERVE_BASIC_AUTH\" }}" }} + {{ "{{- end -}}" }} + mode: daemonset + fullnameOverride: otel-collector + tolerations: + - operator: Exists + rollout: + rollingUpdate: + maxUnavailable: 4 + presets: + logsCollection: + enabled: true + includeCollectorLogs: false + kubernetesAttributes: + enabled: true + config: + receivers: + filelog: + include: + - /var/log/pods/*/*/*.log + exclude: + - /var/log/pods/opentelemetry-collector_*/*/*.log + start_at: end + operators: + - type: container + otlp: + protocols: + grpc: + endpoint: ${env:MY_POD_IP}:4317 + http: + endpoint: ${env:MY_POD_IP}:4318 + processors: + batch: {} + k8sattributes: {} + exporters: + otlp: + endpoint: http://openobserve-web.openobserve.svc.cluster.local:5080 + tls: + insecure: true + otlphttp/openobserve: + endpoint: http://openobserve-web.openobserve.svc.cluster.local:5080/api/default + headers: + Authorization: ${file:/vault/secrets/openobserve-basic-auth} + tls: + insecure: true + service: + pipelines: + logs: + receivers: + - filelog + processors: + - k8sattributes + - batch + exporters: + - otlphttp/openobserve + traces: + receivers: + - otlp + processors: + - k8sattributes + - batch + exporters: + - otlphttp/openobserve diff --git a/clusters/brusnika-prod/infrastructure/patches/opentelemetry-operator.yaml b/clusters/brusnika-prod/infrastructure/patches/opentelemetry-operator.yaml new file mode 100644 index 0000000..a786dda --- 
/dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/opentelemetry-operator.yaml @@ -0,0 +1,19 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: opentelemetry-operator + namespace: opentelemetry-operator +spec: + interval: 5m + timeout: 10m + values: + imagePullSecrets: + - name: regcred + manager: + collectorImage: + repository: cr.yandex/crp3ccidau046kdj8g9q/opentelemetry-collector + admissionWebhooks: + certManager: + enabled: false + autoGenerateCert: + enabled: true diff --git a/clusters/brusnika-prod/infrastructure/patches/postgres-exporter.yaml b/clusters/brusnika-prod/infrastructure/patches/postgres-exporter.yaml new file mode 100644 index 0000000..211cbc9 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/postgres-exporter.yaml @@ -0,0 +1,73 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: postgres-exporter + namespace: postgres-exporter +spec: + dependsOn: + - name: prometheus-stack + namespace: prometheus-stack + interval: 5m + timeout: 10m + chart: + spec: + version: 0.0.2-prod + values: + image: + name: cr.yandex/crp3ccidau046kdj8g9q/postgres-exporter:preprod_21350302 + pullPolicy: IfNotPresent + pullSecrets: + - dockerhub + serviceMonitor: + enabled: true + namespace: postgres-exporter + config: + datasource: + host: 192.168.10.8 + user: root + port: '5432' + database: postgres + sslmode: disable + datasources: + - name: attachments + uri: 192.168.10.8:5432/attachments_db?sslmode=disable + - name: bim + uri: 192.168.10.8:5432/bimapidb?sslmode=disable + - name: comparisons + uri: 192.168.10.8:5432/comparisons_db?sslmode=disable + - name: django + uri: 192.168.10.8:5432/sarex_db?sslmode=disable + - name: documentations + uri: 192.168.10.8:5432/documentations?sslmode=disable + - name: drawings + uri: 192.168.10.8:5432/drawings?sslmode=disable + - name: eav + uri: 192.168.10.8:5432/eav?sslmode=disable + - name: flows + uri: 192.168.10.8:5432/flows_db?sslmode=disable + - 
name: inspections + uri: 192.168.10.8:5432/inspections_db?sslmode=disable + - name: issues + uri: 192.168.10.8:5432/issues?sslmode=disable + - name: notes + uri: 192.168.10.8:5432/notes_db?sslmode=disable + - name: openobserve + uri: 192.168.10.8:5432/openobserve?sslmode=disable + - name: postgres + uri: 192.168.10.8:5432/postgres?sslmode=disable + - name: resources + uri: 192.168.10.8:5432/resources?sslmode=disable + - name: rfi + uri: 192.168.10.8:5432/rfi_db?sslmode=disable + - name: subscriptions + uri: 192.168.10.8:5432/subscriptions?sslmode=disable + - name: system-log + uri: 192.168.10.8:5432/system_log?sslmode=disable + - name: transmittal + uri: 192.168.10.8:5432/transmittal_db?sslmode=disable + - name: workflow + uri: 192.168.10.8:5432/workflows_db?sslmode=disable + - name: workspaces + uri: 192.168.10.8:5432/workspaces_db?sslmode=disable + - name: zitadel + uri: 192.168.10.8:5432/zitadel?sslmode=disable diff --git a/clusters/brusnika-prod/infrastructure/patches/prometheus-stack.yaml b/clusters/brusnika-prod/infrastructure/patches/prometheus-stack.yaml new file mode 100644 index 0000000..c662d40 --- /dev/null +++ b/clusters/brusnika-prod/infrastructure/patches/prometheus-stack.yaml @@ -0,0 +1,292 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: prometheus-stack + namespace: prometheus-stack +spec: + interval: 5m + timeout: 20m + postRenderers: + - kustomize: + patches: + - target: + version: v1 + kind: ConfigMap + name: prometheus-stack-k8s-cluster-health + namespace: prometheus-stack + patch: |- + - op: replace + path: /data/k8s-cluster-health.json + value: |- + {"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana --"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & 
Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":4400,"links":[],"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Nodes — состояние и ресурсы","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":4,"w":6,"x":0,"y":1},"id":2,"options":{"colorMode":"value","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"expr":"sum(max by (node) (kube_node_status_condition{condition=\"Ready\",status=\"true\"}))","instant":true,"refId":"A"}],"title":"Nodes Ready (по узлам)","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":4,"w":18,"x":6,"y":1},"id":3,"options":{"displayMode":"gradient","legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"sum by (condition) (max by (node,condition) (kube_node_status_condition{status=\"true\"}))","instant":true,"legendFormat":"{{label_name}}","refId":"A"}],"title":"Node conditions (true) по 
типу","type":"bargauge"},{"datasource":{"uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[{"options":{"0":{"color":"#808080","index":0},"1":{"color":"red","index":1}},"type":"value"}]},"overrides":[]},"gridPos":{"h":13,"w":24,"x":0,"y":5},"id":4,"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"pieType":"pie","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","exemplar":false,"expr":"max by (node, key, value, effect) (kube_node_spec_taint{effect=\"NoSchedule\"})","instant":true,"legendFormat":"{{ node }} | {{ key }}={{ value }}","range":false,"refId":"A"}],"title":"Node taints (NoSchedule)","type":"piechart"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":6,"w":12,"x":0,"y":18},"id":7,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"max by (node) 
(kube_node_status_allocatable{resource=\"memory\"})","instant":false,"legendFormat":"{{ node }}","range":true,"refId":"A"}],"title":"Allocatable Memory per node","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"cores"},"overrides":[]},"gridPos":{"h":6,"w":12,"x":12,"y":18},"id":6,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"max by (node) (kube_node_status_allocatable{resource=\"cpu\"})","instant":false,"legendFormat":"{{ node }}","range":true,"refId":"A"}],"title":"Allocatable CPU (cores) per node","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Pods — статусы и 
рестарты","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"}]}},"overrides":[]},"gridPos":{"h":6,"w":12,"x":0,"y":25},"id":11,"options":{"displayMode":"gradient","legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"sum by (phase) (kube_pod_status_phase{namespace=~\"$namespace\"})","instant":true,"legendFormat":"{{label_name}}","refId":"A"}],"title":"Pod phase (по фазам, namespace=$namespace)","type":"bargauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[{"options":{"Error":{"color":"red","index":0}},"type":"value"}],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":6,"w":12,"x":12,"y":25},"id":14,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":" sum by (reason) (\n kube_pod_container_status_last_terminated_reason{namespace=~\"$namespace\"}\n )","instant":true,"legendFormat":"{{ reason }}","refId":"A"}],"title":"Last terminated reason (containers) 
[namespace=$namespace]","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":7,"w":24,"x":0,"y":31},"id":13,"options":{"displayMode":"gradient","legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"topk(10, sum by (namespace, pod) (increase(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}[$__rate_interval])))","instant":true,"legendFormat":"{{ namespace }} / {{ pod }}","refId":"A"}],"title":"Top pod restarts (Δ за $__rate_interval) [namespace=$namespace]","type":"bargauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":38},"id":20,"panels":[],"title":"Jobs / CronJobs","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":3,"w":7,"x":0,"y":39},"id":21,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"expr":"sum(kube_job_status_failed)","instant":true,"refId":"A"}],"title":"Jobs 
failed","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":3,"w":8,"x":7,"y":39},"id":22,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"expr":"sum(kube_job_status_active)","instant":true,"refId":"A"}],"title":"Jobs active","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":3,"w":9,"x":15,"y":39},"id":23,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"expr":"sum(kube_job_status_succeeded)","instant":true,"refId":"A"}],"title":"Jobs succeeded","type":"stat"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":42},"id":30,"panels":[],"title":"Storage — PV/PVC и 
объём","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[]},"overrides":[]},"gridPos":{"h":8,"w":6,"x":0,"y":43},"id":31,"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"pieType":"donut","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"expr":"sum by (phase) (kube_persistentvolumeclaim_status_phase)","instant":true,"refId":"A"}],"title":"PVC status (по фазам)","type":"piechart"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[]},"overrides":[]},"gridPos":{"h":8,"w":6,"x":6,"y":43},"id":32,"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"pieType":"donut","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"expr":"sum by (phase) (kube_persistentvolume_status_phase)","instant":true,"refId":"A"}],"title":"PV status (по 
фазам)","type":"piechart"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":43},"id":33,"options":{"displayMode":"gradient","legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"topk(\n 10,\n (1 - (\n sum by (namespace, persistentvolumeclaim) (kubelet_volume_stats_available_bytes{persistentvolumeclaim!=\"\"})\n /\n sum by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{persistentvolumeclaim!=\"\"})\n )) * 100\n)","instant":true,"legendFormat":"{{ persistentvolumeclaim }}","refId":"A"}],"title":"Top PVC usage (util%)","type":"bargauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":51},"id":40,"panels":[],"title":"Workloads — StatefulSets и 
HPA","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":52},"id":41,"options":{"displayMode":"lcd","legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"horizontal","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","exemplar":false,"expr":"kube_statefulset_replicas{namespace=~\"$namespace\"}","format":"time_series","instant":true,"legendFormat":"{{namespace}}/{{statefulset}} - Desired","range":false,"refId":"A"}],"title":"StatefulSet replicas (desired)","transformations":[{"id":"labelsToFields","options":{"mode":"seriesToRows","valueLabel":"statefulset"}}],"type":"bargauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":52},"id":55,"options":{"displayMode":"lcd","legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":false},"maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"horizontal","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","exemplar":false,"expr":"kube_statefulset_status_replicas_ready{namespace=~\"$namespace\"}","format":"time_series","instant":true,"legendFormat":"{{namespace}}/{{statefulset}} - Desired","range":false,"refId":"A"}],"title":"StatefulSet replicas 
(desired)","transformations":[{"id":"labelsToFields","options":{"mode":"seriesToRows","valueLabel":"statefulset"}}],"type":"bargauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","fillOpacity":80,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineWidth":1,"scaleDistribution":{"type":"linear"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":8,"x":0,"y":60},"id":42,"options":{"barRadius":0,"barWidth":0.97,"fullHighlight":false,"groupWidth":0.7,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"orientation":"auto","showValue":"auto","stacking":"none","tooltip":{"hideZeros":false,"mode":"single","sort":"none"},"xTickLabelRotation":0,"xTickLabelSpacing":0},"pluginVersion":"11.6.1","targets":[{"expr":"kube_horizontalpodautoscaler_spec_min_replicas","instant":true,"refId":"A"}],"title":"HPA 
min","transformations":[{"id":"labelsToFields","options":{"mode":"seriesToRows","valueLabel":"horizontalpodautoscaler"}}],"type":"barchart"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","fillOpacity":80,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineWidth":1,"scaleDistribution":{"type":"linear"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":8,"x":8,"y":60},"id":56,"options":{"barRadius":0,"barWidth":0.97,"fullHighlight":false,"groupWidth":0.7,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"orientation":"auto","showValue":"auto","stacking":"none","tooltip":{"hideZeros":false,"mode":"single","sort":"none"},"xTickLabelRotation":0,"xTickLabelSpacing":0},"pluginVersion":"11.6.1","targets":[{"expr":"kube_horizontalpodautoscaler_spec_max_replicas","instant":true,"refId":"B"}],"title":"HPA 
max","transformations":[{"id":"labelsToFields","options":{"mode":"seriesToRows","valueLabel":"horizontalpodautoscaler"}}],"type":"barchart"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","fillOpacity":80,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"lineWidth":1,"scaleDistribution":{"type":"linear"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":8,"x":16,"y":60},"id":57,"options":{"barRadius":0,"barWidth":0.97,"fullHighlight":false,"groupWidth":0.7,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"orientation":"auto","showValue":"auto","stacking":"none","tooltip":{"hideZeros":false,"mode":"single","sort":"none"},"xTickLabelRotation":0,"xTickLabelSpacing":0},"pluginVersion":"11.6.1","targets":[{"expr":"kube_horizontalpodautoscaler_status_desired_replicas","instant":true,"refId":"C"}],"title":"HPA min/max/desired","transformations":[{"id":"labelsToFields","options":{"mode":"seriesToRows","valueLabel":"horizontalpodautoscaler"}}],"type":"barchart"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"hideFrom":{"legend":false,"tooltip":false,"viz":false}},"mappings":[]},"overrides":[]},"gridPos":{"h":9,"w":24,"x":0,"y":68},"id":43,"options":{"legend":{"displayMode":"list","placement":"bottom","showLegend":true},"pieType":"pie","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"sum by (condition) (max by (horizontalpodautoscaler,condition) 
(kube_horizontalpodautoscaler_status_condition{status=\"true\"}))","instant":true,"legendFormat":"{{label_name}}","refId":"A"}],"title":"HPA conditions (true) по типу","type":"piechart"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":77},"id":50,"panels":[],"title":"API Server — запросы и задержки","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":78},"id":51,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"expr":"sum by (code) (rate(apiserver_request_total[$__rate_interval]))","range":true,"refId":"A"}],"title":"API requests rate by code 
(apiserver_request_total)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":78},"id":52,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"expr":"sum by (code) (rate(rest_client_requests_total{code=~\"4..|5..\"}[$__rate_interval]))","range":true,"refId":"A"}],"title":"Client REST 4xx/5xx 
(rest_client_requests_total)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"s"},"overrides":[]},"gridPos":{"h":6,"w":12,"x":0,"y":86},"id":54,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"expr":"histogram_quantile(0.99, sum by (verb,le) (rate(apiserver_request_duration_seconds_bucket[$__rate_interval])))","range":true,"refId":"A"}],"title":"API latency p99 by verb","type":"timeseries"}],"preload":false,"refresh":"30s","schemaVersion":41,"tags":["kubernetes","kube-state-metrics","apiserver"],"templating":{"list":[{"current":{"text":"VictoriaMetrics","value":"vm"},"label":"Data Source","name":"ds_datasource","options":[],"query":"prometheus","refresh":1,"type":"datasource"},{"current":{"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"includeAll":true,"label":"Namespace","name":"namespace","options":[],"query":"label_values(kube_pod_info, 
namespace)","refresh":1,"type":"query"},{"current":{"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"definition":"label_values(kube_node_status_condition,node)","includeAll":true,"label":"Node","multi":true,"name":"node","options":[],"query":{"qryType":1,"query":"label_values(kube_node_status_condition,node)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"type":"query","allValue":".*"}]},"time":{"from":"now-3h","to":"now"},"timepicker":{},"timezone":"","title":"Kubernetes / Cluster Health","uid":"k8s-cluster-health","version":7} + - target: + version: v1 + kind: ConfigMap + name: prometheus-stack-node-exporter-dashboard + namespace: prometheus-stack + patch: |- + - op: replace + path: /data/node-exporter-dashboard.json + value: |- + {"annotations":{"list":[{"builtIn":1,"datasource":{"type":"grafana","uid":"-- Grafana --"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":3904,"links":[],"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Общая секция - Агрегированные метрики по всем 
серверам","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":85},{"color":"red","value":95}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":0,"y":1},"id":2,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"100 - (avg by (node) (rate(node_cpu_seconds_total{job=\"vm-stack/vm-prod-node-exporter\", mode=\"idle\"}[$__rate_interval])) * 100)","interval":"","legendFormat":"CPU Busy {{ node }}","range":true,"refId":"A"}],"title":"CPU Busy 
(Overall)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":6,"y":1},"id":3,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"avg(100 - ((node_memory_MemAvailable_bytes{job=\"vm-stack/vm-prod-node-exporter\"} * 100) / node_memory_MemTotal_bytes{job=\"vm-stack/vm-prod-node-exporter\"})) by (node)","interval":"","legendFormat":"RAM Used {{ node }}","range":true,"refId":"A"}],"title":"RAM Used 
(Overall)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":12,"y":1},"id":4,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"avg(100 - ((node_filesystem_avail_bytes{job=\"vm-stack/vm-prod-node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{job=\"vm-stack/vm-prod-node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"})) by (node)","interval":"","legendFormat":"Root FS Used {{ node }}","range":true,"refId":"A"}],"title":"Root FS Used 
(Overall)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bps"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":18,"y":1},"id":5,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"expr":"sum(rate(node_network_receive_bytes_total{job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval]) * 8) by (node)","interval":"","legendFormat":"recv {{node}}","refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"expr":"sum(rate(node_network_transmit_bytes_total{job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval]) * 8) by (node)","interval":"","legendFormat":"trans {{node}}","refId":"B"}],"title":"Network Traffic (Overall)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":9},"id":6,"panels":[],"title":"БЛОК №1. 
CPU / Load","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":85},{"color":"red","value":95}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":0,"y":10},"id":7,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","exemplar":false,"expr":"100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\",node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])) * 100)","instant":true,"interval":"","legendFormat":"CPU Busy","range":false,"refId":"A"}],"title":"1. CPU Busy (gauge)","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":100}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":6,"y":10},"id":8,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_load5{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Sys Load 5m","range":true,"refId":"A"}],"title":"2. 
Sys Load (5m avg) (gauge)","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":100}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":12,"y":10},"id":9,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_load15{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Sys Load 15m","range":true,"refId":"A"}],"title":"3. Sys Load (15m avg) (gauge)","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":18,"y":10},"id":10,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"showThresholdLabels":false,"showThresholdMarkers":true,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"count(count(node_cpu_seconds_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}) by (cpu))","interval":"","legendFormat":"CPU Cores","range":true,"refId":"A"}],"title":"4. 
CPU Cores (stat)","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":18},"id":11,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"system\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Busy System {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"user\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Busy User {{node}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"iowait\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Busy Iowait 
{{node}}","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=~\".*irq\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Busy IRQs {{node}}","range":true,"refId":"D"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"steal\"}[$__rate_interval])) by (node)) * 100","interval":"","legendFormat":"Steal {{node}}","range":true,"refId":"E"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"idle\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Idle {{node}}","range":true,"refId":"F"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode!~\"(user|idle|system|iowait|irq|softirq|steal)\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Busy Other {{node}}","range":true,"refId":"G"}],"title":"5. 
CPU Basic (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":18},"id":12,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"system\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"System {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"user\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"User {{node}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"nice\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Nice {{node}}","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", 
mode=\"idle\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Idle {{node}}","range":true,"refId":"D"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"iowait\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Iowait {{node}}","range":true,"refId":"E"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"irq\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"IRQ {{node}}","range":true,"refId":"F"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"softirq\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"SoftIRQ {{node}}","range":true,"refId":"G"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"steal\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Steal {{node}}","range":true,"refId":"H"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(avg(irate(node_cpu_seconds_total{node=~\"$node\", mode=\"guest\"}[$__rate_interval])) by (instance)) * 100","interval":"","legendFormat":"Guest {{node}}","range":true,"refId":"I"}],"title":"6. 
CPU — timeseries","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":26},"id":13,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(process_cpu_seconds_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"CPU time {{node}}","range":true,"refId":"A"}],"title":"7. 
CPU time spent in user and system contexts — timeseries","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":26},"id":14,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_KernelStack_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Kernel Stack {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_Percpu_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"PerCPU {{node}}","range":true,"refId":"B"}],"title":"8. Memory Kernel / CPU — timeseries","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":15,"panels":[],"title":"БЛОК №2. 
Memory & Swap","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":0,"y":35},"id":16,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"100 - ((node_memory_MemAvailable_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} * 100) / node_memory_MemTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"})","interval":"","legendFormat":"RAM Used","range":true,"refId":"A"}],"title":"1. RAM Used (gauge)","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":6,"y":35},"id":17,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"showThresholdLabels":false,"showThresholdMarkers":true,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_MemTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"RAM Total","range":true,"refId":"A"}],"title":"2. 
RAM Total (stat)","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":50},{"color":"red","value":80}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":12,"y":35},"id":18,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"((node_memory_SwapTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} - node_memory_SwapFree_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}) * 100) / clamp_min(node_memory_SwapTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}, 1)","interval":"","legendFormat":"SWAP Used","range":true,"refId":"A"}],"title":"3. SWAP Used (gauge)","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":18,"y":35},"id":19,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"showThresholdLabels":false,"showThresholdMarkers":true,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_SwapTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"SWAP Total","range":true,"refId":"A"}],"title":"4. 
SWAP Total (stat)","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":24,"x":0,"y":43},"id":20,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_MemTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"RAM Total {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_MemTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} - node_memory_MemFree_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} - (node_memory_Cached_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} + node_memory_Buffers_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"})","interval":"","legendFormat":"RAM Used {{node}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_Cached_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} + 
node_memory_Buffers_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"RAM Cache + Buffer {{node}}","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_memory_MemFree_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"RAM Free {{node}}","range":true,"refId":"D"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"(node_memory_SwapTotal_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} - node_memory_SwapFree_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"})","interval":"","legendFormat":"SWAP Used {{node}}","range":true,"refId":"E"}],"title":"5. Memory Basic (timeseries)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":51},"id":21,"panels":[],"title":"БЛОК №3. Disk & Filesystem","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":0,"y":52},"id":22,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"100 - ((node_filesystem_avail_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"})","interval":"","legendFormat":"Root FS Used","range":true,"refId":"A"}],"title":"1. 
Root FS Used (gauge)","type":"gauge"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":6,"x":6,"y":52},"id":23,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"showThresholdLabels":false,"showThresholdMarkers":true,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filesystem_size_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"}","interval":"","legendFormat":"RootFS Total","range":true,"refId":"A"}],"title":"2. RootFS Total (stat)","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":52},"id":24,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"1
1.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filesystem_size_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"}","interval":"","legendFormat":"Total {{node}} {{mountpoint}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filesystem_size_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"} - node_filesystem_avail_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"}","interval":"","legendFormat":"Used {{node}} {{mountpoint}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filesystem_avail_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"}","interval":"","legendFormat":"Free {{node}} {{mountpoint}}","range":true,"refId":"C"}],"title":"3. Disk Space Used Basic (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"iops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":60},"id":25,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"plugi
nVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_reads_completed_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"Reads {{node}} {{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_writes_completed_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"Writes {{node}} {{device}}","range":true,"refId":"B"}],"title":"4. Disk IOps and Throughput (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":60},"id":26,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_read_time_seconds_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"Read Latency {{node}} 
{{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_write_time_seconds_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"Write Latency {{node}} {{device}}","range":true,"refId":"B"}],"title":"5. Disk IO Latency (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":68},"id":27,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filesystem_avail_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"}","interval":"","legendFormat":"Free {{node}} {{mountpoint}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filesystem_size_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"} - 
node_filesystem_avail_bytes{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",fstype!=\"rootfs\"}","interval":"","legendFormat":"Used {{node}} {{mountpoint}}","range":true,"refId":"B"}],"title":"6. Disk Space Free/Used per mountpoint (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":68},"id":28,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_read_bytes_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"Read {{node}} {{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_written_bytes_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"Write {{node}} {{device}}","range":true,"refId":"B"}],"title":"7. 
Storage Filesystem (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":95}]},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":24,"x":0,"y":76},"id":29,"options":{"minVizHeight":75,"minVizWidth":75,"orientation":"auto","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showThresholdLabels":false,"showThresholdMarkers":true,"sizing":"auto"},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_disk_io_time_seconds_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval]) * 100","interval":"","legendFormat":"I/O Utilization {{ device }}","range":true,"refId":"A"}],"title":"8. I/O Utilization (gauge)","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":84},"id":30,"panels":[],"title":"БЛОК №4. 
Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":85},"id":31,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_receive_bytes_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval]) * 8","interval":"","legendFormat":"recv {{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_transmit_bytes_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval]) * 8","interval":"","legendFormat":"trans {{device}}","range":true,"refId":"B"}],"title":"1. 
Network Traffic Basic (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"pps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":85},"id":32,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_receive_packets_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"recv {{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_transmit_packets_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"trans {{device}}","range":true,"refId":"B"}],"title":"2. 
Network Packets (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"pps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":93},"id":33,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_receive_errs_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"recv errs {{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_transmit_errs_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"trans errs {{device}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_receive_drop_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"recv drop 
{{device}}","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_network_transmit_drop_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","interval":"","legendFormat":"trans drop {{device}}","range":true,"refId":"D"}],"title":"3. Network Errors and Drops (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":1}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":93},"id":34,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"showThresholdLabels":false,"showThresholdMarkers":true,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_network_up{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"{{device}}","range":true,"refId":"A"}],"title":"4. 
Network Operational Status (stat)","type":"stat"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"binBps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":101},"id":35,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_network_receive_bytes_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"In Octets {{node}} {{device}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_network_transmit_bytes_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Out Octets {{node}} {{device}}","range":true,"refId":"B"}],"title":"5. 
Netstat In/Out Octets (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":101},"id":36,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_sockstat_FRAG_inuse{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"FRAG inuse {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_sockstat_RAW_inuse{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"RAW inuse {{node}}","range":true,"refId":"B"}],"title":"6. Sockstat FRAG / RAW (timeseries)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":109},"id":37,"panels":[],"title":"БЛОК №5. 
System","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"vis":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":110},"id":55,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_load1{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Load 1m {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_load5{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Load 5m {{node}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_load15{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"Load 15m {{node}}","range":true,"refId":"C"}],"title":"1. 
Load Average (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":1}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":110},"id":48,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"showThresholdLabels":false,"showThresholdMarkers":true,"textMode":"auto","wideLayout":true},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_vmstat_oom_kill{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"OOM Kills","range":true,"refId":"A"}],"title":"2. OOM Killer (stat)","type":"stat"},{"datasource":{"type":"prometheus","uid":"vm"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"ops"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":118},"id":56,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"rate(node_context
_switches_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","legendFormat":"Context Switches {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"rate(node_intr_total{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}[$__rate_interval])","hide":false,"instant":false,"legendFormat":"Interrupts {{node}}","range":true,"refId":"B"}],"title":"Context Switches and Interrupts (timeseries) ","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"vm"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":118},"id":57,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"node_entropy_available_bits{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","legendFormat":"Entropy {{node}}","range":true,"refId":"A"}],"title":"Entropy (timeseries) 
","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"vm"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"celsius"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":126},"id":58,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"node_hwmon_temp_celsius{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","legendFormat":"{{sensor}} {{node}}","range":true,"refId":"A"}],"title":"Temperature Sensors 
(timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"vm"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":126},"id":59,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"node_timex_tick_seconds{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","legendFormat":"Tick {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_timex_tai_offset_seconds{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","hide":false,"instant":false,"legendFormat":"Offset {{node}}","range":true,"refId":"B"}],"title":"Time Misc (tick / TAI offset) 
(timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"vm"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":134},"id":60,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"node_timex_sync_status{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","legendFormat":"Sync Status {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_timex_frequency_adjustment_ratio{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","hide":false,"instant":false,"legendFormat":"Freq Adjustment {{node}}","range":true,"refId":"B"}],"title":"Time Synchronized Status (timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"vm"},"description":"LA1 divided by the amount of CPU 
cores","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":134},"id":61,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"node_load1{node=~\"$node\", job=\"vm-stack/vm-prod-node-exporter\"} / on(node) group_left() count by (node)(node_cpu_seconds_total{node=~\"$node\", job=\"vm-stack/vm-prod-node-exporter\"})","legendFormat":"Load vs CPU Cores {{node}}","range":true,"refId":"A"}],"title":"Load vs CPU Cores 
(timeseries)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"vm"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"d"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":142},"id":62,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"editorMode":"code","expr":"(node_time_seconds{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"} - node_boot_time_seconds{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}) / 86400","legendFormat":"Uptime in days","range":true,"refId":"A"}],"title":"Uptime (stat)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":150},"id":49,"panels":[],"title":"БЛОК №6. 
Processes / File Descriptors","type":"row"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":151},"id":50,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filefd_allocated{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","interval":"","legendFormat":"File Descriptors {{ node }}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_filefd_maximum{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\"}","hide":false,"instant":false,"legendFormat":"File Descriptors Max {{ node }}","range":true,"refId":"B"}],"title":"1. 
File Descriptors (stat)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":151},"id":51,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"single","sort":"none"}},"pluginVersion":"11.6.1","targets":[{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_processes_state{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",state=\"running\"}","interval":"","legendFormat":"Running processes on {{node}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_processes_state{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",state=\"sleeping\"}","hide":false,"interval":"","legendFormat":"Sleeping processes on {{node}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"editorMode":"code","expr":"node_processes_state{node=~\"$node\",job=\"vm-stack/vm-prod-node-exporter\",state=\"zombie\"}","hide":false,"interval":"","legendFormat":"Zombie processes on {{node}}","range":true,"refId":"C"}],"title":"2. 
Processes (stat)","type":"timeseries"}],"preload":false,"refresh":"30s","schemaVersion":41,"tags":["infrastructure","monitoring","prometheus","node-exporter"],"templating":{"list":[{"current":{"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"${ds_datasource}"},"definition":"label_values(node_cpu_seconds_total{job=\"vm-stack/vm-prod-node-exporter\"},node)","allValue":".*","includeAll":true,"label":"Node","name":"node","options":[],"query":{"qryType":1,"query":"label_values(node_cpu_seconds_total{job=\"vm-stack/vm-prod-node-exporter\"},node)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","type":"query"},{"current":{"text":"VictoriaMetrics","value":"vm"},"includeAll":false,"label":"Data Source","name":"ds_datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Kubernetes / Node Exporter","uid":"node-exporter-dashboard","version":22} + - target: + group: monitoring.coreos.com + version: v1 + kind: PrometheusRule + name: application-alerts + namespace: prometheus-stack + patch: |- + - op: replace + path: /spec/groups/0/rules/0/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/1/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/2/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/3/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/4/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/5/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/6/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/7/labels/cluster + value: brusnika-prod + - target: + group: monitoring.coreos.com + version: v1 + kind: PrometheusRule + name: camunda-zeebe-alerts + namespace: prometheus-stack + patch: |- + - op: 
replace + path: /spec/groups/0/rules/0/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/1/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/2/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/3/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/4/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/5/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/6/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/7/labels/cluster + value: brusnika-prod + - target: + group: monitoring.coreos.com + version: v1 + kind: PrometheusRule + name: elasticsearch-alerts + namespace: prometheus-stack + patch: |- + - op: replace + path: /spec/groups/0/rules/0/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/1/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/2/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/3/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/4/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/5/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/6/labels/cluster + value: brusnika-prod + - target: + group: monitoring.coreos.com + version: v1 + kind: PrometheusRule + name: postgresql-alerts + namespace: prometheus-stack + patch: |- + - op: replace + path: /spec/groups/0/rules/0/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/1/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/2/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/3/labels/cluster + value: brusnika-prod + - target: + group: monitoring.coreos.com + version: v1 + kind: PrometheusRule + name: 
pv-free-space-alerts + namespace: prometheus-stack + patch: |- + - op: replace + path: /spec/groups/0/rules/0/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/1/labels/cluster + value: brusnika-prod + - op: replace + path: /spec/groups/0/rules/2/labels/cluster + value: brusnika-prod + install: + disableWait: true + upgrade: + disableWait: true + values: + fullnameOverride: prometheus-stack + crds: + enabled: true + defaultRules: + create: true + alertmanager: + enabled: false + prometheus: + enabled: false + prometheusOperator: + enabled: true + kubeStateMetrics: + enabled: false + nodeExporter: + enabled: true + prometheus-node-exporter: + tolerations: + - operator: Exists + prometheus: + monitor: + enabled: true + jobLabel: node-exporter + metricRelabelings: + - action: replace + targetLabel: job + replacement: vm-stack/vm-prod-node-exporter + extraServiceMonitors: + - name: zitadel-external-metrics + namespace: prometheus-stack + labels: + release: prometheus + spec: + endpoints: + - interval: 30s + scheme: https + path: /debug/metrics + port: https + tlsConfig: + serverName: zitadel.brusnika.onprem.sarex.io + insecureSkipVerify: false + relabelings: + - targetLabel: __address__ + replacement: zitadel.brusnika.onprem.sarex.io:443 + namespaceSelector: + matchNames: + - prometheus-stack + selector: + matchLabels: + app: zitadel-metrics-ext + - name: prometheus-istio-ingressgateway + namespace: istio-system + labels: + release: prometheus + spec: + endpoints: + - interval: 15s + port: status-port + jobLabel: istio + namespaceSelector: + any: true + selector: + matchExpressions: + - key: istio + operator: In + values: + - ingressgateway + - name: prometheus-istio-istiod + namespace: istio-system + labels: + release: prometheus + spec: + endpoints: + - interval: 15s + port: http-monitoring + jobLabel: istio + namespaceSelector: + any: true + selector: + matchExpressions: + - key: istio + operator: In + values: + - pilot + grafana: + 
enabled: true + adminUser: grafana-admin + admin: + existingSecret: grafana-admin + userKey: admin-user + passwordKey: admin-password + extraObjects: + - | + {{- $secret := lookup "v1" "Secret" .Release.Namespace "grafana-admin" }} + apiVersion: v1 + kind: Secret + metadata: + name: grafana-admin + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: grafana + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: Helm + type: Opaque + data: + admin-user: {{ .Values.adminUser | b64enc | quote }} + admin-password: {{ if $secret }}{{ index $secret.data "admin-password" | quote }}{{ else }}{{ randAlphaNum 40 | b64enc | quote }}{{ end }} + persistence: + enabled: true + type: sts + storageClassName: csi-disk + accessModes: + - ReadWriteOnce + size: 20Gi + ingress: + enabled: false + env: + GF_SERVER_DOMAIN: grafana.brusnika.onprem.sarex.io + GF_SERVER_ROOT_URL: https://grafana.brusnika.onprem.sarex.io/ + sidecar: + dashboards: + enabled: true + searchNamespace: ALL + label: grafana_dashboard + labelValue: "1" + datasources: + enabled: true + defaultDatasourceEnabled: true + isDefaultDatasource: true + name: VictoriaMetrics + uid: vm + url: http://vmsingle-vmstack.vmstack.svc.cluster.local:8428
diff --git a/clusters/brusnika-prod/infrastructure/patches/vmstack.yaml b/clusters/brusnika-prod/infrastructure/patches/vmstack.yaml
new file mode 100644
index 0000000..cc04cbc
--- /dev/null
+++ b/clusters/brusnika-prod/infrastructure/patches/vmstack.yaml
@@ -0,0 +1,119 @@
+# Flux HelmRelease values for the vmstack release (namespace vmstack).
+# NOTE(review): the file sits under patches/, so it is presumably merged over
+# a base HelmRelease that supplies the chart reference - confirm.
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: vmstack
+  namespace: vmstack
+spec:
+  interval: 5m
+  timeout: 20m
+  values:
+    global:
+      clusterLabel: cluster
+    nameOverride: vmstack
+    fullnameOverride: vmstack
+    defaultRules:
+      create: true
+    victoria-metrics-operator:
+      admissionWebhooks:
+        enabled: false
+    # Single-node storage: one replica, one week of retention, 100Gi volume.
+    vmsingle:
+      enabled: true
+      spec:
+        retentionPeriod: 1w
+        replicaCount: 1
+        storage:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 100Gi
+          storageClassName: csi-disk
+    vmcluster:
+      enabled: false
+    vmagent:
+      enabled: true
+      spec:
+        remoteWrite:
+          # The trailing dot after cluster.local makes the host name absolute.
+          - url: http://vmsingle-vmstack.vmstack.svc.cluster.local.:8428/api/v1/write
+    vmalert:
+      enabled: true
+      spec:
+        updateStrategy: Recreate
+        port: "8880"
+        evaluationInterval: 30s
+        externalLabels:
+          cluster: brusnika-prod
+        # Two notifier targets: the in-cluster vmalertmanager pod and an external HTTPS endpoint.
+        notifiers:
+          - url: http://vmalertmanager-vmstack-0.vmalertmanager-vmstack.vmstack.svc.cluster.local:9093
+          - url: https://vmalertmanager.sarex.tech
+        extraArgs:
+          external.url: https://vmalert.brusnika.onprem.sarex.io
+      ingress:
+        enabled: false
+    alertmanager:
+      enabled: true
+      spec:
+        replicaCount: 1
+        # NOTE(review): same host as vmalert's external.url above - confirm
+        # that Alertmanager is really meant to advertise the vmalert URL.
+        externalURL: https://vmalert.brusnika.onprem.sarex.io
+        volumes: []
+        volumeMounts: []
+      config:
+        global:
+          resolve_timeout: 5m
+        # Every alert is routed to the goalert webhook; the "null" receiver is
+        # declared but not referenced by this route.
+        route:
+          receiver: goalert
+          group_by:
+            - alertname
+            - namespace
+            - pod
+          group_wait: 30s
+          group_interval: 5m
+          repeat_interval: 6h
+        receivers:
+          - name: "null"
+          - name: goalert
+            webhook_configs:
+              - url: http://goalert.goalert.svc.cluster.local:8081/api/v2/prometheusalertmanager/incoming
+                send_resolved: true
+    kube-state-metrics:
+      enabled: true
+    prometheus-node-exporter:
+      enabled: false
+      vmScrape:
+        enabled: false
+        spec:
+          jobLabel: jobLabel
+          selector:
+            matchLabels:
+              app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
+          endpoints:
+            - port: metrics
+              relabelConfigs:
+                # Expose the pod's node name as the "node" label.
+                - action: replace
+                  sourceLabels:
+                    - __meta_kubernetes_pod_node_name
+                  targetLabel: node
+                # Fixed job value; the node-exporter dashboard queries filter on it.
+                - targetLabel: job
+                  replacement: vm-stack/vm-prod-node-exporter
+              metricRelabelConfigs:
+                # Drop series whose mountpoint is a kubelet pod path.
+                - action: drop
+                  sourceLabels:
+                    - mountpoint
+                  regex: "/var/lib/kubelet/pods.+"
+      vmNodeScrape:
+        enabled: false
+    grafana:
+      enabled: false