Add brusnika-stage YC Kafka exporter monitoring

This commit is contained in:
Kochetkov S 2026-06-16 11:33:38 +03:00
parent a342d22cd7
commit 69d07ba476
5 changed files with 230 additions and 0 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,47 @@
# Alerting rules for the Yandex Cloud Kafka exporter scraped in brusnika-stage.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: kafka-exporter-yc-rules
  namespace: vmstack
spec:
  groups:
    - name: kafka-exporter-yc.rules
      interval: 5m
      rules:
        # Fires when no exporter series for yc-kafka has been ingested.
        # The selector uses `kafka_brokers` (a metric emitted by
        # kafka_exporter) rather than `up`: the kafka_instance label is
        # attached through the ServiceMonitor's metricRelabelings, and metric
        # relabeling is not applied to the scraper-generated `up` series.
        # Selecting `up` by that label would never match, so absent() would
        # fire permanently.
        - alert: KafkaExporterYcDown
          expr: absent(kafka_brokers{kafka_instance="yc-kafka", cluster="brusnika-stage"})
          for: 10m
          labels:
            severity: critical
            team: infra
            cluster: brusnika-stage
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: YC Kafka exporter is down in brusnika-stage
            description: No healthy kafka-exporter-yc target is scraped for 10 minutes.
        # Fires per topic when the summed partition offsets did not move over
        # the last 12h (max - min == 0) while the topic still reports
        # partitions. Internal topics (names starting with "__") are excluded.
        - alert: KafkaTopicNoMessagesFor12h
          expr: |
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}[12h])
                -
                min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}[12h])
              ) == 0
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}
              ) > 0
            )
          for: 5m
          labels:
            severity: warning
            team: infra
            cluster: brusnika-stage
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: No new messages in Kafka topic for 12h
            description: Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours.

View File

@ -0,0 +1,159 @@
# Service account for the kafka-exporter-yc workload; the HelmRelease in this
# file points the exporter Deployment's serviceAccountName at it.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kafka-exporter-yc
  namespace: kafka-exporter
---
# Flux HelmRelease deploying a kafka_exporter instance for the YC Kafka
# cluster. Connection settings are not taken from chart values: post-render
# patches add Vault agent annotations and replace the container command with
# a shell wrapper that reads broker list and credentials from /vault/secrets.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kafka-exporter-yc
  namespace: kafka-exporter
spec:
  dependsOn:
    - name: prometheus-stack
      namespace: prometheus-stack
    - name: vault
      namespace: vault
  interval: 5m
  timeout: 10m
  chart:
    spec:
      chart: kafka-exporter-prod
      version: "0.27.0"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  postRenderers:
    - kustomize:
        patches:
          # Deployment patch (JSON 6902 operations).
          # NOTE(review): `add` on the annotations path sets the whole map,
          # so any pod annotations rendered by the chart are replaced —
          # confirm the chart renders none that must be kept.
          # NOTE(review): the SASL password is passed as a command-line flag
          # and is therefore visible in the container's process arguments.
          - target:
              group: apps
              version: v1
              kind: Deployment
              name: kafka-exporter-yc
            patch: |-
              - op: add
                path: /spec/template/metadata/annotations
                value:
                  vault.hashicorp.com/agent-init-first: "true"
                  vault.hashicorp.com/agent-inject: "true"
                  vault.hashicorp.com/agent-pre-populate-only: "true"
                  vault.hashicorp.com/auth-path: auth/kubernetes
                  vault.hashicorp.com/role: kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-secret-kafka-bootstrap: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-bootstrap: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_BOOTSTRAP" }}
                    {{- end -}}
                  vault.hashicorp.com/agent-inject-secret-kafka-user: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-user: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_USER" }}
                    {{- end -}}
                  vault.hashicorp.com/agent-inject-secret-kafka-password: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-password: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_PASSWORD" }}
                    {{- end -}}
                  vault.hashicorp.com/agent-inject-secret-kafka-ca.pem: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-ca.pem: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_CA_PEM" }}
                    {{- end -}}
              - op: add
                path: /spec/template/spec/serviceAccountName
                value: kafka-exporter-yc
              - op: add
                path: /spec/template/spec/imagePullSecrets
                value:
                  - name: regcred
              - op: add
                path: /spec/template/spec/containers/0/command
                value:
                  - /bin/sh
                  - -ec
              - op: replace
                path: /spec/template/spec/containers/0/args
                value:
                  - |-
                    KAFKA_BOOTSTRAP="$(cat /vault/secrets/kafka-bootstrap)"
                    KAFKA_USER="$(cat /vault/secrets/kafka-user)"
                    KAFKA_PASSWORD="$(cat /vault/secrets/kafka-password)"
                    if command -v kafka_exporter >/dev/null 2>&1; then
                      KAFKA_EXPORTER_BIN="$(command -v kafka_exporter)"
                    else
                      KAFKA_EXPORTER_BIN=/bin/kafka_exporter
                    fi
                    # Split the comma-separated bootstrap list into one
                    # --kafka.server flag per broker, collected in "$@".
                    OLD_IFS="${IFS}"
                    IFS=,
                    set --
                    for broker in ${KAFKA_BOOTSTRAP}; do
                      broker="$(printf '%s' "${broker}" | tr -d '[:space:]')"
                      if [ -n "${broker}" ]; then
                        set -- "$@" --kafka.server="${broker}"
                      fi
                    done
                    IFS="${OLD_IFS}"
                    # Fail fast on an empty/blank secret instead of starting
                    # the exporter without any --kafka.server flag.
                    if [ "$#" -eq 0 ]; then
                      echo "KAFKA_BOOTSTRAP from Vault contains no broker addresses" >&2
                      exit 1
                    fi
                    exec "${KAFKA_EXPORTER_BIN}" \
                      "$@" \
                      --sasl.enabled \
                      --sasl.username="${KAFKA_USER}" \
                      --sasl.password="${KAFKA_PASSWORD}" \
                      --sasl.mechanism=scram-sha512 \
                      --tls.enabled \
                      --tls.ca-file=/vault/secrets/kafka-ca.pem \
                      --kafka.labels=yc-kafka \
                      --topic.exclude='^__.*' \
                      --verbosity=0
          # ServiceMonitor patch: adds an instance label to the selector.
          - target:
              group: monitoring.coreos.com
              version: v1
              kind: ServiceMonitor
              name: kafka-exporter-yc
            patch: |-
              - op: add
                path: /spec/selector/matchLabels/app.kubernetes.io~1instance
                value: kafka-exporter-yc
  values:
    fullnameOverride: kafka-exporter-yc
    image:
      repository: danielqsj/kafka-exporter
      # NOTE(review): `latest` combined with IfNotPresent is not
      # reproducible — nodes may keep different cached images. Consider
      # pinning a released tag.
      tag: latest
      pullPolicy: IfNotPresent
    kafkaExporter:
      kafka:
        # Placeholder only: the post-render patch above replaces the
        # container args, so the real brokers come from Vault.
        servers:
          - kafka-bootstrap.from-vault.invalid:9091
      # SASL/TLS are switched off at chart level; the wrapper script passes
      # the corresponding flags itself.
      sasl:
        enabled: false
      tls:
        enabled: false
    prometheus:
      serviceMonitor:
        enabled: true
        namespace: kafka-exporter
        interval: 30s
        additionalLabels:
          app: kafka-exporter-yc
        # Static labels stamped onto every scraped sample.
        metricRelabelings:
          - action: replace
            targetLabel: kafka_instance
            replacement: yc-kafka
          - action: replace
            targetLabel: source_cluster
            replacement: yc-kafka
          - action: replace
            targetLabel: monitored_cluster
            replacement: yc-kafka
          - action: replace
            targetLabel: cluster
            replacement: brusnika-stage

View File

@ -21,6 +21,9 @@ resources:
- ./node-exporter-vmnodescrape.yaml - ./node-exporter-vmnodescrape.yaml
- ./istio-gateway-stats-scrape.yaml - ./istio-gateway-stats-scrape.yaml
- ./istio-dashboard-compat-vmrule.yaml - ./istio-dashboard-compat-vmrule.yaml
- ./kafka-exporter-yc.yaml
- ./kafka-exporter-yc-rules.yaml
- ./kafka-exporter-yc-dashboard.yaml
- ../../../infrastructure/failed-pod-cleanup - ../../../infrastructure/failed-pod-cleanup
patches: patches:
- path: ./patches/istio-gateway.yaml - path: ./patches/istio-gateway.yaml

View File

@ -22,6 +22,15 @@ spec:
path: /spec/template/spec/imagePullSecrets path: /spec/template/spec/imagePullSecrets
value: value:
- name: regcred - name: regcred
- target:
group: monitoring.coreos.com
version: v1
kind: ServiceMonitor
name: kafka-exporter-kafka-exporter-prod
patch: |-
- op: add
path: /spec/selector/matchLabels/app.kubernetes.io~1instance
value: kafka-exporter
values: values:
image: image:
repository: danielqsj/kafka-exporter repository: danielqsj/kafka-exporter