# Viewer metadata (not part of the manifest): 54 lines, 2.2 KiB, YAML
---
# VMRule (VictoriaMetrics operator) defining alerting rules for the
# "yc-kafka" Kafka exporter as seen from the "brusnika-prod" cluster.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: kafka-exporter-yc-rules
  namespace: vmstack
spec:
  groups:
    - name: kafka-exporter-yc.rules
      # Interval configured for this rule group.
      interval: 5m
      rules:
        # Exporter availability: the expression returns a result only when
        # there is no `up` series with these labels whose value is 1.
        # The condition must hold for 10 minutes before the alert fires.
        - alert: KafkaExporterYcDown
          expr: absent(up{kafka_instance="yc-kafka", cluster="brusnika-prod"} == 1)
          for: 10m
          labels:
            severity: critical
            team: infra
            cluster: brusnika-prod
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: YC Kafka exporter is down in brusnika-prod
            description: No healthy kafka-exporter-yc target is scraped for 10 minutes.

        # Topic staleness for the `prod` and `system_log` topics. Three
        # conditions are joined with `and on (topic, kafka_instance,
        # source_cluster, cluster)`:
        #   1. Summed over the topic's series, max - min of the current
        #      offset over the last 12h is 0 (no offset growth).
        #   2. The smallest per-series sample count over the last 12h is at
        #      least 1400. NOTE(review): presumably this guards against
        #      firing on series with incomplete history; 12h of samples at a
        #      30s scrape interval would be 1440 - confirm the interval.
        #   3. The summed `kafka_topic_partitions` value for the topic is
        #      greater than 0.
        - alert: KafkaTopicNoMessagesFor12h
          expr: |
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
                -
                min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
              ) == 0
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              min by (topic, kafka_instance, source_cluster, cluster) (
                count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
              ) >= 1400
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}
              ) > 0
            )
          for: 5m
          labels:
            severity: warning
            team: infra
            cluster: brusnika-prod
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: No new messages in Kafka topic for 12h
            # Quoted so the template braces are always read as a plain string.
            description: 'Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours.'