---
# VMRule with alerting rules for the "yc-kafka" Kafka exporter as seen from
# the "brusnika-prod" cluster. Contains one rule group with two alerts:
#   * KafkaExporterYcDown        - no healthy exporter target is scraped.
#   * KafkaTopicNoMessagesFor12h - watched topics show no offset growth.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: kafka-exporter-yc-rules
  namespace: vmstack
spec:
  groups:
    - name: kafka-exporter-yc.rules
      # Evaluation interval for every rule in this group.
      interval: 5m
      rules:
        # Fires when there is no `up` series with these labels whose value
        # is 1, i.e. the target is either missing or reports as down.
        - alert: KafkaExporterYcDown
          expr: absent(up{kafka_instance="yc-kafka", cluster="brusnika-prod"} == 1)
          for: 10m
          # Static labels attached to the alert.
          labels:
            severity: critical
            team: infra
            cluster: brusnika-prod
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: YC Kafka exporter is down in brusnika-prod
            description: No healthy kafka-exporter-yc target is scraped for 10 minutes.

        # Fires per topic (only `prod` and `system_log` are matched) when all
        # three conditions below hold for the same
        # (topic, kafka_instance, source_cluster, cluster) label set:
        #   1. Summed over partitions, max - min of the current offset across
        #      the last 12h equals 0, i.e. no partition offset moved.
        #   2. Every partition series has at least 1400 samples in that 12h
        #      window, so gaps in the data do not look like "no growth".
        #      NOTE(review): 1400 presumably assumes a ~30s scrape interval
        #      (1440 samples per 12h) - confirm against the scrape config.
        #   3. The exporter reports a partition count greater than 0 for the
        #      topic.
        # Keep comments out of the block scalar below: anything inside it
        # becomes part of the expression string.
        - alert: KafkaTopicNoMessagesFor12h
          expr: |
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                max_over_time(
                  kafka_topic_partition_current_offset{
                    kafka_instance="yc-kafka",
                    cluster="brusnika-prod",
                    topic=~"^(prod|system_log)$"
                  }[12h]
                )
                -
                min_over_time(
                  kafka_topic_partition_current_offset{
                    kafka_instance="yc-kafka",
                    cluster="brusnika-prod",
                    topic=~"^(prod|system_log)$"
                  }[12h]
                )
              ) == 0
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              min by (topic, kafka_instance, source_cluster, cluster) (
                count_over_time(
                  kafka_topic_partition_current_offset{
                    kafka_instance="yc-kafka",
                    cluster="brusnika-prod",
                    topic=~"^(prod|system_log)$"
                  }[12h]
                )
              ) >= 1400
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                kafka_topic_partitions{
                  kafka_instance="yc-kafka",
                  cluster="brusnika-prod",
                  topic=~"^(prod|system_log)$"
                }
              ) > 0
            )
          for: 5m
          # Static labels attached to the alert.
          labels:
            severity: warning
            team: infra
            cluster: brusnika-prod
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: No new messages in Kafka topic for 12h
            # {{ $labels.* }} placeholders are filled in from the alert's
            # labels when the alert is rendered.
            description: Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours.