Fix brusnika observability exporters and alerts
This commit is contained in:
parent
849e0bfd9e
commit
721f62270e
@ -0,0 +1,202 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-connectors
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: http
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: connectors
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-identity
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: metrics
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: identity
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-operate
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: management
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: operate
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-optimize
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: management
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: optimize
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-tasklist
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: management
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: tasklist
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-zeebe
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: http
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: zeebe-broker
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-prod-zeebe-gateway
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-prod
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: http
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: zeebe-gateway
|
||||||
File diff suppressed because one or more lines are too long
@ -16,38 +16,38 @@ spec:
|
|||||||
team: infra
|
team: infra
|
||||||
cluster: brusnika-prod
|
cluster: brusnika-prod
|
||||||
kafka_instance: yc-kafka
|
kafka_instance: yc-kafka
|
||||||
source_cluster: yc-kafka
|
source_cluster: brusnika-prod
|
||||||
annotations:
|
annotations:
|
||||||
summary: YC Kafka exporter is down in brusnika-prod
|
summary: YC Kafka exporter is down in brusnika-prod
|
||||||
description: No healthy kafka-exporter-yc target is scraped for 10 minutes.
|
description: No healthy kafka-exporter-yc target is scraped for 10 minutes.
|
||||||
- alert: KafkaTopicNoMessagesFor12h
|
- alert: KafkaTopicNoMessages12h
|
||||||
expr: |
|
expr: |
|
||||||
(
|
(
|
||||||
sum by (topic, kafka_instance, source_cluster, cluster) (
|
sum by (topic, kafka_instance, source_cluster, cluster) (
|
||||||
max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
|
max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(bru\\.cde\\.folders\\.prod|system-log-prod)$"}[12h])
|
||||||
-
|
-
|
||||||
min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
|
min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(bru\\.cde\\.folders\\.prod|system-log-prod)$"}[12h])
|
||||||
) == 0
|
) == 0
|
||||||
)
|
)
|
||||||
and on (topic, kafka_instance, source_cluster, cluster)
|
and on (topic, kafka_instance, source_cluster, cluster)
|
||||||
(
|
(
|
||||||
min by (topic, kafka_instance, source_cluster, cluster) (
|
min by (topic, kafka_instance, source_cluster, cluster) (
|
||||||
count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
|
count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(bru\\.cde\\.folders\\.prod|system-log-prod)$"}[12h])
|
||||||
) >= 1400
|
) >= 1400
|
||||||
)
|
)
|
||||||
and on (topic, kafka_instance, source_cluster, cluster)
|
and on (topic, kafka_instance, source_cluster, cluster)
|
||||||
(
|
(
|
||||||
sum by (topic, kafka_instance, source_cluster, cluster) (
|
sum by (topic, kafka_instance, source_cluster, cluster) (
|
||||||
kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}
|
kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(bru\\.cde\\.folders\\.prod|system-log-prod)$"}
|
||||||
) > 0
|
) > 0
|
||||||
)
|
)
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: critical
|
||||||
team: infra
|
team: infra
|
||||||
cluster: brusnika-prod
|
cluster: brusnika-prod
|
||||||
kafka_instance: yc-kafka
|
kafka_instance: yc-kafka
|
||||||
source_cluster: yc-kafka
|
source_cluster: brusnika-prod
|
||||||
annotations:
|
annotations:
|
||||||
summary: No new messages in Kafka topic for 12h
|
summary: No new messages in Kafka topic for 12h
|
||||||
description: Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours.
|
description: Kafka topic {{ $labels.topic }} has no offset growth for 12h in brusnika-prod
|
||||||
|
|||||||
@ -21,6 +21,8 @@ resources:
|
|||||||
- ./node-exporter-vmnodescrape.yaml
|
- ./node-exporter-vmnodescrape.yaml
|
||||||
- ./istio-gateway-stats-scrape.yaml
|
- ./istio-gateway-stats-scrape.yaml
|
||||||
- ./istio-dashboard-compat-vmrule.yaml
|
- ./istio-dashboard-compat-vmrule.yaml
|
||||||
|
- ./camunda-servicemonitors.yaml
|
||||||
|
- ./rabbitmq-exporter.yaml
|
||||||
- ./kafka-exporter-yc.yaml
|
- ./kafka-exporter-yc.yaml
|
||||||
- ./kafka-exporter-yc-rules.yaml
|
- ./kafka-exporter-yc-rules.yaml
|
||||||
- ./kafka-exporter-yc-dashboard.yaml
|
- ./kafka-exporter-yc-dashboard.yaml
|
||||||
|
|||||||
@ -29,45 +29,11 @@ spec:
|
|||||||
database: postgres
|
database: postgres
|
||||||
sslmode: disable
|
sslmode: disable
|
||||||
datasources:
|
datasources:
|
||||||
- name: attachments
|
- name: goalert
|
||||||
uri: 192.168.10.8:5432/attachments_db?sslmode=disable
|
uri: 192.168.10.8:5432/goalert?sslmode=disable
|
||||||
- name: bim
|
|
||||||
uri: 192.168.10.8:5432/bimapidb?sslmode=disable
|
|
||||||
- name: comparisons
|
|
||||||
uri: 192.168.10.8:5432/comparisons_db?sslmode=disable
|
|
||||||
- name: django
|
|
||||||
uri: 192.168.10.8:5432/sarex_db?sslmode=disable
|
|
||||||
- name: documentations
|
|
||||||
uri: 192.168.10.8:5432/documentations?sslmode=disable
|
|
||||||
- name: drawings
|
|
||||||
uri: 192.168.10.8:5432/drawings?sslmode=disable
|
|
||||||
- name: eav
|
|
||||||
uri: 192.168.10.8:5432/eav?sslmode=disable
|
|
||||||
- name: flows
|
|
||||||
uri: 192.168.10.8:5432/flows_db?sslmode=disable
|
|
||||||
- name: inspections
|
|
||||||
uri: 192.168.10.8:5432/inspections_db?sslmode=disable
|
|
||||||
- name: issues
|
|
||||||
uri: 192.168.10.8:5432/issues?sslmode=disable
|
|
||||||
- name: notes
|
|
||||||
uri: 192.168.10.8:5432/notes_db?sslmode=disable
|
|
||||||
- name: openobserve
|
- name: openobserve
|
||||||
uri: 192.168.10.8:5432/openobserve?sslmode=disable
|
uri: 192.168.10.8:5432/openobserve?sslmode=disable
|
||||||
- name: postgres
|
- name: postgres
|
||||||
uri: 192.168.10.8:5432/postgres?sslmode=disable
|
uri: 192.168.10.8:5432/postgres?sslmode=disable
|
||||||
- name: resources
|
|
||||||
uri: 192.168.10.8:5432/resources?sslmode=disable
|
|
||||||
- name: rfi
|
|
||||||
uri: 192.168.10.8:5432/rfi_db?sslmode=disable
|
|
||||||
- name: subscriptions
|
|
||||||
uri: 192.168.10.8:5432/subscriptions?sslmode=disable
|
|
||||||
- name: system-log
|
|
||||||
uri: 192.168.10.8:5432/system_log?sslmode=disable
|
|
||||||
- name: transmittal
|
|
||||||
uri: 192.168.10.8:5432/transmittal_db?sslmode=disable
|
|
||||||
- name: workflow
|
|
||||||
uri: 192.168.10.8:5432/workflows_db?sslmode=disable
|
|
||||||
- name: workspaces
|
|
||||||
uri: 192.168.10.8:5432/workspaces_db?sslmode=disable
|
|
||||||
- name: zitadel
|
- name: zitadel
|
||||||
uri: 192.168.10.8:5432/zitadel?sslmode=disable
|
uri: 192.168.10.8:5432/zitadel?sslmode=disable
|
||||||
|
|||||||
90
clusters/brusnika-prod/infrastructure/rabbitmq-exporter.yaml
Normal file
90
clusters/brusnika-prod/infrastructure/rabbitmq-exporter.yaml
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: rabbitmq-exporter
|
||||||
|
namespace: workflow
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: rabbitmq-exporter
|
||||||
|
image: kbudde/rabbitmq-exporter:1.0.0-RC19
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: RABBIT_URL
|
||||||
|
value: http://rabbitmq-service.workflow.svc.cluster.local:15672
|
||||||
|
- name: RABBIT_USER
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: rabbitmq-secret
|
||||||
|
key: username
|
||||||
|
- name: RABBIT_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: rabbitmq-secret
|
||||||
|
key: password
|
||||||
|
- name: PUBLISH_PORT
|
||||||
|
value: "9419"
|
||||||
|
- name: SKIPVERIFY
|
||||||
|
value: "true"
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 9419
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
memory: 128Mi
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: rabbitmq-exporter
|
||||||
|
namespace: workflow
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
port: 9419
|
||||||
|
targetPort: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: rabbitmq-exporter
|
||||||
|
namespace: workflow
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
release: prometheus
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- interval: 30s
|
||||||
|
path: /metrics
|
||||||
|
port: metrics
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-prod
|
||||||
|
- action: replace
|
||||||
|
targetLabel: rabbitmq_instance
|
||||||
|
replacement: workflow-rabbitmq
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
@ -0,0 +1,202 @@
|
|||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-connectors
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: http
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: connectors
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-identity
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: metrics
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: identity
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-operate
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: management
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: operate
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-optimize
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: management
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: optimize
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-tasklist
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: management
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: tasklist
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-zeebe
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: http
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: zeebe-broker
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: camunda-camunda-stage-zeebe-gateway
|
||||||
|
namespace: camunda
|
||||||
|
labels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/instance: camunda
|
||||||
|
app.kubernetes.io/name: camunda-stage
|
||||||
|
app.kubernetes.io/part-of: camunda-platform
|
||||||
|
release: metrics
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- honorLabels: true
|
||||||
|
interval: 10s
|
||||||
|
path: /actuator/prometheus
|
||||||
|
port: http
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: camunda-platform
|
||||||
|
app.kubernetes.io/component: zeebe-gateway
|
||||||
File diff suppressed because one or more lines are too long
@ -16,38 +16,38 @@ spec:
|
|||||||
team: infra
|
team: infra
|
||||||
cluster: brusnika-stage
|
cluster: brusnika-stage
|
||||||
kafka_instance: yc-kafka
|
kafka_instance: yc-kafka
|
||||||
source_cluster: yc-kafka
|
source_cluster: brusnika-stage
|
||||||
annotations:
|
annotations:
|
||||||
summary: YC Kafka exporter is down in brusnika-stage
|
summary: YC Kafka exporter is down in brusnika-stage
|
||||||
description: No healthy kafka-exporter-yc target is scraped for 10 minutes.
|
description: No healthy kafka-exporter-yc target is scraped for 10 minutes.
|
||||||
- alert: KafkaTopicNoMessagesFor12h
|
- alert: KafkaTopicNoMessages12h
|
||||||
expr: |
|
expr: |
|
||||||
(
|
(
|
||||||
sum by (topic, kafka_instance, source_cluster, cluster) (
|
sum by (topic, kafka_instance, source_cluster, cluster) (
|
||||||
max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}[12h])
|
max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic=~"^bru\\.cde\\.folders\\.stage$"}[12h])
|
||||||
-
|
-
|
||||||
min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}[12h])
|
min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic=~"^bru\\.cde\\.folders\\.stage$"}[12h])
|
||||||
) == 0
|
) == 0
|
||||||
)
|
)
|
||||||
and on (topic, kafka_instance, source_cluster, cluster)
|
and on (topic, kafka_instance, source_cluster, cluster)
|
||||||
(
|
(
|
||||||
min by (topic, kafka_instance, source_cluster, cluster) (
|
min by (topic, kafka_instance, source_cluster, cluster) (
|
||||||
count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}[12h])
|
count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-stage", topic=~"^bru\\.cde\\.folders\\.stage$"}[12h])
|
||||||
) >= 1400
|
) >= 1400
|
||||||
)
|
)
|
||||||
and on (topic, kafka_instance, source_cluster, cluster)
|
and on (topic, kafka_instance, source_cluster, cluster)
|
||||||
(
|
(
|
||||||
sum by (topic, kafka_instance, source_cluster, cluster) (
|
sum by (topic, kafka_instance, source_cluster, cluster) (
|
||||||
kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-stage", topic!~"^__.*"}
|
kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-stage", topic=~"^bru\\.cde\\.folders\\.stage$"}
|
||||||
) > 0
|
) > 0
|
||||||
)
|
)
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: critical
|
||||||
team: infra
|
team: infra
|
||||||
cluster: brusnika-stage
|
cluster: brusnika-stage
|
||||||
kafka_instance: yc-kafka
|
kafka_instance: yc-kafka
|
||||||
source_cluster: yc-kafka
|
source_cluster: brusnika-stage
|
||||||
annotations:
|
annotations:
|
||||||
summary: No new messages in Kafka topic for 12h
|
summary: No new messages in Kafka topic for 12h
|
||||||
description: Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours.
|
description: Kafka topic {{ $labels.topic }} has no offset growth for 12h in brusnika-stage
|
||||||
|
|||||||
@ -21,6 +21,8 @@ resources:
|
|||||||
- ./node-exporter-vmnodescrape.yaml
|
- ./node-exporter-vmnodescrape.yaml
|
||||||
- ./istio-gateway-stats-scrape.yaml
|
- ./istio-gateway-stats-scrape.yaml
|
||||||
- ./istio-dashboard-compat-vmrule.yaml
|
- ./istio-dashboard-compat-vmrule.yaml
|
||||||
|
- ./camunda-servicemonitors.yaml
|
||||||
|
- ./rabbitmq-exporter.yaml
|
||||||
- ./kafka-exporter-yc.yaml
|
- ./kafka-exporter-yc.yaml
|
||||||
- ./kafka-exporter-yc-rules.yaml
|
- ./kafka-exporter-yc-rules.yaml
|
||||||
- ./kafka-exporter-yc-dashboard.yaml
|
- ./kafka-exporter-yc-dashboard.yaml
|
||||||
|
|||||||
@ -31,10 +31,12 @@ spec:
|
|||||||
datasources:
|
datasources:
|
||||||
- name: attachments
|
- name: attachments
|
||||||
uri: 192.168.2.45:5432/attachments_db?sslmode=disable
|
uri: 192.168.2.45:5432/attachments_db?sslmode=disable
|
||||||
- name: bim
|
- name: checklists
|
||||||
uri: 192.168.2.45:5432/bimapidb?sslmode=disable
|
uri: 192.168.2.45:5432/checklists?sslmode=disable
|
||||||
- name: comparisons
|
- name: comparisons
|
||||||
uri: 192.168.2.45:5432/comparisons_db?sslmode=disable
|
uri: 192.168.2.45:5432/comparisons_db?sslmode=disable
|
||||||
|
- name: contracts
|
||||||
|
uri: 192.168.2.45:5432/contracts?sslmode=disable
|
||||||
- name: django
|
- name: django
|
||||||
uri: 192.168.2.45:5432/sarex_db?sslmode=disable
|
uri: 192.168.2.45:5432/sarex_db?sslmode=disable
|
||||||
- name: documentations
|
- name: documentations
|
||||||
@ -45,6 +47,10 @@ spec:
|
|||||||
uri: 192.168.2.45:5432/eav?sslmode=disable
|
uri: 192.168.2.45:5432/eav?sslmode=disable
|
||||||
- name: flows
|
- name: flows
|
||||||
uri: 192.168.2.45:5432/flows_db?sslmode=disable
|
uri: 192.168.2.45:5432/flows_db?sslmode=disable
|
||||||
|
- name: gitea
|
||||||
|
uri: 192.168.2.45:5432/gitea?sslmode=disable
|
||||||
|
- name: goalert
|
||||||
|
uri: 192.168.2.45:5432/goalert?sslmode=disable
|
||||||
- name: inspections
|
- name: inspections
|
||||||
uri: 192.168.2.45:5432/inspections_db?sslmode=disable
|
uri: 192.168.2.45:5432/inspections_db?sslmode=disable
|
||||||
- name: issues
|
- name: issues
|
||||||
@ -55,6 +61,8 @@ spec:
|
|||||||
uri: 192.168.2.45:5432/openobserve?sslmode=disable
|
uri: 192.168.2.45:5432/openobserve?sslmode=disable
|
||||||
- name: postgres
|
- name: postgres
|
||||||
uri: 192.168.2.45:5432/postgres?sslmode=disable
|
uri: 192.168.2.45:5432/postgres?sslmode=disable
|
||||||
|
- name: pm
|
||||||
|
uri: 192.168.2.45:5432/pm?sslmode=disable
|
||||||
- name: resources
|
- name: resources
|
||||||
uri: 192.168.2.45:5432/resources?sslmode=disable
|
uri: 192.168.2.45:5432/resources?sslmode=disable
|
||||||
- name: rfi
|
- name: rfi
|
||||||
|
|||||||
@ -0,0 +1,90 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: rabbitmq-exporter
|
||||||
|
namespace: workflow
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: rabbitmq-exporter
|
||||||
|
image: kbudde/rabbitmq-exporter:1.0.0-RC19
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: RABBIT_URL
|
||||||
|
value: http://rabbitmq-service.workflow.svc.cluster.local:15672
|
||||||
|
- name: RABBIT_USER
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: rabbitmq-secret
|
||||||
|
key: username
|
||||||
|
- name: RABBIT_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: rabbitmq-secret
|
||||||
|
key: password
|
||||||
|
- name: PUBLISH_PORT
|
||||||
|
value: "9419"
|
||||||
|
- name: SKIPVERIFY
|
||||||
|
value: "true"
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 9419
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
memory: 128Mi
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: rabbitmq-exporter
|
||||||
|
namespace: workflow
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
port: 9419
|
||||||
|
targetPort: metrics
|
||||||
|
---
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: rabbitmq-exporter
|
||||||
|
namespace: workflow
|
||||||
|
labels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
|
release: prometheus
|
||||||
|
spec:
|
||||||
|
endpoints:
|
||||||
|
- interval: 30s
|
||||||
|
path: /metrics
|
||||||
|
port: metrics
|
||||||
|
relabelings:
|
||||||
|
- action: replace
|
||||||
|
targetLabel: cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: source_cluster
|
||||||
|
replacement: brusnika-stage
|
||||||
|
- action: replace
|
||||||
|
targetLabel: rabbitmq_instance
|
||||||
|
replacement: workflow-rabbitmq
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: rabbitmq-exporter
|
||||||
Loading…
Reference in New Issue
Block a user