Add observability stack for brusnika prod
This commit is contained in:
parent
81288a3f96
commit
b7b65a03c2
@ -11,6 +11,10 @@ spec:
|
||||
solvers:
|
||||
- selector:
|
||||
dnsNames:
|
||||
- grafana.brusnika.onprem.sarex.io
|
||||
- minio.brusnika.onprem.sarex.io
|
||||
- openobserve.brusnika.onprem.sarex.io
|
||||
- vmalert.brusnika.onprem.sarex.io
|
||||
- zitadel.brusnika.onprem.sarex.io
|
||||
http01:
|
||||
ingress:
|
||||
|
||||
@ -0,0 +1,18 @@
|
||||
# VMRule with a single recording rule.
# It re-publishes kube_deployment_status_replicas_available for the
# istio-ingressgateway Deployment (which runs in namespace ingress-nginx)
# with the namespace label rewritten to "istio-system".
# Presumably this lets dashboards that filter on namespace="istio-system"
# find the series (inferred from the rule name) - confirm with dashboard owners.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: istio-dashboard-compat
  namespace: vmstack
spec:
  groups:
    - name: istio-dashboard-compat.rules
      rules:
        - record: kube_deployment_status_replicas_available
          # label_replace(v, dst, replacement, src, regex): regex ".*" always
          # matches, so every selected series gets namespace="istio-system".
          # The selector only reads namespace="ingress-nginx", so the recorded
          # series (namespace="istio-system") is never fed back into the rule.
          expr: |
            label_replace(
              kube_deployment_status_replicas_available{deployment="istio-ingressgateway", namespace="ingress-nginx"},
              "namespace",
              "istio-system",
              "namespace",
              ".*"
            )
|
||||
@ -0,0 +1,31 @@
|
||||
# Service that exposes port 15020 of the pods labelled istio=ingressgateway
# under the port name "stats".
# The Service carries its own label (istio=ingressgateway-stats) so a scrape
# object can select this Service specifically.
apiVersion: v1
kind: Service
metadata:
  name: istio-ingressgateway-stats
  namespace: ingress-nginx
  labels:
    istio: ingressgateway-stats
spec:
  selector:
    istio: ingressgateway
  ports:
    - name: stats
      port: 15020
      targetPort: 15020
|
||||
---
|
||||
# VMServiceScrape: scrapes /stats/prometheus every 15s from the "stats" port
# of Services labelled istio=ingressgateway-stats in namespace ingress-nginx.
# NOTE(review): this object lives in `prometheus-stack`, while the VMRule and
# VMNodeScrape objects added in the same change live in `vmstack` - confirm
# the VictoriaMetrics operator / vmagent selects scrape objects from this
# namespace as well.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: istio-ingressgateway-stats
  namespace: prometheus-stack
spec:
  namespaceSelector:
    matchNames: [ingress-nginx]
  selector:
    matchLabels:
      istio: ingressgateway-stats
  endpoints:
    - port: stats
      path: /stats/prometheus
      interval: 15s
|
||||
File diff suppressed because one or more lines are too long
@ -0,0 +1,53 @@
|
||||
# Alerting rules for the kafka-exporter instance that watches the "yc-kafka"
# cluster. The group is evaluated every 5 minutes.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: kafka-exporter-yc-rules
  namespace: vmstack
spec:
  groups:
    - name: kafka-exporter-yc.rules
      interval: 5m
      rules:
        # Fires when no `up` series with value 1 exists for the yc-kafka
        # exporter target, continuously for 10 minutes.
        - alert: KafkaExporterYcDown
          expr: 'absent(up{kafka_instance="yc-kafka", cluster="brusnika-prod"} == 1)'
          for: 10m
          labels:
            severity: critical
            team: infra
            cluster: brusnika-prod
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: 'YC Kafka exporter is down in brusnika-prod'
            description: 'No healthy kafka-exporter-yc target is scraped for 10 minutes.'

        # Fires when a watched topic (prod, system_log) shows no offset growth
        # over 12h. Three conditions are AND-ed per topic:
        #   1. summed (max - min) of partition offsets over 12h is exactly 0;
        #   2. every partition has at least 1400 samples in the 12h window,
        #      so gaps in scraping do not produce a false "no growth" result;
        #   3. the topic currently reports more than 0 partitions.
        - alert: KafkaTopicNoMessagesFor12h
          expr: |
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                max_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
                -
                min_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
              ) == 0
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              min by (topic, kafka_instance, source_cluster, cluster) (
                count_over_time(kafka_topic_partition_current_offset{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}[12h])
              ) >= 1400
            )
            and on (topic, kafka_instance, source_cluster, cluster)
            (
              sum by (topic, kafka_instance, source_cluster, cluster) (
                kafka_topic_partitions{kafka_instance="yc-kafka", cluster="brusnika-prod", topic=~"^(prod|system_log)$"}
              ) > 0
            )
          for: 5m
          labels:
            severity: warning
            team: infra
            cluster: brusnika-prod
            kafka_instance: yc-kafka
            source_cluster: yc-kafka
          annotations:
            summary: 'No new messages in Kafka topic for 12h'
            description: 'Topic {{ $labels.topic }} in {{ $labels.kafka_instance }} has no offset growth for 12 hours.'
|
||||
184
clusters/brusnika-prod/infrastructure/kafka-exporter-yc.yaml
Normal file
184
clusters/brusnika-prod/infrastructure/kafka-exporter-yc.yaml
Normal file
@ -0,0 +1,184 @@
|
||||
# ServiceAccount used by the kafka-exporter-yc Deployment; the HelmRelease in
# this file sets it as the pod's serviceAccountName through a post-render patch.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kafka-exporter-yc
  namespace: kafka-exporter
|
||||
---
|
||||
# Flux HelmRelease for a second kafka-exporter instance that monitors the
# external "yc-kafka" cluster.
#
# Broker list, SASL credentials and the CA certificate are not in chart
# values. They are rendered into /vault/secrets/* by the Vault agent injector
# (annotations added by the post-render patch below), and the container
# command is replaced with a shell wrapper that reads those files and execs
# the exporter.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kafka-exporter-yc
  namespace: kafka-exporter
spec:
  dependsOn:
    - name: prometheus-stack
      namespace: prometheus-stack
  interval: 5m
  timeout: 10m
  chart:
    spec:
      chart: kafka-exporter-prod
      version: "0.27.0"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  postRenderers:
    - kustomize:
        patches:
          # JSON6902 patch on the rendered Deployment.
          - target:
              group: apps
              version: v1
              kind: Deployment
              name: kafka-exporter-yc
            patch: |-
              # Vault agent injector annotations: one secret file per value,
              # all read from the same Vault path.
              # NOTE(review): `add` on an existing annotations map replaces
              # the whole map - confirm the chart sets no pod annotations that
              # must survive.
              - op: add
                path: /spec/template/metadata/annotations
                value:
                  vault.hashicorp.com/agent-init-first: "true"
                  vault.hashicorp.com/agent-inject: "true"
                  vault.hashicorp.com/agent-pre-populate-only: "true"
                  vault.hashicorp.com/auth-path: auth/kubernetes
                  vault.hashicorp.com/role: kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-secret-kafka-bootstrap: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-bootstrap: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_BOOTSTRAP" }}
                    {{- end -}}
                  vault.hashicorp.com/agent-inject-secret-kafka-user: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-user: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_USER" }}
                    {{- end -}}
                  vault.hashicorp.com/agent-inject-secret-kafka-password: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-password: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_PASSWORD" }}
                    {{- end -}}
                  vault.hashicorp.com/agent-inject-secret-kafka-ca.pem: secrets/data/vault/apps/kafka-exporter-yc
                  vault.hashicorp.com/agent-inject-template-kafka-ca.pem: |-
                    {{- with secret "secrets/data/vault/apps/kafka-exporter-yc" -}}
                    {{ index .Data.data "KAFKA_CA_PEM" }}
                    {{- end -}}
              - op: add
                path: /spec/template/spec/serviceAccountName
                value: kafka-exporter-yc
              - op: add
                path: /spec/template/spec/imagePullSecrets
                value:
                  - name: regcred
              # Run the exporter through a shell wrapper so flags can be built
              # from the Vault-rendered files at container start.
              - op: add
                path: /spec/template/spec/containers/0/command
                value:
                  - /bin/sh
                  - -ec
              - op: replace
                path: /spec/template/spec/containers/0/args
                value:
                  - |-
                    KAFKA_BOOTSTRAP="$(cat /vault/secrets/kafka-bootstrap)"
                    KAFKA_USER="$(cat /vault/secrets/kafka-user)"
                    KAFKA_PASSWORD="$(cat /vault/secrets/kafka-password)"
                    if command -v kafka_exporter >/dev/null 2>&1; then
                      KAFKA_EXPORTER_BIN="$(command -v kafka_exporter)"
                    else
                      KAFKA_EXPORTER_BIN=/bin/kafka_exporter
                    fi
                    # Split the comma-separated broker list into one
                    # --kafka.server flag per non-empty entry.
                    OLD_IFS="${IFS}"
                    IFS=,
                    set --
                    for broker in ${KAFKA_BOOTSTRAP}; do
                      broker="$(printf '%s' "${broker}" | tr -d '[:space:]')"
                      if [ -n "${broker}" ]; then
                        set -- "$@" --kafka.server="${broker}"
                      fi
                    done
                    IFS="${OLD_IFS}"
                    # Fail fast on an empty broker list instead of letting the
                    # exporter fall back to its built-in default server.
                    if [ "$#" -eq 0 ]; then
                      echo "kafka-exporter-yc: no brokers found in /vault/secrets/kafka-bootstrap" >&2
                      exit 1
                    fi
                    exec "${KAFKA_EXPORTER_BIN}" \
                      "$@" \
                      --sasl.enabled \
                      --sasl.username="${KAFKA_USER}" \
                      --sasl.password="${KAFKA_PASSWORD}" \
                      --sasl.mechanism=scram-sha512 \
                      --tls.enabled \
                      --tls.ca-file=/vault/secrets/kafka-ca.pem \
                      --kafka.labels=yc-kafka \
                      --topic.exclude='^__.*' \
                      --verbosity=0
              # Relaxed probe timings.
              - op: replace
                path: /spec/template/spec/containers/0/livenessProbe/initialDelaySeconds
                value: 60
              - op: replace
                path: /spec/template/spec/containers/0/livenessProbe/failureThreshold
                value: 6
              - op: replace
                path: /spec/template/spec/containers/0/readinessProbe/initialDelaySeconds
                value: 30
              - op: replace
                path: /spec/template/spec/containers/0/readinessProbe/failureThreshold
                value: 6
          # JSON6902 patch on the rendered ServiceMonitor.
          - target:
              group: monitoring.coreos.com
              version: v1
              kind: ServiceMonitor
              name: kafka-exporter-yc
            patch: |-
              # Narrow the selector to this release's instance label.
              - op: add
                path: /spec/selector/matchLabels/app.kubernetes.io~1instance
                value: kafka-exporter-yc
              # Static target labels; the alert rules for yc-kafka match on
              # kafka_instance, source_cluster and cluster.
              - op: add
                path: /spec/endpoints/0/relabelings
                value:
                  - action: replace
                    targetLabel: kafka_instance
                    replacement: yc-kafka
                  - action: replace
                    targetLabel: source_cluster
                    replacement: yc-kafka
                  - action: replace
                    targetLabel: monitored_cluster
                    replacement: yc-kafka
                  - action: replace
                    targetLabel: cluster
                    replacement: brusnika-prod
  values:
    fullnameOverride: kafka-exporter-yc
    image:
      repository: danielqsj/kafka-exporter
      # Pinned instead of `latest`: with pullPolicy IfNotPresent a mutable tag
      # lets nodes run different builds and hides upgrades from Git.
      # NOTE(review): confirm this tag exists in the registry/mirror in use.
      tag: v1.9.0
      pullPolicy: IfNotPresent
    kafkaExporter:
      kafka:
        # Placeholder only: the real brokers come from Vault via the wrapper
        # script, which replaces the chart-rendered args.
        servers:
          - kafka-bootstrap.from-vault.invalid:9091
      # SASL/TLS flags are supplied by the wrapper script, not by the chart.
      sasl:
        enabled: false
      tls:
        enabled: false
    prometheus:
      serviceMonitor:
        enabled: true
        namespace: kafka-exporter
        interval: 30s
        additionalLabels:
          app: kafka-exporter-yc
        # Same static labels as the target relabelings above, applied at
        # metric level.
        metricRelabelings:
          - action: replace
            targetLabel: kafka_instance
            replacement: yc-kafka
          - action: replace
            targetLabel: source_cluster
            replacement: yc-kafka
          - action: replace
            targetLabel: monitored_cluster
            replacement: yc-kafka
          - action: replace
            targetLabel: cluster
            replacement: brusnika-prod
|
||||
@ -7,8 +7,23 @@ resources:
|
||||
- ../../../infrastructure/istio-config
|
||||
- ../../../infrastructure/vault
|
||||
- ../../../infrastructure/zitadel
|
||||
- ../../../infrastructure/minio
|
||||
- ../../../infrastructure/openobserve
|
||||
- ../../../infrastructure/vmstack
|
||||
- ../../../infrastructure/prometheus-stack
|
||||
- ../../../infrastructure/opentelemetry-operator
|
||||
- ../../../infrastructure/opentelemetry-collector
|
||||
- ../../../infrastructure/goalert
|
||||
- ../../../infrastructure/kafka-exporter
|
||||
- ../../../infrastructure/postgres-exporter
|
||||
- ./vault-ingress.yaml
|
||||
- ./clusterissuer-letsencrypt.yaml
|
||||
- ./node-exporter-vmnodescrape.yaml
|
||||
- ./istio-gateway-stats-scrape.yaml
|
||||
- ./istio-dashboard-compat-vmrule.yaml
|
||||
- ./kafka-exporter-yc.yaml
|
||||
- ./kafka-exporter-yc-rules.yaml
|
||||
- ./kafka-exporter-yc-dashboard.yaml
|
||||
- ../../../infrastructure/failed-pod-cleanup
|
||||
patches:
|
||||
- path: ./patches/istio-gateway.yaml
|
||||
@ -39,6 +54,69 @@ patches:
|
||||
kind: HelmRelease
|
||||
name: zitadel
|
||||
namespace: zitadel
|
||||
- path: ./patches/minio.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: minio
|
||||
namespace: minio
|
||||
- path: ./patches/openobserve.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: openobserve
|
||||
namespace: openobserve
|
||||
- path: ./patches/vmstack.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: vmstack
|
||||
namespace: vmstack
|
||||
- path: ./patches/prometheus-stack.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: prometheus-stack
|
||||
namespace: prometheus-stack
|
||||
- path: ./patches/opentelemetry-operator.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: opentelemetry-operator
|
||||
namespace: opentelemetry-operator
|
||||
- path: ./patches/opentelemetry-collector.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: opentelemetry-collector
|
||||
namespace: opentelemetry-collector
|
||||
- path: ./patches/goalert.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: goalert
|
||||
namespace: goalert
|
||||
- path: ./patches/kafka-exporter.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: kafka-exporter
|
||||
namespace: kafka-exporter
|
||||
- path: ./patches/postgres-exporter.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
version: v2
|
||||
kind: HelmRelease
|
||||
name: postgres-exporter
|
||||
namespace: postgres-exporter
|
||||
- path: ./patches/failed-pod-cleanup.yaml
|
||||
target:
|
||||
group: helm.toolkit.fluxcd.io
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
# VMNodeScrape: scrapes http://<node>:9100/metrics on every node
# (empty selector = no node filtering).
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMNodeScrape
metadata:
  name: vm-prod-node-exporter
  namespace: vmstack
spec:
  path: /metrics
  port: "9100"
  scheme: http
  selector: {}
  relabelConfigs:
    # Copy every Kubernetes node label onto the scraped series.
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
    # Expose the node name as `node`.
    - action: replace
      sourceLabels: [__meta_kubernetes_node_name]
      targetLabel: node
    # Force a fixed job label.
    - action: replace
      replacement: vm-stack/vm-prod-node-exporter
      targetLabel: job
|
||||
35
clusters/brusnika-prod/infrastructure/patches/goalert.yaml
Normal file
35
clusters/brusnika-prod/infrastructure/patches/goalert.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
# Cluster-specific overrides for the goalert HelmRelease.
# NOTE(review): the source view lost indentation and the goalert chart schema
# is not visible here; nesting below is reconstructed - verify against the
# chart's values.yaml (in particular whether `environment` belongs under
# `goalert` or at the top level of `values`).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: goalert
  namespace: goalert
spec:
  interval: 5m
  timeout: 10m
  values:
    image:
      name: cr.yandex/crp3ccidau046kdj8g9q/goalert:0.32.0
      pullPolicy: IfNotPresent
    # The pull secret is given in two shapes (list of objects and list of
    # names); presumably different templates read different keys.
    imagePullSecrets:
      - name: regcred
    global:
      imagePullSecrets: [regcred]
    goalert:
      # DB URL and encryption key are read from an existing Secret.
      existingSecret:
        name: postgresql-secret
        keys:
          GOALERT_DB_URL: GOALERT_DB_URL
          GOALERT_DATA_ENCRYPTION_KEY: GOALERT_DATA_ENCRYPTION_KEY
      environment:
        GOALERT_HTTP_PREFIX: ""
    # Bundled PostgreSQL is off.
    postgresql:
      enabled: false
    # Chart ingress is off; the host entry is kept but has no effect while
    # enabled is false.
    ingress:
      enabled: false
      className: nginx
      hosts:
        - host: vmalert.brusnika.onprem.sarex.io
          paths:
            - path: /
              pathType: Prefix
|
||||
@ -131,6 +131,27 @@ spec:
|
||||
issuerRef:
|
||||
name: letsencrypt
|
||||
kind: ClusterIssuer
|
||||
grafana-tls:
|
||||
namespace: ingress-nginx
|
||||
dnsNames:
|
||||
- grafana.brusnika.onprem.sarex.io
|
||||
issuerRef:
|
||||
name: letsencrypt
|
||||
kind: ClusterIssuer
|
||||
openobserve-tls:
|
||||
namespace: ingress-nginx
|
||||
dnsNames:
|
||||
- openobserve.brusnika.onprem.sarex.io
|
||||
issuerRef:
|
||||
name: letsencrypt
|
||||
kind: ClusterIssuer
|
||||
vmalert-tls:
|
||||
namespace: ingress-nginx
|
||||
dnsNames:
|
||||
- vmalert.brusnika.onprem.sarex.io
|
||||
issuerRef:
|
||||
name: letsencrypt
|
||||
kind: ClusterIssuer
|
||||
istio:
|
||||
envoyFilters: {}
|
||||
authorizationPolicies: {}
|
||||
@ -297,6 +318,36 @@ spec:
|
||||
- zitadel.brusnika.onprem.sarex.io
|
||||
tls:
|
||||
credentialName: zitadel-tls
|
||||
grafana:
|
||||
name: grafana-gw
|
||||
namespace: ingress-nginx
|
||||
selector:
|
||||
istio: ingressgateway
|
||||
servers:
|
||||
- hosts:
|
||||
- grafana.brusnika.onprem.sarex.io
|
||||
tls:
|
||||
credentialName: grafana-tls
|
||||
openobserve:
|
||||
name: openobserve-gw
|
||||
namespace: ingress-nginx
|
||||
selector:
|
||||
istio: ingressgateway
|
||||
servers:
|
||||
- hosts:
|
||||
- openobserve.brusnika.onprem.sarex.io
|
||||
tls:
|
||||
credentialName: openobserve-tls
|
||||
vmalert:
|
||||
name: vmalert-gw
|
||||
namespace: ingress-nginx
|
||||
selector:
|
||||
istio: ingressgateway
|
||||
servers:
|
||||
- hosts:
|
||||
- vmalert.brusnika.onprem.sarex.io
|
||||
tls:
|
||||
credentialName: vmalert-tls
|
||||
rabbitmq:
|
||||
name: rabbitmq-gw
|
||||
namespace: ingress-nginx
|
||||
@ -540,8 +591,8 @@ spec:
|
||||
redirectCode: 308
|
||||
- path:
|
||||
prefix: /
|
||||
service: minio-console-service.minio.svc.cluster.local
|
||||
port: 80
|
||||
service: minio-console.minio.svc.cluster.local
|
||||
port: 9001
|
||||
sso-check-vs:
|
||||
namespace: sso-check
|
||||
hosts:
|
||||
@ -610,6 +661,39 @@ spec:
|
||||
prefix: /
|
||||
service: zitadel-idp-contour.zitadel.svc.cluster.local
|
||||
port: 8080
|
||||
grafana-vs:
|
||||
namespace: prometheus-stack
|
||||
hosts:
|
||||
- grafana.brusnika.onprem.sarex.io
|
||||
gateways:
|
||||
- ingress-nginx/grafana-gw
|
||||
routes:
|
||||
- path:
|
||||
prefix: /
|
||||
service: prometheus-stack-grafana.prometheus-stack.svc.cluster.local
|
||||
port: 80
|
||||
openobserve-vs:
|
||||
namespace: openobserve
|
||||
hosts:
|
||||
- openobserve.brusnika.onprem.sarex.io
|
||||
gateways:
|
||||
- ingress-nginx/openobserve-gw
|
||||
routes:
|
||||
- path:
|
||||
prefix: /
|
||||
service: openobserve-web.openobserve.svc.cluster.local
|
||||
port: 5080
|
||||
vmalert-vs:
|
||||
namespace: goalert
|
||||
hosts:
|
||||
- vmalert.brusnika.onprem.sarex.io
|
||||
gateways:
|
||||
- ingress-nginx/vmalert-gw
|
||||
routes:
|
||||
- path:
|
||||
prefix: /
|
||||
service: goalert.goalert.svc.cluster.local
|
||||
port: 8081
|
||||
rabbitmq-vs:
|
||||
namespace: workflow
|
||||
hosts:
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
# Cluster-specific overrides for the in-cluster kafka-exporter HelmRelease
# (targets the brusnika-prod Kafka bootstrap service, plaintext, no SASL).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kafka-exporter
  namespace: kafka-exporter
spec:
  dependsOn:
    - name: prometheus-stack
      namespace: prometheus-stack
  interval: 5m
  timeout: 10m
  postRenderers:
    - kustomize:
        patches:
          # Add the registry pull secret to the rendered Deployment.
          - target:
              group: apps
              version: v1
              kind: Deployment
              name: kafka-exporter-kafka-exporter-prod
            patch: |-
              - op: add
                path: /spec/template/spec/imagePullSecrets
                value:
                  - name: regcred
          # Narrow the ServiceMonitor selector to this release's instance
          # label.
          - target:
              group: monitoring.coreos.com
              version: v1
              kind: ServiceMonitor
              name: kafka-exporter-kafka-exporter-prod
            patch: |-
              - op: add
                path: /spec/selector/matchLabels/app.kubernetes.io~1instance
                value: kafka-exporter
  values:
    image:
      repository: danielqsj/kafka-exporter
      # Pinned instead of `latest`: with pullPolicy IfNotPresent a mutable tag
      # lets nodes run different builds and hides upgrades from Git.
      # NOTE(review): confirm this tag exists in the registry/mirror in use.
      tag: v1.9.0
      pullPolicy: IfNotPresent
    kafkaExporter:
      kafka:
        servers:
          - brusnika-prod-kafka-bootstrap.kafka.svc.cluster.local:9092
      sasl:
        enabled: false
      tls:
        enabled: false
    prometheus:
      serviceMonitor:
        enabled: true
        namespace: kafka-exporter
        interval: 30s
|
||||
34
clusters/brusnika-prod/infrastructure/patches/minio.yaml
Normal file
34
clusters/brusnika-prod/infrastructure/patches/minio.yaml
Normal file
@ -0,0 +1,34 @@
|
||||
# Cluster-specific overrides for the minio HelmRelease: single-node standalone
# MinIO on a storage node, root credentials taken from Vault.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: minio
  namespace: minio
spec:
  interval: 5m
  timeout: 10m
  values:
    nameOverride: minio
    mode: standalone
    # Public URLs: API at the host root, console under /console/.
    environment:
      MINIO_SERVER_URL: 'https://minio.brusnika.onprem.sarex.io'
      MINIO_BROWSER_REDIRECT_URL: 'https://minio.brusnika.onprem.sarex.io/console/'
      MINIO_API_CORS_ALLOW_ORIGIN: 'https://minio.brusnika.onprem.sarex.io'
    imagePullSecrets:
      - name: regcred
    # NOTE(review): `vaultRoot` is not a key of the upstream MinIO chart as
    # far as this view shows; presumably a wrapper-chart extension that reads
    # the root user/password from Vault - verify.
    vaultRoot:
      enabled: true
      role: minio
      authPath: auth/kubernetes
      secretPath: secrets/data/minio/admin
      rootUserKey: rootUser
      rootPasswordKey: rootPassword
    drivesPerNode: 1
    replicas: 1
    nodeSelector:
      type: storage
    persistence:
      storageClass: local-path
      size: 100Gi
    resources:
      requests:
        memory: 1Gi
|
||||
101
clusters/brusnika-prod/infrastructure/patches/openobserve.yaml
Normal file
101
clusters/brusnika-prod/infrastructure/patches/openobserve.yaml
Normal file
@ -0,0 +1,101 @@
|
||||
# Cluster-specific overrides for the openobserve HelmRelease.
# NOTE(review): the source view lost indentation and `universal-chart` is a
# private chart; nesting of `deployment` / `envs` / `serviceAccount` /
# `imagePullSecrets` below is reconstructed - verify against the chart's
# values.yaml.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: openobserve
  namespace: openobserve
spec:
  interval: 5m
  timeout: 30m
  postRenderers:
    - kustomize:
        patches:
          # JSON6902 patch on the rendered openobserve-web Deployment.
          - target:
              group: apps
              version: v1
              kind: Deployment
              name: openobserve-web
            patch: |-
              # Stop the old pod before starting the new one.
              - op: replace
                path: /spec/strategy
                value:
                  type: Recreate
              # Start through a shell so the Vault-rendered env file can be
              # sourced (set -a exports every variable it defines) before
              # exec'ing the server.
              - op: replace
                path: /spec/template/spec/containers/0/command
                value:
                  - /bin/sh
                  - -ec
              - op: replace
                path: /spec/template/spec/containers/0/args
                value:
                  - |
                    set -a
                    . /vault/secrets/openobserve-env
                    set +a
                    exec /openobserve
              - op: replace
                path: /spec/template/spec/containers/0/livenessProbe/initialDelaySeconds
                value: 300
              - op: replace
                path: /spec/template/spec/containers/0/readinessProbe/initialDelaySeconds
                value: 60
  values:
    universal-chart:
      services:
        openobserve:
          deployment:
            replicaCount:
              _default: 1
          # Non-secret settings; string values stay quoted so they are
          # passed as strings.
          envs:
            - name: ZO_HTTP_PORT
              value:
                _default: "5080"
            - name: ZO_LOCAL_MODE
              value:
                _default: "false"
            - name: ZO_META_STORE
              value:
                _default: postgres
            - name: ZO_CLUSTER_COORDINATOR
              value:
                _default: nats
            - name: ZO_NATS_REPLICAS
              value:
                _default: "1"
            - name: ZO_S3_PROVIDER
              value:
                _default: s3
            - name: ZO_S3_SERVER_URL
              value:
                _default: http://minio.minio.svc.cluster.local:9000
            - name: ZO_S3_BUCKET_NAME
              value:
                _default: open-observe
            - name: ZO_S3_REGION_NAME
              value:
                _default: ru-central1
            - name: ZO_TELEMETRY
              value:
                _default: "false"
          serviceAccount:
            enabled: true
            name:
              _default: openobserve-vault
      # Use the existing `regcred` secret instead of creating one.
      imagePullSecrets:
        create:
          _default: false
        name:
          _default: regcred
    openobserve:
      secret:
        create: false
    nats:
      enabled: true
      replicaCount: 1
      persistence:
        enabled: true
        size: 10Gi
        storageClassName: csi-disk
    otelCollector:
      enabled: false
    vault:
      enabled: true
|
||||
@ -0,0 +1,92 @@
|
||||
# Cluster-specific overrides for the opentelemetry-collector HelmRelease:
# a DaemonSet that tails pod logs and receives OTLP traces, shipping both to
# OpenObserve over OTLP/HTTP with a Basic-auth header read from a
# Vault-rendered file.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-collector
  namespace: opentelemetry-collector
spec:
  # Do not block reconciliation on pod readiness.
  install:
    disableWait: true
  upgrade:
    disableWait: true
  dependsOn:
    - name: prometheus-stack
      namespace: prometheus-stack
    - name: openobserve
      namespace: openobserve
  interval: 5m
  timeout: 30m
  values:
    imagePullSecrets:
      - name: regcred
    podAnnotations:
      vault.hashicorp.com/agent-init-first: "true"
      vault.hashicorp.com/agent-inject: "true"
      vault.hashicorp.com/agent-pre-populate-only: "true"
      vault.hashicorp.com/auth-path: auth/kubernetes
      vault.hashicorp.com/role: openobserve
      vault.hashicorp.com/agent-inject-secret-openobserve-basic-auth: secrets/data/vault/apps/openobserve
      # The Vault template braces are wrapped in {{ "..." }} string literals;
      # presumably the chart runs podAnnotations through Helm `tpl`, which
      # would otherwise evaluate them - verify.
      vault.hashicorp.com/agent-inject-template-openobserve-basic-auth: |-
        {{ "{{- with secret \"secrets/data/vault/apps/openobserve\" -}}" }}
        Basic {{ "{{ index .Data.data \"OPENOBSERVE_BASIC_AUTH\" }}" }}
        {{ "{{- end -}}" }}
    mode: daemonset
    fullnameOverride: otel-collector
    # Schedule on every node regardless of taints.
    tolerations:
      - operator: Exists
    rollout:
      rollingUpdate:
        maxUnavailable: 4
    presets:
      logsCollection:
        enabled: true
        includeCollectorLogs: false
      kubernetesAttributes:
        enabled: true
    config:
      receivers:
        filelog:
          include: ['/var/log/pods/*/*/*.log']
          # Skip the collector namespace's own pod logs.
          exclude: ['/var/log/pods/opentelemetry-collector_*/*/*.log']
          start_at: end
          operators:
            - type: container
        otlp:
          protocols:
            grpc:
              endpoint: '${env:MY_POD_IP}:4317'
            http:
              endpoint: '${env:MY_POD_IP}:4318'
      processors:
        batch: {}
        k8sattributes: {}
      exporters:
        # NOTE(review): this exporter is not referenced by the logs/traces
        # pipelines below; confirm whether another pipeline (from base
        # values) still uses it.
        otlp:
          endpoint: http://openobserve-web.openobserve.svc.cluster.local:5080
          tls:
            insecure: true
        otlphttp/openobserve:
          endpoint: http://openobserve-web.openobserve.svc.cluster.local:5080/api/default
          headers:
            Authorization: '${file:/vault/secrets/openobserve-basic-auth}'
          tls:
            insecure: true
      service:
        pipelines:
          logs:
            receivers: [filelog]
            processors: [k8sattributes, batch]
            exporters: [otlphttp/openobserve]
          traces:
            receivers: [otlp]
            processors: [k8sattributes, batch]
            exporters: [otlphttp/openobserve]
|
||||
@ -0,0 +1,19 @@
|
||||
# Cluster-specific overrides for the opentelemetry-operator HelmRelease.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-operator
  namespace: opentelemetry-operator
spec:
  interval: 5m
  timeout: 10m
  values:
    imagePullSecrets:
      - name: regcred
    manager:
      # Collector image is pulled from the private registry.
      collectorImage:
        repository: cr.yandex/crp3ccidau046kdj8g9q/opentelemetry-collector
    # Webhook certificate: cert-manager integration off, chart-generated
    # certificate on.
    admissionWebhooks:
      certManager:
        enabled: false
      autoGenerateCert:
        enabled: true
|
||||
@ -0,0 +1,73 @@
|
||||
# Cluster-specific overrides for the postgres-exporter HelmRelease: one
# exporter pointed at the PostgreSQL server 192.168.10.8:5432, with one named
# datasource per database.
# NOTE(review): the source view lost indentation and the chart (0.0.2-prod) is
# private; nesting of `pullSecrets` and `datasources` is reconstructed -
# verify against the chart's values.yaml.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: postgres-exporter
  namespace: postgres-exporter
spec:
  dependsOn:
    - name: prometheus-stack
      namespace: prometheus-stack
  interval: 5m
  timeout: 10m
  chart:
    spec:
      version: 0.0.2-prod
  values:
    image:
      # NOTE(review): the tag starts with `preprod_` although this is the
      # brusnika-prod cluster - confirm this is the intended build.
      name: cr.yandex/crp3ccidau046kdj8g9q/postgres-exporter:preprod_21350302
      pullPolicy: IfNotPresent
      # NOTE(review): the other releases in this change pull with `regcred`;
      # confirm a `dockerhub` secret exists in this namespace and grants
      # access to cr.yandex.
      pullSecrets: [dockerhub]
    serviceMonitor:
      enabled: true
      namespace: postgres-exporter
    config:
      # Primary connection; the port is kept as a string.
      datasource:
        host: 192.168.10.8
        user: root
        port: "5432"
        database: postgres
        sslmode: disable
      # One entry per monitored database (host:port/dbname, TLS disabled).
      datasources:
        - name: attachments
          uri: 192.168.10.8:5432/attachments_db?sslmode=disable
        - name: bim
          uri: 192.168.10.8:5432/bimapidb?sslmode=disable
        - name: comparisons
          uri: 192.168.10.8:5432/comparisons_db?sslmode=disable
        - name: django
          uri: 192.168.10.8:5432/sarex_db?sslmode=disable
        - name: documentations
          uri: 192.168.10.8:5432/documentations?sslmode=disable
        - name: drawings
          uri: 192.168.10.8:5432/drawings?sslmode=disable
        - name: eav
          uri: 192.168.10.8:5432/eav?sslmode=disable
        - name: flows
          uri: 192.168.10.8:5432/flows_db?sslmode=disable
        - name: inspections
          uri: 192.168.10.8:5432/inspections_db?sslmode=disable
        - name: issues
          uri: 192.168.10.8:5432/issues?sslmode=disable
        - name: notes
          uri: 192.168.10.8:5432/notes_db?sslmode=disable
        - name: openobserve
          uri: 192.168.10.8:5432/openobserve?sslmode=disable
        - name: postgres
          uri: 192.168.10.8:5432/postgres?sslmode=disable
        - name: resources
          uri: 192.168.10.8:5432/resources?sslmode=disable
        - name: rfi
          uri: 192.168.10.8:5432/rfi_db?sslmode=disable
        - name: subscriptions
          uri: 192.168.10.8:5432/subscriptions?sslmode=disable
        - name: system-log
          uri: 192.168.10.8:5432/system_log?sslmode=disable
        - name: transmittal
          uri: 192.168.10.8:5432/transmittal_db?sslmode=disable
        - name: workflow
          uri: 192.168.10.8:5432/workflows_db?sslmode=disable
        - name: workspaces
          uri: 192.168.10.8:5432/workspaces_db?sslmode=disable
        - name: zitadel
          uri: 192.168.10.8:5432/zitadel?sslmode=disable
|
||||
File diff suppressed because one or more lines are too long
106
clusters/brusnika-prod/infrastructure/patches/vmstack.yaml
Normal file
106
clusters/brusnika-prod/infrastructure/patches/vmstack.yaml
Normal file
@ -0,0 +1,106 @@
|
||||
# Cluster-specific overrides for the vmstack (VictoriaMetrics k8s stack)
# HelmRelease: single-node storage, vmagent writing to it, vmalert +
# alertmanager routing everything to GoAlert. Bundled node-exporter and
# Grafana are off.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: vmstack
  namespace: vmstack
spec:
  interval: 5m
  timeout: 20m
  values:
    global:
      clusterLabel: cluster
    nameOverride: vmstack
    fullnameOverride: vmstack
    defaultRules:
      create: true
    victoria-metrics-operator:
      admissionWebhooks:
        enabled: false
    # Single-node storage: 1 week retention on a 100Gi csi-disk volume.
    vmsingle:
      enabled: true
      spec:
        retentionPeriod: 1w
        replicaCount: 1
        storage:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 100Gi
          storageClassName: csi-disk
    vmcluster:
      enabled: false
    vmagent:
      enabled: true
      spec:
        remoteWrite:
          # The host ends with a trailing dot (fully-qualified name).
          - url: http://vmsingle-vmstack.vmstack.svc.cluster.local.:8428/api/v1/write
    vmalert:
      enabled: true
      spec:
        updateStrategy: Recreate
        port: "8880"
        evaluationInterval: 30s
        externalLabels:
          cluster: brusnika-prod
        # Alerts go to the in-cluster alertmanager and to an external one.
        notifiers:
          - url: http://vmalertmanager-vmstack-0.vmalertmanager-vmstack.vmstack.svc.cluster.local:9093
          - url: https://vmalertmanager.sarex.tech
        extraArgs:
          external.url: https://vmalert.brusnika.onprem.sarex.io
      ingress:
        enabled: false
    alertmanager:
      enabled: true
      spec:
        replicaCount: 1
        # NOTE(review): same public URL as vmalert's external.url above -
        # confirm this is the intended link target for alertmanager.
        externalURL: https://vmalert.brusnika.onprem.sarex.io
        volumes: []
        volumeMounts: []
      config:
        global:
          resolve_timeout: 5m
        # Every alert goes to the goalert receiver.
        route:
          receiver: goalert
          group_by:
            - alertname
            - namespace
            - pod
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 6h
        receivers:
          # Quoted so YAML reads the string "null", not a null value.
          - name: "null"
          - name: goalert
            webhook_configs:
              - url: http://goalert.goalert.svc.cluster.local:8081/api/v2/prometheusalertmanager/incoming
                send_resolved: true
    kube-state-metrics:
      enabled: true
    # Bundled node-exporter and its scrape object are off; the scrape spec
    # is kept but has no effect while `enabled` is false.
    prometheus-node-exporter:
      enabled: false
      vmScrape:
        enabled: false
        spec:
          jobLabel: jobLabel
          selector:
            matchLabels:
              app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
          endpoints:
            - port: metrics
              relabelConfigs:
                - action: replace
                  sourceLabels:
                    - __meta_kubernetes_pod_node_name
                  targetLabel: node
                - targetLabel: job
                  replacement: vm-stack/vm-prod-node-exporter

              metricRelabelConfigs:
                # Drop per-pod kubelet mountpoint series.
                # Key spelled `sourceLabels` to match relabelConfigs above
                # (the original used the snake_case alias `source_labels`).
                - action: drop
                  sourceLabels: [mountpoint]
                  regex: "/var/lib/kubelet/pods.+"
      # NOTE(review): nesting of `vmNodeScrape` is reconstructed (the source
      # view lost indentation); it may belong at the top level of `values` -
      # verify against the base HelmRelease / chart values.
      vmNodeScrape:
        enabled: false
    grafana:
      enabled: false
|
||||
Loading…
Reference in New Issue
Block a user