Add observability stack to brusnika-stage

This commit is contained in:
Kochetkov S 2026-06-15 10:50:48 +03:00
parent 83965881c7
commit 374973950b
12 changed files with 759 additions and 2 deletions

View File

@ -7,6 +7,15 @@ resources:
- ../../../infrastructure/istio-config
- ../../../infrastructure/vault
- ../../../infrastructure/zitadel
# Observability stack added to the stage overlay: eight component directories under
# ../../../infrastructure plus one overlay-local manifest (node-exporter-vmnodescrape.yaml).
- ../../../infrastructure/openobserve
- ../../../infrastructure/vmstack
- ../../../infrastructure/prometheus-stack
- ../../../infrastructure/opentelemetry-operator
- ../../../infrastructure/opentelemetry-collector
- ../../../infrastructure/goalert
- ../../../infrastructure/kafka-exporter
- ../../../infrastructure/postgres-exporter
- ./node-exporter-vmnodescrape.yaml
- ./lb-service-override.yaml
- ./vault-ingress.yaml
- ./clusterissuer-letsencrypt.yaml
@ -40,6 +49,62 @@ patches:
kind: HelmRelease
name: zitadel
namespace: zitadel
# Stage-specific overrides for the observability releases: each entry applies the file
# under ./patches to the HelmRelease (helm.toolkit.fluxcd.io/v2) with the matching
# name and namespace. Release name and namespace are identical for every entry below.
- path: ./patches/openobserve.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: openobserve
namespace: openobserve
- path: ./patches/vmstack.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: vmstack
namespace: vmstack
- path: ./patches/prometheus-stack.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: prometheus-stack
namespace: prometheus-stack
- path: ./patches/opentelemetry-operator.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: opentelemetry-operator
namespace: opentelemetry-operator
- path: ./patches/opentelemetry-collector.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: opentelemetry-collector
namespace: opentelemetry-collector
- path: ./patches/goalert.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: goalert
namespace: goalert
- path: ./patches/kafka-exporter.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: kafka-exporter
namespace: kafka-exporter
- path: ./patches/postgres-exporter.yaml
target:
group: helm.toolkit.fluxcd.io
version: v2
kind: HelmRelease
name: postgres-exporter
namespace: postgres-exporter
- path: ./patches/failed-pod-cleanup.yaml
target:
@ -47,4 +112,4 @@ patches:
version: v2
kind: HelmRelease
name: failed-pod-cleanup
namespace: default
namespace: default

View File

@ -0,0 +1,20 @@
# VMNodeScrape for node-exporter on the stage cluster.
# Scrapes http://<node>:9100/metrics; the empty selector puts no label restriction
# on which nodes are scraped.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMNodeScrape
metadata:
  name: vm-stage-node-exporter
  namespace: vmstack
spec:
  path: /metrics
  # Quoted so the port stays a string rather than an integer.
  port: "9100"
  scheme: http
  selector: {}
  relabelConfigs:
    # Copy every Kubernetes node label onto the scraped series.
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
    # Expose the node name as the `node` label.
    - action: replace
      sourceLabels:
        - __meta_kubernetes_node_name
      targetLabel: node
    # Force a fixed job label for all node-exporter targets.
    - action: replace
      replacement: brusnika-stage/vm-node-exporter
      targetLabel: job

View File

@ -0,0 +1,35 @@
# Stage overrides for the goalert HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: goalert
namespace: goalert
spec:
interval: 5m
timeout: 10m
values:
# GoAlert image pinned to 0.32.0 in the cr.yandex registry.
image:
name: cr.yandex/crp3ccidau046kdj8g9q/goalert
tag: 0.32.0
pullPolicy: IfNotPresent
goalert:
environment:
# Empty HTTP prefix — presumably GoAlert is served from the root path; confirm.
GOALERT_HTTP_PREFIX: ""
# PostgreSQL is enabled with database and user both named "goalert",
# persisted on a 10Gi csi-disk volume.
postgresql:
enabled: true
auth:
database: goalert
username: goalert
sslmode: disable
primary:
persistence:
enabled: true
storageClass: csi-disk
size: 10Gi
# Ingress is disabled; className and hosts are still declared below.
# NOTE(review): the host is vmalert.test.sarex.brusnika.tech although this
# release is goalert — confirm the hostname is intended.
ingress:
enabled: false
className: nginx
hosts:
- host: vmalert.test.sarex.brusnika.tech
paths:
- path: /
pathType: Prefix

View File

@ -129,6 +129,27 @@ spec:
issuerRef:
name: letsencrypt
kind: ClusterIssuer
# TLS certificates for the three new observability hostnames (grafana, openobserve,
# vmalert). Each is declared in the ingress-nginx namespace and issued by the
# `letsencrypt` ClusterIssuer.
grafana-tls:
namespace: ingress-nginx
dnsNames:
- grafana.test.sarex.brusnika.tech
issuerRef:
name: letsencrypt
kind: ClusterIssuer
openobserve-tls:
namespace: ingress-nginx
dnsNames:
- openobserve.test.sarex.brusnika.tech
issuerRef:
name: letsencrypt
kind: ClusterIssuer
vmalert-tls:
namespace: ingress-nginx
dnsNames:
- vmalert.test.sarex.brusnika.tech
issuerRef:
name: letsencrypt
kind: ClusterIssuer
istio:
envoyFilters: {}
authorizationPolicies: {}
@ -294,6 +315,36 @@ spec:
- zitadel.test.sarex.brusnika.tech
tls:
credentialName: zitadel-tls
# Gateways for the new observability hostnames. Each one selects the
# `istio: ingressgateway` workload, serves a single host, and references the
# TLS credential named <service>-tls.
grafana:
name: grafana-gw
namespace: ingress-nginx
selector:
istio: ingressgateway
servers:
- hosts:
- grafana.test.sarex.brusnika.tech
tls:
credentialName: grafana-tls
openobserve:
name: openobserve-gw
namespace: ingress-nginx
selector:
istio: ingressgateway
servers:
- hosts:
- openobserve.test.sarex.brusnika.tech
tls:
credentialName: openobserve-tls
vmalert:
name: vmalert-gw
namespace: ingress-nginx
selector:
istio: ingressgateway
servers:
- hosts:
- vmalert.test.sarex.brusnika.tech
tls:
credentialName: vmalert-tls
virtualServices:
camunda-identity-vs:
@ -570,6 +621,41 @@ spec:
prefix: /
service: zitadel-idp-contour.zitadel.svc.cluster.local
port: 8080
# Routes "/" on grafana.test.sarex.brusnika.tech to the Grafana service
# of the prometheus-stack release (port 80).
grafana-vs:
namespace: prometheus-stack
hosts:
- grafana.test.sarex.brusnika.tech
gateways:
- ingress-nginx/grafana-gw
routes:
- path:
prefix: /
service: prometheus-stack-grafana.prometheus-stack.svc.cluster.local
port: 80
# Routes "/" on openobserve.test.sarex.brusnika.tech to openobserve-web (port 5080).
openobserve-vs:
namespace: openobserve
hosts:
- openobserve.test.sarex.brusnika.tech
gateways:
- ingress-nginx/openobserve-gw
routes:
- path:
prefix: /
service: openobserve-web.openobserve.svc.cluster.local
port: 5080
# NOTE(review): despite the "vmalert" name and hostname, this route targets the
# goalert service in the goalert namespace (port 8081) — confirm this is intended.
vmalert-vs:
namespace: goalert
hosts:
- vmalert.test.sarex.brusnika.tech
gateways:
- ingress-nginx/vmalert-gw
routes:
- path:
prefix: /
service: goalert.goalert.svc.cluster.local
port: 8081
# ---- Migrated from ugok snapshot (batch 1) ----
vs-bimv2-api:

View File

@ -0,0 +1,26 @@
# Stage overrides for the kafka-exporter HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kafka-exporter
  namespace: kafka-exporter
spec:
  interval: 5m
  timeout: 10m
  values:
    image:
      repository: cr.yandex/crp3ccidau046kdj8g9q/kafka-exporter
      # TODO: pin an immutable version tag (or digest) instead of `latest`.
      tag: latest
      # `latest` is a mutable tag: with IfNotPresent each node keeps whatever image
      # it cached first, so pods can silently run different builds. Always pull
      # until the tag is pinned (this is also the Kubernetes default for `:latest`).
      pullPolicy: Always
    kafkaExporter:
      kafka:
        # Plaintext broker endpoint; SASL and TLS are both switched off below.
        servers:
          - kafka.kafka.svc.cluster.local:9092
      sasl:
        enabled: false
      tls:
        enabled: false
    prometheus:
      # ServiceMonitor created in the release namespace, scraped every 30s.
      serviceMonitor:
        enabled: true
        namespace: kafka-exporter
        interval: 30s

View File

@ -0,0 +1,128 @@
# Stage overrides for the openobserve HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: openobserve
namespace: openobserve
spec:
interval: 5m
timeout: 10m
values:
universal-chart:
services:
openobserve:
deployment:
replicaCount:
_default: 1
# Plain (non-secret) environment: HTTP port 5080, non-local mode with a postgres
# meta store and NATS coordinator, and S3 object storage at the in-cluster MinIO
# endpoint (bucket open-observe). Numeric/boolean values are quoted to stay strings.
envs:
- name: ZO_HTTP_PORT
value:
_default: "5080"
- name: ZO_LOCAL_MODE
value:
_default: "false"
- name: ZO_META_STORE
value:
_default: postgres
- name: ZO_CLUSTER_COORDINATOR
value:
_default: nats
- name: ZO_NATS_REPLICAS
value:
_default: "1"
- name: ZO_S3_PROVIDER
value:
_default: s3
- name: ZO_S3_SERVER_URL
value:
_default: http://minio.minio.svc.cluster.local:9000
- name: ZO_S3_BUCKET_NAME
value:
_default: open-observe
- name: ZO_S3_REGION_NAME
value:
_default: ru-central1
- name: ZO_TELEMETRY
value:
_default: "false"
# Environment variables sourced from the `openobserve-secret` Secret; every entry
# uses a secret key with the same name as the variable.
secretEnvs:
- name: ZO_ROOT_USER_EMAIL
secretName:
_default: openobserve-secret
secretKey: ZO_ROOT_USER_EMAIL
- name: ZO_ROOT_USER_PASSWORD
secretName:
_default: openobserve-secret
secretKey: ZO_ROOT_USER_PASSWORD
- name: ZO_META_POSTGRES_DSN
secretName:
_default: openobserve-secret
secretKey: ZO_META_POSTGRES_DSN
- name: ZO_NATS_ADDR
secretName:
_default: openobserve-secret
secretKey: ZO_NATS_ADDR
- name: PGHOST
secretName:
_default: openobserve-secret
secretKey: PGHOST
- name: PGPORT
secretName:
_default: openobserve-secret
secretKey: PGPORT
- name: PGDATABASE
secretName:
_default: openobserve-secret
secretKey: PGDATABASE
- name: PGUSER
secretName:
_default: openobserve-secret
secretKey: PGUSER
- name: PGPASSWORD
secretName:
_default: openobserve-secret
secretKey: PGPASSWORD
- name: PGSSLMODE
secretName:
_default: openobserve-secret
secretKey: PGSSLMODE
- name: ZO_S3_ACCESS_KEY
secretName:
_default: openobserve-secret
secretKey: ZO_S3_ACCESS_KEY
- name: ZO_S3_SECRET_KEY
secretName:
_default: openobserve-secret
secretKey: ZO_S3_SECRET_KEY
serviceAccount:
enabled: true
name:
_default: openobserve-vault
# The pull secret `regcred` is referenced but not created by this release.
imagePullSecrets:
create:
_default: false
name:
_default: regcred
# Secret definition. Only non-credential keys are listed in `data`; the keys
# ZO_ROOT_USER_PASSWORD, ZO_META_POSTGRES_DSN, PGPASSWORD, ZO_S3_ACCESS_KEY and
# ZO_S3_SECRET_KEY referenced above are not set here — presumably supplied through
# Vault (see `vault.enabled` below); confirm.
openobserve:
secret:
create: true
# Additional namespace listed for the secret (the collector's namespace).
extraNamespaces:
- opentelemetry-collector
data:
ZO_ROOT_USER_EMAIL: admin@openobserve.test.sarex.brusnika.tech
PGHOST: 192.168.2.45
PGPORT: "5432"
PGDATABASE: openobserve
PGUSER: openobserve
PGSSLMODE: disable
ZO_NATS_ADDR: nats://openobserve-nats:4222
# Single-replica NATS with a 10Gi csi-disk volume.
nats:
enabled: true
replicaCount: 1
persistence:
enabled: true
size: 10Gi
storageClassName: csi-disk
otelCollector:
enabled: false
vault:
enabled: true

View File

@ -0,0 +1,76 @@
# Stage overrides for the opentelemetry-collector HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
# Runs the collector as a DaemonSet that tails pod logs and receives OTLP traces,
# then ships both to OpenObserve over OTLP/HTTP.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-collector
  namespace: opentelemetry-collector
spec:
  interval: 5m
  timeout: 30m
  values:
    imagePullSecrets:
      - name: regcred
    mode: daemonset
    fullnameOverride: otel-collector
    rollout:
      rollingUpdate:
        maxUnavailable: 4
    # Basic-auth credential for OpenObserve, read from the openobserve-secret Secret
    # and injected into the Authorization header of the otlphttp exporter below.
    extraEnvs:
      - name: OPENOBSERVE_BASIC_AUTH
        valueFrom:
          secretKeyRef:
            name: openobserve-secret
            key: OPENOBSERVE_BASIC_AUTH
    presets:
      logsCollection:
        enabled: true
        includeCollectorLogs: false
      kubernetesAttributes:
        enabled: true
    config:
      receivers:
        # Tail all pod log files except the collector's own, starting at the end
        # of each file.
        filelog:
          include:
            - /var/log/pods/*/*/*.log
          exclude:
            - /var/log/pods/opentelemetry-collector_*/*/*.log
          start_at: end
          operators:
            - type: container
        otlp:
          protocols:
            grpc:
              endpoint: ${env:MY_POD_IP}:4317
            http:
              endpoint: ${env:MY_POD_IP}:4318
      processors:
        batch: {}
        k8sattributes: {}
      exporters:
        # NOTE(review): this `otlp` exporter is not referenced by any pipeline in
        # this file and points at the same port (5080) as the HTTP exporter below.
        # Confirm whether the base release uses it; otherwise it is dead config.
        otlp:
          endpoint: http://openobserve-web.openobserve.svc.cluster.local:5080
          tls:
            insecure: true
        otlphttp/openobserve:
          endpoint: http://openobserve-web.openobserve.svc.cluster.local:5080/api/default
          headers:
            Authorization: Basic ${env:OPENOBSERVE_BASIC_AUTH}
          tls:
            insecure: true
      service:
        pipelines:
          # NOTE(review): neither pipeline lists a memory_limiter processor —
          # confirm whether one is expected on stage.
          logs:
            receivers:
              - filelog
            processors:
              - k8sattributes
              - batch
            exporters:
              - otlphttp/openobserve
          traces:
            receivers:
              - otlp
            processors:
              - k8sattributes
              - batch
            exporters:
              - otlphttp/openobserve

View File

@ -0,0 +1,19 @@
# Stage overrides for the opentelemetry-operator HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-operator
  namespace: opentelemetry-operator
spec:
  interval: 5m
  timeout: 10m
  values:
    imagePullSecrets:
      - name: regcred
    manager:
      # Collector image is taken from the cr.yandex registry; no tag is set here.
      collectorImage:
        repository: cr.yandex/crp3ccidau046kdj8g9q/opentelemetry-collector
    # Webhook certificate is auto-generated by the chart instead of cert-manager.
    admissionWebhooks:
      certManager:
        enabled: false
      autoGenerateCert:
        enabled: true

View File

@ -0,0 +1,70 @@
# Stage overrides for the postgres-exporter HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: postgres-exporter
namespace: postgres-exporter
spec:
interval: 5m
timeout: 10m
# Chart version is pinned for this environment.
chart:
spec:
version: 0.0.2-prod
values:
# The image reference carries its tag inline (":preprod_21350302") in `name`.
image:
name: cr.yandex/crp3ccidau046kdj8g9q/postgres-exporter:preprod_21350302
pullPolicy: IfNotPresent
pullSecrets:
- dockerhub
serviceMonitor:
enabled: true
namespace: postgres-exporter
config:
# Default connection to the PostgreSQL server at 192.168.2.45:5432, TLS disabled.
# NOTE(review): connects as `root`; no password is set in this file — confirm where
# the credential comes from and whether a dedicated monitoring role should be used.
datasource:
host: 192.168.2.45
user: root
port: '5432'
database: postgres
sslmode: disable
# One entry per monitored database; all target the same server with sslmode=disable.
datasources:
- name: attachments
uri: 192.168.2.45:5432/attachments_db?sslmode=disable
- name: bim
uri: 192.168.2.45:5432/bimapidb?sslmode=disable
- name: comparisons
uri: 192.168.2.45:5432/comparisons_db?sslmode=disable
- name: django
uri: 192.168.2.45:5432/sarex_db?sslmode=disable
- name: documentations
uri: 192.168.2.45:5432/documentations?sslmode=disable
- name: drawings
uri: 192.168.2.45:5432/drawings?sslmode=disable
- name: eav
uri: 192.168.2.45:5432/eav?sslmode=disable
- name: flows
uri: 192.168.2.45:5432/flows_db?sslmode=disable
- name: inspections
uri: 192.168.2.45:5432/inspections_db?sslmode=disable
- name: issues
uri: 192.168.2.45:5432/issues?sslmode=disable
- name: notes
uri: 192.168.2.45:5432/notes_db?sslmode=disable
- name: openobserve
uri: 192.168.2.45:5432/openobserve?sslmode=disable
- name: postgres
uri: 192.168.2.45:5432/postgres?sslmode=disable
- name: resources
uri: 192.168.2.45:5432/resources?sslmode=disable
- name: rfi
uri: 192.168.2.45:5432/rfi_db?sslmode=disable
- name: subscriptions
uri: 192.168.2.45:5432/subscriptions?sslmode=disable
- name: system-log
uri: 192.168.2.45:5432/system_log?sslmode=disable
- name: transmittal
uri: 192.168.2.45:5432/transmittal_db?sslmode=disable
- name: workflow
uri: 192.168.2.45:5432/workflows_db?sslmode=disable
- name: workspaces
uri: 192.168.2.45:5432/workspaces_db?sslmode=disable
- name: zitadel
uri: 192.168.2.45:5432/zitadel?sslmode=disable

View File

@ -0,0 +1,134 @@
# Stage overrides for the prometheus-stack HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: prometheus-stack
namespace: prometheus-stack
spec:
interval: 5m
timeout: 20m
values:
fullnameOverride: prometheus-stack
crds:
enabled: true
defaultRules:
create: true
# Alertmanager, Prometheus and kube-state-metrics are switched off in this release;
# the operator, node-exporter and Grafana stay enabled.
alertmanager:
enabled: false
prometheus:
enabled: false
prometheusOperator:
enabled: true
kubeStateMetrics:
enabled: false
nodeExporter:
enabled: true
# node-exporter's own monitor is disabled here.
prometheus-node-exporter:
prometheus:
monitor:
enabled: false
jobLabel: node-exporter
# Three extra ServiceMonitors: Zitadel metrics via its public HTTPS hostname, plus
# the Istio ingress gateway and istiod.
extraServiceMonitors:
- name: zitadel-external-metrics
namespace: prometheus-stack
labels:
release: prometheus
spec:
endpoints:
- interval: 30s
scheme: https
path: /debug/metrics
port: https
tlsConfig:
serverName: zitadel.test.sarex.brusnika.tech
insecureSkipVerify: false
# Rewrite the scrape address to the external hostname on port 443.
relabelings:
- targetLabel: __address__
replacement: zitadel.test.sarex.brusnika.tech:443
namespaceSelector:
matchNames:
- prometheus-stack
selector:
matchLabels:
app: zitadel-metrics-ext
- name: prometheus-istio-ingressgateway
namespace: istio-system
labels:
release: prometheus
spec:
endpoints:
- interval: 15s
port: status-port
jobLabel: istio
namespaceSelector:
any: true
selector:
matchExpressions:
- key: istio
operator: In
values:
- ingressgateway
- name: prometheus-istio-istiod
namespace: istio-system
labels:
release: prometheus
spec:
endpoints:
- interval: 15s
port: http-monitoring
jobLabel: istio
namespaceSelector:
any: true
selector:
matchExpressions:
- key: istio
operator: In
values:
- pilot
# Grafana reads its admin credentials from the `grafana-admin` Secret
# (keys admin-user / admin-password).
grafana:
enabled: true
adminUser: grafana-admin
admin:
existingSecret: grafana-admin
userKey: admin-user
passwordKey: admin-password
# Templated `grafana-admin` Secret: when `lookup` finds an existing Secret its
# admin-password is reused, otherwise a random 40-character password is generated.
extraObjects:
- |
{{- $secret := lookup "v1" "Secret" .Release.Namespace "grafana-admin" }}
apiVersion: v1
kind: Secret
metadata:
name: grafana-admin
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: Helm
type: Opaque
data:
admin-user: {{ .Values.adminUser | b64enc | quote }}
admin-password: {{ if $secret }}{{ index $secret.data "admin-password" | quote }}{{ else }}{{ randAlphaNum 40 | b64enc | quote }}{{ end }}
persistence:
enabled: true
type: sts
storageClassName: csi-disk
accessModes:
- ReadWriteOnce
size: 20Gi
ingress:
enabled: false
env:
GF_SERVER_DOMAIN: grafana.test.sarex.brusnika.tech
GF_SERVER_ROOT_URL: https://grafana.test.sarex.brusnika.tech/
sidecar:
dashboards:
enabled: true
searchNamespace: ALL
label: grafana_dashboard
labelValue: "1"
datasources:
enabled: true
defaultDatasourceEnabled: true
isDefaultDatasource: true
name: VictoriaMetrics
uid: vm
url: http://vmsingle-vmstack.vmstack.svc.cluster.local:8428

View File

@ -11,4 +11,4 @@ spec:
- name: regcred
server:
dataStorage:
storageClass: local-path
storageClass: csi-disk

View File

@ -0,0 +1,98 @@
# Stage overrides for the vmstack HelmRelease (Flux helm.toolkit.fluxcd.io/v2).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: vmstack
namespace: vmstack
spec:
interval: 5m
timeout: 20m
values:
global:
clusterLabel: brusnika-stage
nameOverride: vmstack
fullnameOverride: vmstack
defaultRules:
create: true
victoria-metrics-operator:
admissionWebhooks:
enabled: false
# Single-node storage: one replica, one week retention, 100Gi csi-disk volume.
# vmcluster is disabled.
vmsingle:
enabled: true
spec:
retentionPeriod: 1w
replicaCount: 1
storage:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Gi
storageClassName: csi-disk
vmcluster:
enabled: false
# vmagent writes to the vmsingle service; the host is written with a trailing dot
# (fully-qualified form).
vmagent:
enabled: true
spec:
remoteWrite:
- url: http://vmsingle-vmstack.vmstack.svc.cluster.local.:8428/api/v1/write
vmalert:
enabled: true
spec:
evaluationInterval: 30s
extraArgs:
external.url: https://vmalert.test.sarex.brusnika.tech
ingress:
enabled: false
# NOTE(review): alertmanager's externalURL is the same host as vmalert's
# external.url above — confirm both are meant to share one hostname.
alertmanager:
enabled: true
spec:
replicaCount: 1
externalURL: https://vmalert.test.sarex.brusnika.tech
volumes: []
volumeMounts: []
config:
global:
resolve_timeout: 5m
# All alerts go to the single `goalert` receiver, grouped by
# alertname/namespace/pod.
route:
receiver: goalert
group_by:
- alertname
- namespace
- pod
group_wait: 30s
group_interval: 5m
repeat_interval: 6h
# Webhook into GoAlert's Prometheus Alertmanager endpoint; resolved alerts
# are sent too.
receivers:
- name: goalert
webhook_configs:
- url: http://goalert.goalert.svc.cluster.local:8081/api/v2/prometheusalertmanager/incoming
send_resolved: true
kube-state-metrics:
enabled: true
# The bundled node-exporter and its scrape are disabled in this release; the
# vmScrape spec below is still defined.
prometheus-node-exporter:
enabled: false
vmScrape:
enabled: false
spec:
jobLabel: jobLabel
selector:
matchLabels:
app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
endpoints:
- port: metrics
relabelConfigs:
- action: replace
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: node
- targetLabel: job
replacement: brusnika-stage/vm-node-exporter
# Drop series whose mountpoint is under /var/lib/kubelet/pods.
metricRelabelConfigs:
- action: drop
source_labels: [mountpoint]
regex: "/var/lib/kubelet/pods.+"
vmNodeScrape:
enabled: false
grafana:
enabled: false