Add pg sanitizer Argo workflow

This commit is contained in:
Kochetkov S 2026-06-11 13:18:59 +03:00
parent ef69ec43a5
commit c6f31a50bd
6 changed files with 158 additions and 0 deletions

View File

@ -4,6 +4,7 @@ resources:
- ../../../infrastructure/argo-workflows
- ../../../infrastructure/argo-events
- ../../../infrastructure/postgresql
- ../../../infrastructure/pg-sanitizer
- ../../../infrastructure/vault-unseal
- ../../../infrastructure/vault
- ../../../infrastructure/yc-pg-dumper

View File

@ -0,0 +1,22 @@
# Daily trigger for the `company-dump-sanitizer` WorkflowTemplate.
apiVersion: argoproj.io/v1alpha1
kind: CronWorkflow
metadata:
  namespace: argo
  name: company-dump-sanitizer-daily
spec:
  # Fires once a day at 07:00, evaluated in the Europe/Moscow time zone.
  timezone: Europe/Moscow
  schedule: '0 7 * * *'
  # Do not start a new run while the previous one is still in progress.
  concurrencyPolicy: Forbid
  # Number of finished Workflow objects kept per outcome.
  failedJobsHistoryLimit: 3
  successfulJobsHistoryLimit: 3
  workflowSpec:
    workflowTemplateRef:
      name: company-dump-sanitizer
    # Values passed to the referenced template for scheduled runs.
    arguments:
      parameters:
        # JSON list; the template fans out over it with `withParam`.
        - name: company_ids
          value: '[330]'
        - name: bucket
          value: prod-pg-dumps
        # Explicit empty string (not null).
        - name: source_prefix
          value: ''

View File

@ -0,0 +1,6 @@
# Kustomize manifest listing the resource files that make up this directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - serviceaccount.yaml
  - workflowtemplate.yaml
  - cronworkflow.yaml

View File

@ -0,0 +1,6 @@
# ServiceAccount referenced by the company-dump-sanitizer WorkflowTemplate
# (`serviceAccountName: pg-sanitizer`).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: argo
  name: pg-sanitizer
# Mount the API token into pods using this account.
# NOTE(review): presumably required for the Vault Kubernetes auth used by the
# workflow pods — confirm before changing.
automountServiceAccountToken: true

View File

@ -0,0 +1,119 @@
# WorkflowTemplate that runs the sanitizer container once per company ID.
#
# Parameters (see spec.arguments):
#   company_ids     - JSON list of company IDs; one step is created per item.
#   bucket          - passed to the container as S3_BUCKET.
#   source_prefix   - passed to the container as SOURCE_PREFIX.
#   s3_endpoint_url - passed to the container as S3_ENDPOINT_URL.
#   db_items        - space-separated `a:b` pairs passed as DB_ITEMS.
#                     NOTE(review): the meaning of each side of the pair is
#                     defined by the sanitizer script — confirm there.
apiVersion: argoproj.io/v1alpha1
kind: WorkflowTemplate
metadata:
  name: company-dump-sanitizer
  namespace: argo
spec:
  entrypoint: sanitize-companies
  serviceAccountName: pg-sanitizer
  # At most one pod runs at a time, so companies are processed one by one.
  parallelism: 1
  # Whole-workflow deadline: 24 hours.
  activeDeadlineSeconds: 86400
  # Finished workflows are deleted after 3 days, regardless of outcome.
  ttlStrategy:
    secondsAfterSuccess: 259200
    secondsAfterFailure: 259200
  podMetadata:
    annotations:
      vault.hashicorp.com/agent-init-first: "true"
      vault.hashicorp.com/agent-inject: "true"
      vault.hashicorp.com/agent-pre-populate-only: "true"
      vault.hashicorp.com/auth-path: "auth/kubernetes"
      vault.hashicorp.com/role: "pg-sanitizer"
      vault.hashicorp.com/agent-inject-secret-pg-sanitizer-env: "secrets/data/yc-infra-prod/infra/pg_dumper/pg_dumper_secret"
      # Rendered to /vault/secrets/pg-sanitizer-env (see VAULT_ENV_FILE below).
      #
      # The second `with` deliberately has NO left trim marker (`{{ with`, not
      # `{{- with`): a left trim would swallow the newline after the preceding
      # `{{- end }}` and glue `export PGPASSWORD=...` onto the end of the
      # AWS_SECRET_ACCESS_KEY line, corrupting that value.
      #
      # NOTE(review): `%q` produces a Go double-quoted string; if the file is
      # sourced by a shell, `$` or backticks inside a secret would be expanded.
      # Confirm the secrets never contain such characters.
      vault.hashicorp.com/agent-inject-template-pg-sanitizer-env: |
        {{- with secret "secrets/data/yc-infra-prod/infra/pg_dumper/pg_dumper_secret" -}}
        export AWS_ACCESS_KEY_ID={{ printf "%q" .Data.data.aws_access_key_id }}
        export AWS_SECRET_ACCESS_KEY={{ printf "%q" .Data.data.aws_secret_access_key }}
        {{- end }}
        {{ with secret "secrets/data/yc-infra-prod/infra/postgresql_contour/postgresql_contour_secret" -}}
        export PGPASSWORD={{ printf "%q" (index .Data.data "postgres-password") }}
        {{- end }}
  # Defaults; a CronWorkflow or manual submission may override them.
  arguments:
    parameters:
      - name: company_ids
        value: "[330]"
      - name: bucket
        value: prod-pg-dumps
      - name: source_prefix
        value: ""
      - name: s3_endpoint_url
        value: https://storage.yandexcloud.net
      # Folded scalar: the lines below are joined with single spaces.
      - name: db_items
        value: >-
          attachments:attachments resources:resources inspections:inspections issues:issues
          notes:notes processing:processing workspaces:workspaces comparisons:comparisons
          checklists:checklists contracts:contracts drawings:drawings remarks:remarks
          subscriptions:subscriptions system-log:system-log transmittal:transmittal
          pm_db:pm_db pulse_db:pulse_db eav:eav documentations:documentations
          flows:restored_flow_db rfi:rfi sarex_db:sarex_db
  templates:
    # Entry point: fan out over company_ids, one `sanitize-company` per item.
    - name: sanitize-companies
      steps:
        - - name: sanitize-company
            template: sanitize-company
            arguments:
              parameters:
                - name: company_id
                  value: "{{item}}"
            withParam: "{{workflow.parameters.company_ids}}"
    # Runs the sanitizer script for a single company.
    - name: sanitize-company
      inputs:
        parameters:
          - name: company_id
      # Schedule only onto nodes labelled and tainted `dedicated=sts`.
      nodeSelector:
        dedicated: sts
      tolerations:
        - key: dedicated
          operator: Equal
          value: sts
          effect: NoSchedule
      container:
        image: cr.yandex/crp3ccidau046kdj8g9q/yc-pg-sanitizer:0.1.0
        imagePullPolicy: IfNotPresent
        command:
          - /usr/local/bin/company_sanitize_from_s3.sh
        env:
          # Path of the file rendered by the Vault agent template above.
          - name: VAULT_ENV_FILE
            value: /vault/secrets/pg-sanitizer-env
          - name: COMPANY_ID
            value: "{{inputs.parameters.company_id}}"
          - name: S3_BUCKET
            value: "{{workflow.parameters.bucket}}"
          - name: SOURCE_PREFIX
            value: "{{workflow.parameters.source_prefix}}"
          - name: S3_ENDPOINT_URL
            value: "{{workflow.parameters.s3_endpoint_url}}"
          # Workflow name plus company ID.
          - name: RUN_ID
            value: "{{workflow.name}}-company-{{inputs.parameters.company_id}}"
          # PostgreSQL connection settings (password comes from the Vault file).
          - name: PGHOST
            value: postgresql.postgresql.svc
          - name: PGPORT
            value: "5432"
          - name: PGUSER
            value: postgres
          - name: PGSSLMODE
            value: disable
          # Tuning knobs read by the sanitizer script.
          - name: PGRESTORE_JOBS
            value: "6"
          - name: DB_WORK_MEM
            value: 256MB
          - name: DB_MAINTENANCE_WORK_MEM
            value: 2GB
          - name: DB_TEMP_BUFFERS
            value: 256MB
          - name: DB_ITEMS
            value: "{{workflow.parameters.db_items}}"
          # Matches the mountPath of the `work` volume below.
          - name: WORK_ROOT
            value: /work
        resources:
          requests:
            cpu: "2"
            memory: 4Gi
            # Same size as the `work` emptyDir limit.
            ephemeral-storage: 120Gi
        volumeMounts:
          - name: work
            mountPath: /work
      # Scratch space for the container, capped at 120Gi.
      volumes:
        - name: work
          emptyDir:
            sizeLimit: 120Gi

View File

@ -0,0 +1,4 @@
# Kustomize wrapper that pulls in everything from the `base` directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base