```yaml
# argocd-cm.yaml - cluster and resource configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: argocd-cm
  namespace: argocd
data:
  # Cluster configuration
  clusters: |
    - name: production-us-east-1
      server: https://prod-eks-us-east-1.eks.amazonaws.com
      config:
        awsAuthConfig:
          clusterName: prod-eks-us-east-1
          roleARN: arn:aws:iam::123456789012:role/argocd-manager
        tlsClientConfig:
          insecure: false
          caData: <base64-encoded-ca-cert>
    - name: production-eu-west-1
      server: https://prod-eks-eu-west-1.eks.amazonaws.com
      config:
        awsAuthConfig:
          clusterName: prod-eks-eu-west-1
          roleARN: arn:aws:iam::123456789012:role/argocd-manager
    - name: staging-cluster
      server: https://staging-eks.eks.amazonaws.com
      config:
        awsAuthConfig:
          clusterName: staging-eks
          roleARN: arn:aws:iam::123456789012:role/argocd-staging-manager
    - name: edge-clusters
      server: https://edge-k8s-federation.example.com
      config:
        tlsClientConfig:
          insecure: false
          serverName: edge-federation

  # Resource exclusions
  resource.exclusions: |
    - apiGroups:
        - "coordination.k8s.io"
      kinds:
        - "Lease"
      clusters:
        - "*"
    - apiGroups:
        - ""
      kinds:
        - "Event"
        - "Node"
      clusters:
        - "edge-*"

  # Resource comparison options
  resource.compareoptions: |
    ignoreAggregatedRoles: true
    ignoreResourceStatusField: crd
    server-side-diff: true

  # Sync options
  resource.sync-options: |
    respectRBAC: true
    createNamespace: true
    prunePropagationPolicy: foreground
    replace: false
---
# argocd-rbac-cm.yaml - RBAC configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: argocd-rbac-cm
  namespace: argocd
data:
  policy.default: role:readonly
  policy.csv: |
    # Cluster admin permissions
    p, role:cluster-admin, applications, *, */*, allow
    p, role:cluster-admin, clusters, *, *, allow
    p, role:cluster-admin, repositories, *, *, allow
    p, role:cluster-admin, certificates, *, *, allow
    p, role:cluster-admin, projects, *, *, allow
    p, role:cluster-admin, accounts, *, *, allow
    p, role:cluster-admin, gpgkeys, *, *, allow

    # Project admin permissions
    p, role:project-admin, applications, *, production/*, allow
    p, role:project-admin, repositories, get, *, allow
    p, role:project-admin, repositories, create, *, allow
    p, role:project-admin, repositories, update, *, allow
    p, role:project-admin, repositories, delete, *, allow

    # Read-only permissions
    p, role:readonly, applications, get, */*, allow
    p, role:readonly, applications, list, */*, allow
    p, role:readonly, clusters, get, *, allow
    p, role:readonly, clusters, list, *, allow
    p, role:readonly, repositories, get, *, allow
    p, role:readonly, repositories, list, *, allow

    # Sync permissions
    p, role:sync, applications, sync, production/*, allow
    p, role:sync, applications, get, production/*, allow
    p, role:sync, applications, list, production/*, allow

    # User group assignments
    g, cluster-admins, role:cluster-admin
    g, project-leads, role:project-admin
    g, developers, role:readonly
    g, release-managers, role:sync

    # Cluster-specific permissions
    g, prod-team, role:project-admin, production-us-east-1
    g, prod-team, role:project-admin, production-eu-west-1
    g, staging-team, role:project-admin, staging-cluster
    g, edge-team, role:project-admin, edge-clusters
```
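Before committing RBAC changes, the policy can be validated offline with the `argocd admin settings rbac` subcommands; a minimal sketch, assuming the `policy.csv` content above has been saved locally (path illustrative):

```bash
# Validate the policy syntax
argocd admin settings rbac validate --policy-file policy.csv

# Check whether a role would be allowed a specific action
argocd admin settings rbac can role:readonly get applications 'production/*' \
  --policy-file policy.csv
```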
### ApplicationSet Multi-Environment Management

Use ApplicationSet to manage applications uniformly across environments while still differentiating per-environment configuration:

```yaml
# applicationset-multi-env.yaml
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
  name: microservices-multi-env
  namespace: argocd
spec:
  generators:
    # Cluster generator
    - clusters:
        selector:
          matchExpressions:
            - key: environment
              operator: In
              values: ["production", "staging", "development"]
    # Git generator
    - git:
        repoURL: https://github.com/example/microservices
        revision: HEAD
        directories:
          - path: services/*
        template:
          metadata:
            name: '{{path.basename}}-{{name}}'
          spec:
            project: '{{metadata.labels.environment}}'
            source:
              repoURL: https://github.com/example/microservices
              targetRevision: '{{metadata.annotations.target_revision}}'
              path: '{{path}}'
            destination:
              server: '{{server}}'
              namespace: '{{path.basename}}-{{metadata.labels.environment}}'
  template:
    metadata:
      name: '{{service}}-{{cluster}}'
      labels:
        environment: '{{metadata.labels.environment}}'
        region: '{{metadata.labels.region}}'
        team: '{{metadata.labels.team}}'
      annotations:
        argocd.argoproj.io/sync-wave: "{{metadata.annotations.sync_wave}}"
        argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true
    spec:
      project: '{{metadata.labels.environment}}'
      source:
        repoURL: https://github.com/example/microservices
        targetRevision: '{{metadata.annotations.target_revision}}'
        path: 'services/{{service}}'
        # Kustomize configuration
        kustomize:
          namePrefix: '{{metadata.labels.environment}}-'
          nameSuffix: '-{{metadata.labels.region}}'
          commonLabels:
            app.kubernetes.io/environment: '{{metadata.labels.environment}}'
            app.kubernetes.io/region: '{{metadata.labels.region}}'
            app.kubernetes.io/version: '{{metadata.annotations.version}}'
          patches:
            - target:
                kind: Deployment
                name: '{{service}}'
              patch: |-
                - op: replace
                  path: /spec/replicas
                  value: {{metadata.annotations.replicas}}
                - op: add
                  path: /spec/template/spec/containers/0/resources
                  value:
                    requests:
                      cpu: '{{metadata.annotations.cpu_request}}'
                      memory: '{{metadata.annotations.memory_request}}'
                    limits:
                      cpu: '{{metadata.annotations.cpu_limit}}'
                      memory: '{{metadata.annotations.memory_limit}}'
      destination:
        server: '{{server}}'
        namespace: '{{service}}-{{metadata.labels.environment}}'
      syncPolicy:
        automated:
          prune: true
          selfHeal: true
          allowEmpty: false
        syncOptions:
          - CreateNamespace=true
          - PrunePropagationPolicy=foreground
          - RespectIgnoreDifferences=true
          - ApplyOutOfSyncOnly=true
        retry:
          limit: 5
          backoff:
            duration: 5s
            factor: 2
            maxDuration: 3m
        managedNamespaceMetadata:
          labels:
            argocd.argoproj.io/managed-by: '{{cluster}}'
            environment: '{{metadata.labels.environment}}'
      revisionHistoryLimit: 10
      ignoreDifferences:
        - group: apps
          kind: Deployment
          jsonPointers:
            - /spec/replicas
        - group: ""
          kind: Service
          jsonPointers:
            - /spec/clusterIP
      info:
        - name: "Environment"
          value: "{{metadata.labels.environment}}"
        - name: "Region"
          value: "{{metadata.labels.region}}"
        - name: "Team"
          value: "{{metadata.labels.team}}"
        - name: "Version"
          value: "{{metadata.annotations.version}}"
```

### Cluster Secret Management

Configure independent access credentials and permission controls for each cluster:

```yaml
# cluster-secrets.yaml
apiVersion: v1
kind: Secret
metadata:
  name: cluster-production-us-east-1
  namespace: argocd
  labels:
    argocd.argoproj.io/secret-type: cluster
type: Opaque
stringData:
  name: production-us-east-1
  server: https://prod-eks-us-east-1.eks.amazonaws.com
  config: |
    {
      "awsAuthConfig": {
        "clusterName": "prod-eks-us-east-1",
        "roleARN": "arn:aws:iam::123456789012:role/argocd-production-manager"
      },
      "tlsClientConfig": {
        "insecure": false,
        "caData": "<base64-encoded-ca-cert>"
      }
    }
---
apiVersion: v1
kind: Secret
metadata:
  name: cluster-edge-federation
  namespace: argocd
  labels:
    argocd.argoproj.io/secret-type: cluster
type: Opaque
stringData:
  name: edge-clusters
  server: https://edge-k8s-federation.example.com
  config: |
    {
      "bearerToken": "<edge-cluster-token>",
      "tlsClientConfig": {
        "insecure": false,
        "serverName": "edge-federation",
        "caData": "<base64-encoded-ca-cert>"
      }
    }
```
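As an alternative to writing these secrets by hand, a cluster can be registered with the CLI, which creates an equivalent `argocd.argoproj.io/secret-type: cluster` secret; a sketch assuming a kubeconfig context named `prod-us-east-1` and an illustrative API server host:

```bash
# Log in to the Argo CD API server first (host is illustrative)
argocd login argocd.example.com --sso

# Register the cluster from the named kubeconfig context
argocd cluster add prod-us-east-1 --name production-us-east-1

# Verify the connection state
argocd cluster list
```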
## Configuration Management and Differentiation Strategy

### Kustomize Multi-Environment Configuration

Use Kustomize to manage environment-specific configuration differences:

```yaml
# base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - deployment.yaml
  - service.yaml
  - configmap.yaml
  - serviceaccount.yaml

commonLabels:
  app.kubernetes.io/part-of: microservices-platform
  app.kubernetes.io/managed-by: argocd

images:
  - name: app-image
    newName: registry.example.com/microservices/app
    newTag: v1.2.3

configMapGenerator:
  - name: app-config
    literals:
      - LOG_LEVEL=info
      - METRICS_ENABLED=true
      - TRACING_ENABLED=true

secretGenerator:
  - name: app-secrets
    type: Opaque
    literals:
      - DATABASE_URL=<placeholder>
      - API_KEY=<placeholder>

patches:
  - target:
      kind: Deployment
      name: app
    patch: |-
      - op: add
        path: /spec/template/spec/containers/0/envFrom
        value:
          - configMapRef:
              name: app-config
          - secretRef:
              name: app-secrets
---
# overlays/production/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: production

bases:
  - ../../base

patchesStrategicMerge:
  - deployment-patch.yaml
  - service-patch.yaml

configMapGenerator:
  - name: app-config
    behavior: merge
    literals:
      - LOG_LEVEL=warn
      - REPLICAS=3
      - ENVIRONMENT=production

secretGenerator:
  - name: app-secrets
    behavior: merge
    literals:
      - DATABASE_URL=postgresql://prod-db.example.com:5432/myapp
      - REDIS_URL=redis://prod-redis.example.com:6379

images:
  - name: app-image
    newTag: v1.2.3-prod

commonLabels:
  app.kubernetes.io/environment: production
  app.kubernetes.io/version: v1.2.3-prod

replicas:
  - name: app
    count: 3
---
# overlays/production/deployment-patch.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app
spec:
  template:
    spec:
      containers:
        - name: app
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              cpu: 2000m
              memory: 4Gi
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          env:
            - name: ENVIRONMENT
              value: production
            - name: REGION
              value: us-east-1
            - name: CLUSTER
              value: production-us-east-1
```
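Overlays can be rendered locally before Argo CD ever sees them, which catches patch and merge mistakes early; a quick sanity check against the layout above:

```bash
# Render the production overlay and inspect the resulting manifests
kustomize build overlays/production | less

# Or let kubectl's built-in kustomize do a server-side dry run
kubectl apply -k overlays/production --dry-run=server
```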
### Helm Parameterized Configuration

Use Helm for more flexible, parameterized configuration management:

```yaml
# values-global.yaml - global configuration
global:
  imageRegistry: registry.example.com
  imagePullPolicy: IfNotPresent

  # Cluster configuration
  cluster:
    name: "{{ .Values.cluster.name }}"
    region: "{{ .Values.cluster.region }}"
    environment: "{{ .Values.cluster.environment }}"

  # Network configuration
  network:
    dnsDomain: cluster.local
    serviceCIDR: 10.96.0.0/12
    podCIDR: 10.244.0.0/16

  # Storage configuration
  storage:
    storageClass: "{{ .Values.cluster.storageClass | default \"standard\" }}"
    accessMode: ReadWriteOnce
---
# values-production.yaml - production environment
cluster:
  name: production-us-east-1
  region: us-east-1
  environment: production
  storageClass: gp3

replicaCount: 3

image:
  repository: microservices/app
  tag: v1.2.3-prod

resources:
  requests:
    cpu: 500m
    memory: 1Gi
  limits:
    cpu: 2000m
    memory: 4Gi

autoscaling:
  enabled: true
  minReplicas: 3
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
  targetMemoryUtilizationPercentage: 80

service:
  type: ClusterIP
  port: 80
  targetPort: 8080
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: nlb
    service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http

ingress:
  enabled: true
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    cert-manager.io/cluster-issuer: letsencrypt-prod
  hosts:
    - host: app.example.com
      paths:
        - path: /
          pathType: Prefix
  tls:
    - secretName: app-tls
      hosts:
        - app.example.com

monitoring:
  enabled: true
  serviceMonitor:
    enabled: true
    interval: 30s
    path: /metrics
    labels:
      release: prometheus
  prometheusRule:
    enabled: true
    groups:
      - name: app-alerts
        rules:
          - alert: HighErrorRate
            expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.1
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: High error rate detected
              description: Error rate is above 10% for 5 minutes
---
# values-staging.yaml - staging environment
cluster:
  name: staging-cluster
  region: us-west-2
  environment: staging
  storageClass: gp2

replicaCount: 1

image:
  repository: microservices/app
  tag: v1.2.3-staging

resources:
  requests:
    cpu: 100m
    memory: 256Mi
  limits:
    cpu: 500m
    memory: 1Gi

autoscaling:
  enabled: false

ingress:
  enabled: true
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    cert-manager.io/cluster-issuer: letsencrypt-staging
  hosts:
    - host: app-staging.example.com
      paths:
        - path: /
          pathType: Prefix
```
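Layered values files are easiest to verify by rendering them locally and confirming the merge order (later `-f` files win); a sketch assuming the chart lives at `./charts/app`, which is an illustrative path:

```bash
# Render with global values overridden by production values;
# staging would swap in values-staging.yaml instead
helm template app ./charts/app \
  -f values-global.yaml \
  -f values-production.yaml \
  --namespace production | kubectl apply --dry-run=client -f -
```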
## Sync Strategy and Conflict Resolution

### Intelligent Sync Strategy

Implement an intelligent sync mechanism based on dependencies and priorities:

```yaml
# sync-policy.yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: complex-microservice
  namespace: argocd
  annotations:
    argocd.argoproj.io/sync-wave: "5"
    argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true
spec:
  project: production
  source:
    repoURL: https://github.com/example/microservices
    targetRevision: HEAD
    path: applications/complex-service
    # Sync hook / plugin configuration
    plugin:
      name: kustomize
      env:
        - name: KUSTOMIZE_PLUGIN_HOME
          value: /custom-plugins
  destination:
    server: https://prod-eks-us-east-1.eks.amazonaws.com
    namespace: complex-service
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - RespectIgnoreDifferences=true
      - ApplyOutOfSyncOnly=true
      - ServerSideApply=true
      - Validate=false
    # Retry strategy
    retry:
      limit: 10
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 5m
  # Retry conditions
  retryStrategy:
    limit: 10
    duration: 5s
    factor: 2
    maxDuration: 5m
  # Sync windows
  syncWindows:
    - kind: allow
      schedule: '* * * * *'
      duration: 24h
      applications:
        - '*'
      namespaces:
        - '*'
      clusters:
        - '*'
    - kind: deny
      schedule: '0 2 * * *'
      duration: 1h
      applications:
        - 'production-*'
      namespaces:
        - production
      clusters:
        - 'production-*'
  # Ignore differences
  ignoreDifferences:
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas
        - /spec/template/spec/containers/0/image
    - group: ""
      kind: Service
      jsonPointers:
        - /spec/clusterIP
        - /spec/clusterIPs
    - group: networking.k8s.io
      kind: Ingress
      jsonPointers:
        - /status
  # Revision history limit
  revisionHistoryLimit: 20
  # Health check configuration
  health:
    # Custom health check
    custom: |
      hs = {}
      if obj.status ~= nil then
        if obj.status.conditions ~= nil then
          for i, condition in ipairs(obj.status.conditions) do
            if condition.type == "Ready" and condition.status == "False" then
              hs.status = "Degraded"
              hs.message = condition.message
              return hs
            end
          end
        end
      end
      hs.status = "Healthy"
      return hs
```

### Conflict Detection and Resolution

Implement intelligent conflict detection and automatic resolution:

```yaml
# conflict-resolution.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: argocd-cm
  namespace: argocd
data:
  # Resource conflict resolution strategy
  resource.conflict-resolution: |
    # Field-level conflict detection
    fieldConflictDetection: true
    # Conflict resolution strategies
    conflictResolutionStrategies:
      - resource: Deployment
        fields:
          - path: /spec/replicas
            strategy: server-preference   # use the server-side value
          - path: /spec/template/spec/containers/0/image
            strategy: git-preference      # use the Git value
          - path: /spec/template/spec/containers/0/resources
            strategy: merge               # merge both
      - resource: Service
        fields:
          - path: /spec/clusterIP
            strategy: server-preference   # keep the server-assigned IP
          - path: /spec/ports
            strategy: git-preference      # use the Git configuration
    # Automatic conflict resolution
    autoConflictResolution: true
    # Conflict notifications
    conflictNotifications:
      enabled: true
      channels:
        - type: slack
          webhook: https://hooks.slack.com/services/xxx
        - type: email
          recipients:
            - [email protected]
            - [email protected]

  # Sync policy configuration
  sync.policy: |
    # Force-sync strategy
    forceSync:
      enabled: false
      conditions:
        - conflictDetected
        - resourceNotFound
        - permissionDenied
    # Selective sync
    selectiveSync:
      enabled: true
      resources:
        - group: apps
          kind: Deployment
          syncOptions:
            - ServerSideApplyForce=true
        - group: ""
          kind: ConfigMap
          syncOptions:
            - Replace=true
    # Blue/green deployment support
    blueGreenDeployment:
      enabled: true
      trafficManagement:
        type: istio
        virtualService: traffic-router
        destinationRules:
          - primary-service
          - canary-service
```
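With `ServerSideApply=true`, field-ownership conflicts surface the same way they do with plain kubectl server-side apply, so they can be reproduced and inspected outside Argo CD; a minimal sketch against the Deployment above (the local manifest path is illustrative):

```bash
# Show which field managers own which fields on the live object
kubectl get deployment app -n complex-service \
  --show-managed-fields -o yaml | grep 'manager:'

# Re-apply server-side and take ownership of conflicting fields,
# mirroring what the ServerSideApplyForce=true sync option does
kubectl apply --server-side --force-conflicts -f deployment.yaml
```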
## Monitoring, Observability, and Alerting

### Multi-Cluster Monitoring Architecture

Build a unified multi-cluster monitoring and alerting system:

```yaml
# monitoring-stack.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-metrics
  namespace: monitoring
  labels:
    app.kubernetes.io/name: argocd-metrics
    app.kubernetes.io/part-of: monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-metrics
  endpoints:
    - port: metrics
      interval: 30s
      path: /metrics
      honorLabels: true
      relabelings:
        - sourceLabels: [__meta_kubernetes_pod_annotation_argocd_cluster]
          targetLabel: cluster
        - sourceLabels: [__meta_kubernetes_pod_annotation_argocd_environment]
          targetLabel: environment
        - sourceLabels: [__meta_kubernetes_pod_annotation_argocd_region]
          targetLabel: region
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: argocd-alerts
  namespace: monitoring
spec:
  groups:
    - name: argocd.application.health
      interval: 30s
      rules:
        - alert: ArgoCDApplicationDegraded
          expr: argocd_app_health_status{health_status="Degraded"} > 0
          for: 5m
          labels:
            severity: critical
            team: platform
            environment: "{{ $labels.environment }}"
            cluster: "{{ $labels.cluster }}"
          annotations:
            summary: "ArgoCD Application is Degraded"
            description: "Application {{ $labels.name }} in cluster {{ $labels.cluster }} is in Degraded state for more than 5 minutes"
            runbook_url: "https://wiki.example.com/runbooks/argocd-app-degraded"
        - alert: ArgoCDApplicationProgressing
          expr: argocd_app_health_status{health_status="Progressing"} > 0
          for: 15m
          labels:
            severity: warning
            team: platform
            environment: "{{ $labels.environment }}"
            cluster: "{{ $labels.cluster }}"
          annotations:
            summary: "ArgoCD Application is Progressing for too long"
            description: "Application {{ $labels.name }} in cluster {{ $labels.cluster }} is in Progressing state for more than 15 minutes"
        - alert: ArgoCDApplicationSyncFailed
          expr: increase(argocd_app_sync_total{phase="Error"}[5m]) > 0
          for: 1m
          labels:
            severity: critical
            team: platform
            environment: "{{ $labels.environment }}"
            cluster: "{{ $labels.cluster }}"
          annotations:
            summary: "ArgoCD Application sync failed"
            description: "Application {{ $labels.name }} in cluster {{ $labels.cluster }} failed to sync"
    - name: argocd.cluster.health
      interval: 30s
      rules:
        - alert: ArgoCDClusterConnectionFailed
          expr: argocd_cluster_connection_status{connection_state="Failed"} > 0
          for: 3m
          labels:
            severity: critical
            team: platform
          annotations:
            summary: "ArgoCD cluster connection failed"
            description: "Cluster {{ $labels.server }} connection failed for more than 3 minutes"
        - alert: ArgoCDClusterAPILatencyHigh
          expr: histogram_quantile(0.95, argocd_cluster_api_server_request_duration_seconds_bucket) > 1
          for: 5m
          labels:
            severity: warning
            team: platform
          annotations:
            summary: "ArgoCD cluster API latency is high"
            description: "95th percentile API latency for cluster {{ $labels.server }} is above 1 second"
    - name: argocd.repo.health
      interval: 30s
      rules:
        - alert: ArgoCDRepoConnectionFailed
          expr: argocd_repo_connection_status{connection_state="Failed"} > 0
          for: 5m
          labels:
            severity: warning
            team: platform
          annotations:
            summary: "ArgoCD repository connection failed"
            description: "Repository {{ $labels.repo }} connection failed for more than 5 minutes"
```
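Alert expressions are easiest to debug ad hoc against the Prometheus HTTP API before they ship in a PrometheusRule; a sketch assuming Prometheus is reachable in-cluster at `http://prometheus.monitoring:9090` (address illustrative):

```bash
# Evaluate the Degraded-application expression directly
curl -sG 'http://prometheus.monitoring:9090/api/v1/query' \
  --data-urlencode 'query=argocd_app_health_status{health_status="Degraded"} > 0' \
  | jq '.data.result[] | {app: .metric.name, cluster: .metric.cluster}'
```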
{ "dashboard": { "title": "ArgoCD Multi-Cluster Overview", "tags": ["argocd", "multi-cluster", "gitops"], "timezone": "browser", "panels": [ { "title": "Cluster Health Status", "type": "stat", "targets": [ { "expr": "sum by (cluster, environment, region) (argocd_cluster_connection_status)", "legendFormat": "{{cluster}} ({{environment}}-{{region}})" } ], "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "thresholds": { "steps": [ {"color": "red", "value": 0}, {"color": "green", "value": 1} ] } } } }, { "title": "Application Health by Cluster", "type": "table", "targets": [ { "expr": "argocd_app_health_status", "legendFormat": "{{cluster}} - {{name}}" } ], "transformations": [ { "id": "organize", "options": { "excludeByName": {}, "indexByName": {}, "renameByName": { "cluster": "Cluster", "name": "Application", "health_status": "Health Status", "sync_status": "Sync Status" } } } ] }, { "title": "Sync Operations Rate", "type": "graph", "targets": [ { "expr": "rate(argocd_app_sync_total[5m])", "legendFormat": "{{cluster}} - {{phase}}" } ], "yAxes": [ { "label": "Syncs per second", "min": 0 } ] }, { "title": "Resource Usage by Cluster", "type": "piechart", "targets": [ { "expr": "sum by (cluster) (argocd_app_info)", "legendFormat": "{{cluster}}" } ] } ] }} ## 鐏鹃毦鎭㈠涓庝笟鍔¤繛缁€? ### 澶氶泦缇ゅ浠界瓥鐣? 瀹炵幇璺ㄩ泦缇ょ殑搴旂敤閰嶇疆鍜岀姸鎬佸浠斤細 backup-policy.yamlapiVersion: velero.io/v1kind: Schedulemetadata: name: argocd-backup namespace: velerospec: schedule: "0 2 *" # 姣忓ぉ鍑屾櫒 2 鐐瑰浠? template: includedNamespaces:argocdargocd-notificationsargocd-applications includedResources:applications.argoproj.ioapplicationsets.argoproj.ioappprojects.argoproj.iosecretsconfigmaps labelSelector: matchLabels: app.kubernetes.io/part-of: argocd storageLocation: s3-backup volumeSnapshotLocations: [aws-snapshot] hooks: resources:name: argocd-server-backup includedNamespaces:argocd labelSelector: matchLabels: app.kubernetes.io/name: argocd-server pre:exec: container: argocd-server command:/bin/sh-cargocd admin export > /tmp/argocd-backup.yaml post:exec: container: argocd-server command:/bin/sh-crm /tmp/argocd-backup.yaml---apiVersion: velero.io/v1kind: BackupStorageLocationmetadata: name: s3-backup namespace: velerospec: provider: aws objectStorage: bucket: argocd-backups prefix: multi-cluster config: region: us-east-1 s3ForcePathStyle: "false" s3Url: "" accessMode: ReadWrite---apiVersion: velero.io/v1kind: VolumeSnapshotLocationmetadata: name: aws-snapshot namespace: velerospec: provider: aws config: region: us-east-1 ### 鏁呴殰杞Щ鏈哄埗 瀹炵幇鑷姩鐨勯泦缇ゆ晠闅滄娴嬪拰鏁呴殰杞Щ锛? 
### Failover Mechanism

Implement automatic cluster failure detection and failover:

```yaml
# failover-automation.yaml
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
  name: disaster-recovery-apps
  namespace: argocd
spec:
  generators:
    - matrix:
        generators:
          # Primary cluster generator
          - clusters:
              selector:
                matchLabels:
                  cluster-role: primary
          # Application list
          - list:
              elements:
                - app: critical-service-1
                  namespace: production
                  priority: high
                  dr-enabled: true
                  backup-cluster: disaster-recovery-us-west-2
                - app: critical-service-2
                  namespace: production
                  priority: high
                  dr-enabled: true
                  backup-cluster: disaster-recovery-us-west-2
  template:
    metadata:
      name: '{{app}}-{{name}}'
      labels:
        disaster-recovery: enabled
        priority: '{{priority}}'
        primary-cluster: '{{name}}'
        backup-cluster: '{{backup-cluster}}'
      annotations:
        argocd.argoproj.io/sync-wave: "{{priority}}"
    spec:
      project: disaster-recovery
      source:
        repoURL: https://github.com/example/disaster-recovery
        targetRevision: HEAD
        path: 'apps/{{app}}'
        helm:
          valueFiles:
            - values.yaml
            - values-{{name}}.yaml
          parameters:
            - name: cluster.role
              value: primary
            - name: cluster.backup
              value: '{{backup-cluster}}'
            - name: app.priority
              value: '{{priority}}'
            - name: app.namespace
              value: '{{namespace}}'
      destination:
        server: '{{server}}'
        namespace: '{{namespace}}'
      syncPolicy:
        automated:
          prune: false  # never auto-prune in a disaster recovery scenario
          selfHeal: true
          allowEmpty: false
        syncOptions:
          - CreateNamespace=true
          - PrunePropagationPolicy=foreground
          - RespectIgnoreDifferences=true
          - ApplyOutOfSyncOnly=true
        retry:
          limit: 10
          backoff:
            duration: 10s
            factor: 2
            maxDuration: 10m
---
# Failure detection and automatic failover script
apiVersion: v1
kind: ConfigMap
metadata:
  name: failover-script
  namespace: argocd
data:
  failover.sh: |
    #!/bin/bash
    set -e

    PRIMARY_CLUSTER="production-us-east-1"
    BACKUP_CLUSTER="disaster-recovery-us-west-2"
    HEALTH_CHECK_INTERVAL=30
    FAILURE_THRESHOLD=3

    # Health check: succeeds while the cluster answers within the failure threshold
    check_cluster_health() {
      local cluster=$1
      local failures=0
      while [ $failures -lt $FAILURE_THRESHOLD ]; do
        if kubectl --cluster=$cluster get nodes --timeout=10s &>/dev/null; then
          echo "Cluster $cluster is healthy"
          return 0
        else
          failures=$((failures + 1))
          echo "Cluster $cluster health check failed ($failures/$FAILURE_THRESHOLD)"
          sleep $HEALTH_CHECK_INTERVAL
        fi
      done
      return 1
    }

    # Failover: repoint the ApplicationSet at the backup clusters
    trigger_failover() {
      echo "Triggering failover from $PRIMARY_CLUSTER to $BACKUP_CLUSTER"

      # Patch the ApplicationSet to select backup clusters
      kubectl patch applicationset disaster-recovery-apps \
        -n argocd \
        --type merge \
        -p '{"spec":{"generators":[{"matrix":{"generators":[{"clusters":{"selector":{"matchLabels":{"cluster-role":"backup"}}}},{"list":{"elements":[{"app":"critical-service-1","namespace":"production","priority":"high","dr-enabled":"true","backup-cluster":"disaster-recovery-us-west-2"}]}}]}}]}}'

      # Update DNS records
      update_dns_records

      # Notify the team
      send_notification "failover-triggered" "$PRIMARY_CLUSTER" "$BACKUP_CLUSTER"
    }

    # DNS update: use the Route53 or Cloudflare API to repoint records
    update_dns_records() {
      echo "Updating DNS records to point to backup cluster"
      # The concrete implementation depends on the DNS provider
    }

    # Notifications
    send_notification() {
      local event=$1
      local from_cluster=$2
      local to_cluster=$3

      # Send a Slack notification
      curl -X POST -H 'Content-type: application/json' \
        --data "{\"text\":\"Disaster recovery event: $event from $from_cluster to $to_cluster\"}" \
        $SLACK_WEBHOOK_URL

      # Send an email notification
      echo "Disaster recovery event: $event" | \
        mail -s "DR Event Alert" [email protected]
    }

    # Main loop
    while true; do
      if ! check_cluster_health $PRIMARY_CLUSTER; then
        echo "Primary cluster $PRIMARY_CLUSTER is unhealthy, triggering failover"
        trigger_failover
        break
      fi
      sleep $HEALTH_CHECK_INTERVAL
    done
```
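The `update_dns_records` stub could be filled in with Route53, for example; a minimal sketch in which the hosted zone ID and the backup load balancer hostname are both hypothetical placeholders:

```bash
# Hypothetical values; substitute your hosted zone and backup LB hostname
ZONE_ID="Z123EXAMPLE"
BACKUP_LB="dr-lb.us-west-2.elb.amazonaws.com"

# Repoint app.example.com at the backup cluster's load balancer
aws route53 change-resource-record-sets --hosted-zone-id "$ZONE_ID" \
  --change-batch "{
    \"Changes\": [{
      \"Action\": \"UPSERT\",
      \"ResourceRecordSet\": {
        \"Name\": \"app.example.com\",
        \"Type\": \"CNAME\",
        \"TTL\": 60,
        \"ResourceRecords\": [{\"Value\": \"$BACKUP_LB\"}]
      }
    }]
  }"
```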
## Validation Methods and Governance Metrics

### Governance Quality Assessment

Build a comprehensive framework for assessing multi-cluster governance quality:

```bash
#!/bin/bash
# governance-assessment.sh
set -e

CLUSTERS=("production-us-east-1" "production-eu-west-1" "staging-cluster" "edge-clusters")
ENVIRONMENTS=("production" "staging" "development")
TOTAL_SCORE=0
MAX_SCORE=100

echo "🚀 Starting ArgoCD Multi-Cluster Governance Assessment"
echo "=================================================="

# 1. Cluster connectivity check (20 points)
echo "📡 Checking cluster connectivity..."
CLUSTER_SCORE=0
for cluster in "${CLUSTERS[@]}"; do
  if argocd cluster list | grep -q "$cluster.*Successful"; then
    echo "✅ $cluster: Connected"
    CLUSTER_SCORE=$((CLUSTER_SCORE + 5))
  else
    echo "❌ $cluster: Connection failed"
  fi
done
echo "Cluster Connectivity Score: $CLUSTER_SCORE/20"
TOTAL_SCORE=$((TOTAL_SCORE + CLUSTER_SCORE))

# 2. Application sync status check (25 points)
echo "🔄 Checking application sync status..."
SYNC_SCORE=0
for env in "${ENVIRONMENTS[@]}"; do
  SYNCED_APPS=$(argocd app list -p "$env" -o json | jq '[.[] | select(.status.sync.status == "Synced")] | length')
  TOTAL_APPS=$(argocd app list -p "$env" -o json | jq 'length')
  if [ "$TOTAL_APPS" -gt 0 ]; then
    SYNC_RATE=$(echo "scale=2; $SYNCED_APPS * 100 / $TOTAL_APPS" | bc)
    if (( $(echo "$SYNC_RATE >= 95" | bc -l) )); then
      echo "✅ $env: Sync rate ${SYNC_RATE}% (excellent)"
      SYNC_SCORE=$((SYNC_SCORE + 8))
    elif (( $(echo "$SYNC_RATE >= 90" | bc -l) )); then
      echo "⚠️ $env: Sync rate ${SYNC_RATE}% (good)"
      SYNC_SCORE=$((SYNC_SCORE + 5))
    else
      echo "❌ $env: Sync rate ${SYNC_RATE}% (poor)"
      SYNC_SCORE=$((SYNC_SCORE + 2))
    fi
  fi
done
echo "Application Sync Score: $SYNC_SCORE/25"
TOTAL_SCORE=$((TOTAL_SCORE + SYNC_SCORE))

# 3. Health status check (20 points)
echo "🏥 Checking application health..."
HEALTH_SCORE=0
for env in "${ENVIRONMENTS[@]}"; do
  HEALTHY_APPS=$(argocd app list -p "$env" -o json | jq '[.[] | select(.status.health.status == "Healthy")] | length')
  TOTAL_APPS=$(argocd app list -p "$env" -o json | jq 'length')
  if [ "$TOTAL_APPS" -gt 0 ]; then
    HEALTH_RATE=$(echo "scale=2; $HEALTHY_APPS * 100 / $TOTAL_APPS" | bc)
    if (( $(echo "$HEALTH_RATE >= 98" | bc -l) )); then
      echo "✅ $env: Health rate ${HEALTH_RATE}% (excellent)"
      HEALTH_SCORE=$((HEALTH_SCORE + 7))
    elif (( $(echo "$HEALTH_RATE >= 95" | bc -l) )); then
      echo "⚠️ $env: Health rate ${HEALTH_RATE}% (good)"
      HEALTH_SCORE=$((HEALTH_SCORE + 4))
    else
      echo "❌ $env: Health rate ${HEALTH_RATE}% (poor)"
      HEALTH_SCORE=$((HEALTH_SCORE + 1))
    fi
  fi
done
echo "Health Status Score: $HEALTH_SCORE/20"
TOTAL_SCORE=$((TOTAL_SCORE + HEALTH_SCORE))

# 4. Configuration consistency check (15 points)
echo "🔍 Checking configuration consistency..."
CONFIG_SCORE=0
# Compare configuration across clusters
CONSISTENT_CONFIGS=0
TOTAL_CONFIGS=0
for cluster in "${CLUSTERS[@]}"; do
  if kubectl --cluster="$cluster" get cm -n argocd argocd-cm -o yaml &>/dev/null; then
    TOTAL_CONFIGS=$((TOTAL_CONFIGS + 1))
    # Concrete consistency-check logic can be added here
    CONSISTENT_CONFIGS=$((CONSISTENT_CONFIGS + 1))
  fi
done
if [ "$TOTAL_CONFIGS" -gt 0 ]; then
  CONSISTENCY_RATE=$(echo "scale=2; $CONSISTENT_CONFIGS * 100 / $TOTAL_CONFIGS" | bc)
  if (( $(echo "$CONSISTENCY_RATE >= 95" | bc -l) )); then
    echo "✅ Configuration consistency: ${CONSISTENCY_RATE}%"
    CONFIG_SCORE=15
  elif (( $(echo "$CONSISTENCY_RATE >= 90" | bc -l) )); then
    echo "⚠️ Configuration consistency: ${CONSISTENCY_RATE}%"
    CONFIG_SCORE=10
  else
    echo "❌ Configuration consistency: ${CONSISTENCY_RATE}%"
    CONFIG_SCORE=5
  fi
fi
echo "Configuration Consistency Score: $CONFIG_SCORE/15"
TOTAL_SCORE=$((TOTAL_SCORE + CONFIG_SCORE))

# 5. Security compliance check (10 points)
echo "🔒 Checking security compliance..."
SECURITY_SCORE=0
# Check RBAC configuration
if argocd proj list -o json | jq -e '.[] | select(.spec.roles | length > 0)' &>/dev/null; then
  echo "✅ RBAC policies configured"
  SECURITY_SCORE=$((SECURITY_SCORE + 5))
else
  echo "❌ RBAC policies missing"
fi
# Check network policies
if kubectl get networkpolicies -A | grep -q argocd; then
  echo "✅ Network policies configured"
  SECURITY_SCORE=$((SECURITY_SCORE + 5))
else
  echo "❌ Network policies missing"
fi
echo "Security Compliance Score: $SECURITY_SCORE/10"
TOTAL_SCORE=$((TOTAL_SCORE + SECURITY_SCORE))

# 6. Performance metrics check (10 points)
echo "⚡ Checking performance metrics..."
PERF_SCORE=0
# Check for recent sync activity
SYNC_TIME=$(argocd app list -o json | jq -r '.[].status.operationState?.finishedAt // empty' | head -1)
if [ ! -z "$SYNC_TIME" ]; then
  echo "✅ Recent sync operations detected"
  PERF_SCORE=$((PERF_SCORE + 5))
else
  echo "❌ No recent sync operations"
fi
# Check resource utilization visibility
if kubectl top pods -A | grep -q argocd; then
  echo "✅ Resource monitoring available"
  PERF_SCORE=$((PERF_SCORE + 5))
else
  echo "❌ Resource monitoring unavailable"
fi
echo "Performance Score: $PERF_SCORE/10"
TOTAL_SCORE=$((TOTAL_SCORE + PERF_SCORE))

# Summary report
echo "=================================================="
echo "📊 ArgoCD Multi-Cluster Governance Assessment Complete"
echo "Total Score: $TOTAL_SCORE/$MAX_SCORE ($(echo "scale=1; $TOTAL_SCORE * 100 / $MAX_SCORE" | bc)%)"
if [ "$TOTAL_SCORE" -ge 90 ]; then
  echo "🎉 Excellent governance maturity!"
elif [ "$TOTAL_SCORE" -ge 80 ]; then
  echo "👍 Good governance maturity, minor improvements needed"
elif [ "$TOTAL_SCORE" -ge 70 ]; then
  echo "⚠️ Fair governance maturity, significant improvements needed"
else
  echo "🚨 Poor governance maturity, immediate attention required"
fi
```

### Governance Quality Metrics

| Metric | Target | Measurement Method | Validation Frequency |
|--------|--------|--------------------|----------------------|
| Cluster connection success rate | > 99.9% | Health checks | Real-time monitoring |
| Application sync success rate | > 98% | Sync status statistics | Hourly |
| Application health rate | > 99% | Health status checks | Real-time monitoring |
| Configuration consistency | > 95% | Configuration diff checks | Daily |
| Security compliance rate | 100% | Security scans | Weekly |
| Average sync time | < 2 minutes | Performance monitoring | Hourly |
| Disaster recovery time | < 30 minutes | DR drills | Monthly |
| Change success rate | > 95% | Change tracking | Weekly |
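To track these targets over time rather than one run at a time, the assessment score can be published to a Prometheus Pushgateway and graphed next to the Argo CD metrics; a minimal sketch, assuming a Pushgateway at `pushgateway.monitoring:9091` (address illustrative) and that the `awk` extraction matches the script's `Total Score:` output line:

```bash
# Run the assessment and pull the total out of its summary line
TOTAL_SCORE=$(./governance-assessment.sh | awk -F'[:/ ]+' '/^Total Score/ {print $3}')

# Publish the total as a gauge the monitoring stack can alert on
cat <<EOF | curl --data-binary @- http://pushgateway.monitoring:9091/metrics/job/governance-assessment
# TYPE argocd_governance_score gauge
argocd_governance_score $TOTAL_SCORE
EOF
```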
With a systematic multi-cluster governance architecture, intelligent sync strategies, a thorough monitoring and observability stack, and reliable disaster recovery mechanisms, Argo CD can deliver enterprise-grade governance across clusters and environments, providing a unified, efficient, and reliable delivery platform for cloud-native applications.
