VictoriaMetrics 集群性能压测

压测环境

benchmark

安装 VM

安装 VictoriaMetrics 集群(vmstorage / vmselect / vminsert)

添加 Helm 仓库、导出默认 values 并安装集群:
# Register the VictoriaMetrics chart repository and refresh the local index.
helm repo add vm https://victoriametrics.github.io/helm-charts/
helm repo update

# Write the cluster chart's default values to values.yaml.
helm show values vm/victoria-metrics-cluster > values.yaml

# Install the cluster chart as release "victoria-metrics" in the monitoring namespace.
helm install victoria-metrics vm/victoria-metrics-cluster \
  --namespace monitoring \
  --values values.yaml

安装 vmauth

导出 vmauth chart 的默认 values:
# Write the default values of the victoria-metrics-auth chart to vmauth-values.yaml.
helm show values vm/victoria-metrics-auth > vmauth-values.yaml

设置用户配置

在 vmauth-values.yaml 中写入如下 config:
# vmauth authentication and routing configuration.
# NOTE(review): the tokens below are real-looking secrets; keep them out of
# version control and rotate them if this file has been published.
config:
  # Requests without credentials are load-balanced across all url_prefix
  # entries below.
  # NOTE(review): this pool mixes vmselect (read) and vminsert (write)
  # backends of two clusters, so an unauthenticated query may be sent to a
  # vminsert backend — confirm this is intended.
  unauthorized_user:
    url_prefix:
      - "http://victoria-metrics-cluster-container-vmselect:8481/select/0/prometheus"
      - "http://victoria-metrics-cluster-host-vmselect:8481/select/0/prometheus"
      - "http://victoria-metrics-cluster-container-vminsert:8480/insert/0/prometheus"
      - "http://victoria-metrics-cluster-host-vminsert:8480/insert/0/prometheus"
    discover_backend_ips: true
    retry_status_codes: [500, 502]
    load_balancing_policy: least_loaded
  # Arbitrary number of users may be put here.
  # Tokens must be unique.
  users:
    # Read access to the container cluster.
    # bearer_token holds only the token itself: clients send it as
    # "Authorization: Bearer <token>", so a "Bearer " prefix inside the value
    # would never match.
    - bearer_token: "yBYmTkjrAeo2RvYY"
      dump_request_on_errors: true
      url_prefix: "http://victoria-metrics-cluster-container-vmselect:8481/select/0/prometheus"
      headers:
        - "X-Scope-OrgID: grafana"
    - bearer_token: "E6PHKgRaHudz9mqkyAvWU7SDZMyvtu6V"
      url_prefix: "http://victoria-metrics-cluster-container-vmselect:8481/select/0/prometheus"
      headers:
        - "X-Scope-OrgID: devops"
      response_headers:
        - "X-Server-Hostname:"
      max_concurrent_requests: 100
      name: "devops-container"
    # Read access to the host cluster.
    - bearer_token: "wEDu8YSRaA4kFr7s"
      url_prefix: "http://victoria-metrics-cluster-host-vmselect:8481/select/0/prometheus"
      headers:
        - "X-Scope-OrgID: grafana"
    - bearer_token: "6MWLfZsJq5YeQ7nUKVoaCGuZ6SBE76Nt"
      url_prefix: "http://victoria-metrics-cluster-host-vmselect:8481/select/0/prometheus"
      headers:
        - "X-Scope-OrgID: devops"
      max_concurrent_requests: 100
      name: "devops-host"
    # Write access to the container cluster.
    - bearer_token: "HgLy2n2M8hLhMdTw"
      url_prefix: "http://victoria-metrics-cluster-container-vminsert:8480/insert/0/prometheus"
      headers:
        - "X-Scope-OrgID: vmagent"
    # Write access to the host cluster.
    - bearer_token: "Dv4X9T8S7EMHffdC"
      url_prefix: "http://victoria-metrics-cluster-host-vminsert:8480/insert/0/prometheus"
      headers:
        - "X-Scope-OrgID: categraf"
使用上述配置安装 vmauth:
# Install the vmauth release into the monitoring namespace using vmauth-values.yaml.
helm install vmauth vm/victoria-metrics-auth \
  --namespace monitoring \
  --values vmauth-values.yaml

压测部署

在压测 chart 的 values.yaml 中配置远端存储地址与令牌:
# Remote storage under test, reached through vmauth.
remoteStorages:
  vm:
    # writeURL is passed to vmagent as --remoteWrite.url, so it must contain
    # the full remote-write path; vmauth appends it to the vminsert url_prefix.
    writeURL: "http://vmauth-victoria-metrics-auth.monitoring.svc.cluster.local.:8427/api/v1/write"
    # readURL is the query base URL; no path is needed here.
    readURL: "http://vmauth-victoria-metrics-auth.monitoring.svc.cluster.local.:8427"
    # Token that vmauth routes to vminsert.
    writeBearerToken: "HgLy2n2M8hLhMdTw"
    # Token that vmauth routes to vmselect.
    readBearerToken: "yBYmTkjrAeo2RvYY"
克隆 prometheus-benchmark 仓库并安装压测 chart:
# Fetch the benchmark chart sources.
git clone https://github.com/VictoriaMetrics/prometheus-benchmark
cd prometheus-benchmark

# Remove the vmsingle templates from the chart before installing it.
rm -rf chart/templates/vmsingle/

# Install the local chart as release "bench" in the monitoring namespace.
helm install bench chart/ \
  --namespace monitoring \
  --values values.yaml
压测组件对应的 Kubernetes 清单(ConfigMap 与 Deployment)如下:
# ConfigMap carrying the nginx.conf used by the nginx container of the
# vmagent benchmark pod (mounted there under /opt/nginx).
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-cm
  namespace: vm-benchmark
data:
  # nginx listens on 127.0.0.1:9102 and proxies to the "nodeexporter"
  # upstream on 127.0.0.1:9101, caching responses for 1s.
  nginx.conf: |
    daemon off;
    worker_processes auto;
    pid /tmp/nginx.pid;
    events {
      worker_connections 1000000;
    }
    http {
      proxy_cache_path /tmp/nginx/client_temp keys_zone=all:1m max_size=10m;
      client_body_temp_path /tmp/nginx 1 2;
      proxy_temp_path /tmp/nginx 1 2;
      fastcgi_temp_path /tmp/nginx 1 2;
      uwsgi_temp_path /tmp/nginx 1 2;
      scgi_temp_path /tmp/nginx 1 2;
      upstream nodeexporter {
        server 127.0.0.1:9101;
        keepalive 1000;
      }
      server {
        listen 127.0.0.1:9102;
        server_name foo;
        access_log off;
        error_log off;
        keepalive_disable none;
        location / {
          proxy_pass http://nodeexporter/;
          proxy_http_version 1.1;
          proxy_set_header Connection "";
          proxy_cache all;
          proxy_cache_lock on;
          proxy_cache_valid 1s;
          proxy_cache_background_update on;
          proxy_cache_use_stale updating;
      }
    }
    }
---
# Benchmark writer pod: a scrape-config generator, vmagent, node-exporter and
# an nginx cache in front of node-exporter, all sharing one pod network.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/instance: my-bench
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: prometheus-benchmark
    app.kubernetes.io/version: "1.17.0"
    chart-name: my-bench-prometheus-benchmark
    helm.sh/chart: prometheus-benchmark-0.2.0
  name: vmagent-vm-replica-0
  namespace: vm-benchmark
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      chart-name: benchmark
      job: vmagent
      remote-storage-name: vm
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        # Every selector.matchLabels entry must also be present here,
        # otherwise the API server rejects the Deployment.
        chart-name: benchmark
        job: vmagent
        remote-storage-name: vm
    spec:
      containers:
      # Serves a generated scrape config on :8436 (1000 targets, all pointing
      # at the nginx cache on port 9102).
      - args:
        - --httpListenAddr=:8436
        - --targetsCount=1000
        - --targetAddr=0.0.0.0:9102
        - --scrapeInterval=10s
        - --scrapeConfigUpdatePercent=1
        - --scrapeConfigUpdateInterval=10m
        image: registry.kbsonlong.com/library/vmagent-config-updater:v1.1.0
        imagePullPolicy: IfNotPresent
        name: vmagent-config-updater
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      # Scrapes the generated targets and remote-writes the samples via vmauth.
      - args:
        - --httpListenAddr=:8429
        - --remoteWrite.showURL
        - --promscrape.config=http://0.0.0.0:8436/api/v1/config
        - --promscrape.configCheckInterval=10m
        # vmauth runs in the "monitoring" namespace; the URL must include the
        # remote-write path, which vmauth appends to the vminsert url_prefix.
        - --remoteWrite.url=http://vmauth-victoria-metrics-auth.monitoring.svc.cluster.local.:8427/api/v1/write
        # Write token (routed by vmauth to vminsert), not the read token.
        - --remoteWrite.bearerToken=HgLy2n2M8hLhMdTw
        - --remoteWrite.tmpDataPath=/vmagent-data
        - --remoteWrite.maxDiskUsagePerURL=100MiB
        - --remoteWrite.label=replica=0
        - --promscrape.disableCompression
        - --promscrape.noStaleMarkers
        image: registry.kbsonlong.com/library/vmagent:v1.102.1
        imagePullPolicy: IfNotPresent
        name: vmagent
        ports:
        - containerPort: 8429
          name: metrics
          protocol: TCP
        resources:
          limits:
            memory: 4Gi
          requests:
            cpu: "2"
            memory: 4Gi
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        # The former /secret mount referenced a volume "vmauth-token" that is
        # not declared under spec.volumes (which makes the pod spec invalid)
        # and no argument reads from /secret, so it has been removed.
        volumeMounts:
        - mountPath: /vmagent-data
          name: vmagent-data
      # node-exporter reading host /proc, /sys and / through read-only mounts.
      - args:
        - --path.procfs=/host/proc
        - --path.sysfs=/host/sys
        - --path.rootfs=/host/root
        - --no-collector.wifi
        - --no-collector.arp
        - --collector.processes
        - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+|run/containerd/.+|home/kubernetes/.+)($|/)
        - --web.max-requests=40
        - --web.listen-address=:9101
        image: registry.kbsonlong.com/library/node-exporter:v1.4.0
        imagePullPolicy: IfNotPresent
        name: nodeexporter
        ports:
        - containerPort: 9101
          name: metrics
          protocol: TCP
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /host/proc
          name: proc
          readOnly: true
        - mountPath: /host/sys
          name: sys
          readOnly: true
        - mountPath: /host/root
          mountPropagation: HostToContainer
          name: root
          readOnly: true
      # nginx started with the config from the nginx-cm ConfigMap.
      - args:
        - nginx
        - -c
        - /opt/nginx/nginx.conf
        image: registry.kbsonlong.com/library/nginx:1.23.1
        imagePullPolicy: IfNotPresent
        name: nginx
        ports:
        - containerPort: 9102
          name: nginx
          protocol: TCP
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /opt/nginx
          name: nginx-cm
        - mountPath: /tmp/nginx
          name: nginx-cache
        - mountPath: /etc/nginx
          name: nginx-empty
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext:
        fsGroup: 65534
        runAsGroup: 65534
        runAsNonRoot: true
        runAsUser: 65534
      terminationGracePeriodSeconds: 30
      volumes:
      # In-memory buffer for vmagent's remote-write queue.
      - emptyDir:
          medium: Memory
        name: vmagent-data
      - hostPath:
          path: /proc
          type: ""
        name: proc
      - hostPath:
          path: /sys
          type: ""
        name: sys
      - hostPath:
          path: /
          type: ""
        name: root
      - emptyDir: {}
        name: nginx-cache
      - configMap:
          defaultMode: 420
          name: nginx-cm
        name: nginx-cm
      - emptyDir: {}
        name: nginx-empty
---
# Alertmanager config mounted into the vmalert benchmark pod: every alert is
# routed to a receiver named "blackhole" that has no notification targets.
apiVersion: v1
kind: ConfigMap
metadata:
  # The "annotations:" key was missing, which left the annotation line
  # over-indented directly under metadata and made the document invalid YAML.
  annotations:
    meta.helm.sh/release-namespace: vm-benchmark
  name: benchmark-alertmanager-cm
  namespace: vm-benchmark
data:
  alertmanager.yml: |
    route:
      receiver: blackhole
    receivers:
    - name: blackhole
---
# Benchmark reader pod: a rules server, vmalert evaluating those rules against
# the storage under test (via vmauth), and a local Alertmanager.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: benchmark-vmalert-vm
  namespace: vm-benchmark
spec:
  replicas: 1
  selector:
    matchLabels:
      chart-name: benchmark
      job: vmalert
      remote-storage-name: vm
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        # Every selector.matchLabels entry must also be present here,
        # otherwise the API server rejects the Deployment.
        chart-name: benchmark
        job: vmalert
        remote-storage-name: vm
    spec:
      containers:
      # Serves the generated rule set (100 groups x 100 rules) on :8080.
      - args:
        - --groups=100
        - --rules=100
        # NOTE(review): image has no tag, so it resolves to ":latest"; with
        # IfNotPresent a stale cached image may be used — consider pinning.
        image: registry.kbsonlong.com/library/vmalert-rules-server
        imagePullPolicy: IfNotPresent
        name: vmalert-rules
        ports:
        - containerPort: 8080
          name: rules
          protocol: TCP
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      - args:
        - --httpListenAddr=:8880
        - --notifier.url=http://127.0.0.1:9093
        - --rule=http://127.0.0.1:8080/rules
        - --evaluationInterval=10s
        # vmauth runs in the "monitoring" namespace.
        - --datasource.url=http://vmauth-victoria-metrics-auth.monitoring.svc.cluster.local.:8427
        # Read token (routed by vmauth to vmselect).
        - --datasource.bearerToken=yBYmTkjrAeo2RvYY
        image: registry.kbsonlong.com/library/vmalert:v1.102.1
        imagePullPolicy: IfNotPresent
        name: vmalert
        ports:
        - containerPort: 8880
          name: metrics
          protocol: TCP
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      - args:
        - --web.listen-address=:9093
        - --config.file=/config/alertmanager.yml
        image: registry.kbsonlong.com/library/alertmanager:v0.24.0
        imagePullPolicy: IfNotPresent
        name: alertmanager
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /config
          name: alertmanager-cfg
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      volumes:
      - configMap:
          defaultMode: 420
          name: benchmark-alertmanager-cm
        name: alertmanager-cfg

压测场景

32条模板规则中随机选取生成100组,每组100条,总共1w条规则

  • 1亿/秒的样本数据
  • 告警规则数量: 100组,每组100条,总共1w条告警规则
  • vmselect查询器资源配置: 2c4g
  • vmauth单实例
  • vmalert单实例: 每30s评估一次告警规则(注:上文清单中为 --evaluationInterval=10s,请以实际压测配置为准)

202503201210674

  • 6个vmselect实例CPU基本上拉满90%以上;
  • 12个vmselect实例CPU稳定在70%~80%之间;

参考资料

0%