Skip to content

Commit d02732b

Browse files
add configurable client ACL init startup staggering to smooth login storm (#5021)
1 parent 0a75721 commit d02732b

File tree

4 files changed

+67
-0
lines changed

4 files changed

+67
-0
lines changed

.changelog/5021.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:improvement
2+
client: Add optional startup staggering for client ACL init to spread /v1/acl/login calls and reduce login storms on large clusters. Controlled via client.aclInit.startupStagger.* values (disabled by default).
3+
```

charts/consul/templates/client-daemonset.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,8 @@ spec:
509509
{{- if (or .Values.global.acls.manageSystemACLs (and .Values.global.tls.enabled (not .Values.global.tls.enableAutoEncrypt))) }}
510510
initContainers:
511511
{{- if .Values.global.acls.manageSystemACLs }}
512+
{{- $startupStagger := default (dict "enabled" false "minSeconds" 0 "maxSeconds" 0) .Values.client.aclInit.startupStagger }}
513+
{{- $staggerEnabled := and $startupStagger.enabled (gt (int $startupStagger.maxSeconds) 0) }}
512514
- name: client-acl-init
513515
image: {{ .Values.global.imageK8S }}
514516
{{ template "consul.imagePullPolicy" . }}
@@ -534,6 +536,23 @@ spec:
534536
- "/bin/sh"
535537
- "-ec"
536538
- |
539+
{{- if $staggerEnabled }}
# Startup stagger: sleep a random whole number of seconds in the inclusive
# range [min_delay, max_delay] before the ACL login, so that clients starting
# at the same time do not all call /v1/acl/login at once.
min_delay={{ int $startupStagger.minSeconds }}
max_delay={{ int $startupStagger.maxSeconds }}
# Fail fast on a negative minimum; otherwise sleep could be handed a negative
# duration and fail with an unrelated-looking error.
if [ "${min_delay}" -lt 0 ]; then
  echo "client-acl-init: startupStagger.minSeconds (${min_delay}) must be >= 0" >&2
  exit 1
fi
if [ "${max_delay}" -lt "${min_delay}" ]; then
  echo "client-acl-init: startupStagger.maxSeconds (${max_delay}) must be >= startupStagger.minSeconds (${min_delay})" >&2
  exit 1
fi
range=$((max_delay - min_delay))
# RANDOM is a shell extension, not POSIX. Under a /bin/sh that lacks it the
# variable is unset and would evaluate to 0 in arithmetic, making every pod
# sleep exactly min_delay. Read it once and fall back to /dev/urandom.
rand="${RANDOM:-}"
if [ -z "${rand}" ]; then
  rand="$(od -An -N2 -tu2 /dev/urandom 2>/dev/null | tr -d ' \n')"
fi
# When range is 0 the modulo is taken by 1, so jitter is 0 without a special case.
jitter=$(( ${rand:-0} % (range + 1) ))
sleep_time=$((min_delay + jitter))
echo "client-acl-init: staggering for ${sleep_time}s before ACL login"
sleep "${sleep_time}"
{{- end }}
537556
exec consul-k8s-control-plane acl-init \
538557
-log-level={{ default .Values.global.logLevel .Values.client.logLevel }} \
539558
-log-json={{ .Values.global.logJSON }} \

charts/consul/test/unit/client-daemonset.bats

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,41 @@ load _helpers
13561356
[ "${actual}" = "true" ]
13571357
}
13581358

1359+
@test "client/DaemonSet: acl-init adds startupStagger sleep when enabled" {
  cd `chart_dir`
  # Render only the client DaemonSet with staggering switched on and extract
  # the command array of the client-acl-init init container.
  local command=$(helm template \
      -s templates/client-daemonset.yaml \
      --set 'client.enabled=true' \
      --set 'global.acls.manageSystemACLs=true' \
      --set 'client.aclInit.startupStagger.enabled=true' \
      --set 'client.aclInit.startupStagger.minSeconds=10' \
      --set 'client.aclInit.startupStagger.maxSeconds=40' \
      . | tee /dev/stderr |
      yq -r '.spec.template.spec.initContainers[] | select(.name == "client-acl-init") | .command' | tee /dev/stderr)

  # Each fragment must appear in at least one element of the command array:
  # the configured bounds, the log line, and the sleep call itself.
  # The JSON is passed quoted so the shell does not word-split or glob-expand
  # the embedded script (which contains "[ ... ]").
  local expected actual
  for expected in \
    "min_delay=10" \
    "max_delay=40" \
    "client-acl-init: staggering for" \
    "sleep "; do
    actual=$(printf '%s\n' "${command}" | jq -r --arg expected "${expected}" 'any(contains($expected))' | tee /dev/stderr)
    [ "${actual}" = "true" ]
  done
}
1380+
1381+
@test "client/DaemonSet: acl-init startupStagger disabled by default" {
  cd `chart_dir`
  # Render the client DaemonSet without any startupStagger overrides and
  # extract the command array of the client-acl-init init container.
  local command=$(helm template \
      -s templates/client-daemonset.yaml \
      --set 'client.enabled=true' \
      --set 'global.acls.manageSystemACLs=true' \
      . | tee /dev/stderr |
      yq -r '.spec.template.spec.initContainers[] | select(.name == "client-acl-init") | .command' | tee /dev/stderr)

  # None of the stagger script may be rendered by default. Checking the
  # variable assignments and the log line, not just the "startupStagger" text
  # from the validation error message, keeps this test meaningful if that
  # message is ever reworded.
  local marker actual
  for marker in \
    "startupStagger" \
    "min_delay=" \
    "max_delay=" \
    "client-acl-init: staggering for"; do
    actual=$(printf '%s\n' "${command}" | jq -r --arg marker "${marker}" 'any(contains($marker))' | tee /dev/stderr)
    [ "${actual}" = "false" ]
  done
}
1393+
13591394
@test "client/DaemonSet: init container is created when global.acls.manageSystemACLs=true and has correct command with Partitions enabled" {
13601395
cd `chart_dir`
13611396
local object=$(helm template \

charts/consul/values.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1734,6 +1734,16 @@ client:
17341734
# @recurse: false
17351735
tlsInit: null
17361736

1737+
aclInit:
1738+
startupStagger:
1739+
# When true, adds a randomized sleep before running acl-init to avoid login storms
1740+
# against the Consul servers in large clusters. The sleep is only added when maxSeconds is greater than 0; the default of 0 means no sleep.
1741+
enabled: false
1742+
# Minimum seconds to sleep before attempting the ACL login.
1743+
minSeconds: 0
1744+
# Maximum seconds to sleep before attempting the ACL login. Must be >= minSeconds, otherwise the client-acl-init container exits with an error.
1745+
maxSeconds: 0
1746+
17371747
# A raw string of extra [JSON configuration](https://developer.hashicorp.com/consul/docs/agent/config/config-files) for Consul
17381748
# clients. This will be saved as-is into a ConfigMap that is read by the Consul
17391749
# client agents. This can be used to add additional configuration that

0 commit comments

Comments
 (0)