Skip to content

Commit d9abd91

Browse files
add configurable client ACL init startup staggering to smooth login storm (#5021)
1 parent d9dc8ca commit d9abd91

File tree

4 files changed

+67
-0
lines changed

4 files changed

+67
-0
lines changed

.changelog/5021.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:improvement
2+
client: Add optional startup staggering for the client ACL init container to spread /v1/acl/login calls and reduce login storms on large clusters. Controlled via the client.aclInit.startupStagger.* values; disabled by default, and only active when `enabled` is true and `maxSeconds` is greater than 0.
3+
```

charts/consul/templates/client-daemonset.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,8 @@ spec:
518518
{{- if (or .Values.global.acls.manageSystemACLs (and .Values.global.tls.enabled (not .Values.global.tls.enableAutoEncrypt))) }}
519519
initContainers:
520520
{{- if .Values.global.acls.manageSystemACLs }}
521+
{{- $startupStagger := default (dict "enabled" false "minSeconds" 0 "maxSeconds" 0) .Values.client.aclInit.startupStagger }}
522+
{{- $staggerEnabled := and $startupStagger.enabled (gt (int $startupStagger.maxSeconds) 0) }}
521523
- name: client-acl-init
522524
image: {{ .Values.global.imageK8S }}
523525
{{ template "consul.imagePullPolicy" . }}
@@ -543,6 +545,23 @@ spec:
543545
- "/bin/sh"
544546
- "-ec"
545547
- |
548+
{{- if $staggerEnabled }}
549+
# Optional startup stagger: sleep for a random whole number of seconds between
# min_delay and max_delay (both inclusive) before acl-init performs the ACL login.
# Both bounds are rendered as integers by the template.
min_delay={{ int $startupStagger.minSeconds }}
550+
max_delay={{ int $startupStagger.maxSeconds }}
551+
# An inverted range is a configuration error: report it on stderr and fail the
# init container instead of guessing a delay.
if [ $max_delay -lt $min_delay ]; then
552+
echo "client-acl-init: startupStagger.maxSeconds (${max_delay}) must be >= startupStagger.minSeconds (${min_delay})" >&2
553+
exit 1
554+
fi
555+
# Width of the window from which the random part of the delay is drawn.
range=$((max_delay - min_delay))
556+
if [ $range -gt 0 ]; then
557+
# Modulo (range + 1) yields a value in 0..range, so max_delay itself is reachable.
# NOTE(review): RANDOM is not required by POSIX sh; presumably the image's
# /bin/sh provides it — confirm, otherwise the jitter may always be 0.
jitter=$((RANDOM % (range + 1)))
558+
else
559+
# min_delay equals max_delay: the delay is fixed, no randomness needed.
jitter=0
560+
fi
561+
sleep_time=$((min_delay + jitter))
562+
echo "client-acl-init: staggering for ${sleep_time}s before ACL login"
563+
sleep ${sleep_time}
564+
{{- end }}
546565
exec consul-k8s-control-plane acl-init \
547566
-log-level={{ default .Values.global.logLevel .Values.client.logLevel }} \
548567
-log-json={{ .Values.global.logJSON }} \

charts/consul/test/unit/client-daemonset.bats

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,41 @@ load _helpers
13561356
[ "${actual}" = "true" ]
13571357
}
13581358

1359+
# Renders the client DaemonSet with ACLs managed and startupStagger enabled
# (minSeconds=10, maxSeconds=40), then checks that the client-acl-init command
# contains the stagger preamble with those values.
@test "client/DaemonSet: acl-init adds startupStagger sleep when enabled" {
1360+
cd `chart_dir`
1361+
local command=$(helm template \
1362+
-s templates/client-daemonset.yaml \
1363+
--set 'client.enabled=true' \
1364+
--set 'global.acls.manageSystemACLs=true' \
1365+
--set 'client.aclInit.startupStagger.enabled=true' \
1366+
--set 'client.aclInit.startupStagger.minSeconds=10' \
1367+
--set 'client.aclInit.startupStagger.maxSeconds=40' \
1368+
. | tee /dev/stderr |
1369+
yq -r '.spec.template.spec.initContainers[] | select(.name == "client-acl-init") | .command' | tee /dev/stderr)
1370+
1371+
# The configured minimum must be rendered into the script.
local actual=$(echo $command | jq -r ' . | any(contains("min_delay=10"))' | tee /dev/stderr)
1372+
[ "${actual}" = "true" ]
1373+
1374+
# The configured maximum must be rendered into the script.
local actual=$(echo $command | jq -r ' . | any(contains("max_delay=40"))' | tee /dev/stderr)
1375+
[ "${actual}" = "true" ]
1376+
1377+
# The log line emitted just before sleeping must be present.
local actual=$(echo $command | jq -r ' . | any(contains("client-acl-init: staggering for"))' | tee /dev/stderr)
1378+
[ "${actual}" = "true" ]
1379+
}
1380+
1381+
# Renders the client DaemonSet with ACLs managed but no startupStagger values
# set, and checks that the stagger preamble is absent from the client-acl-init command.
@test "client/DaemonSet: acl-init startupStagger disabled by default" {
1382+
cd `chart_dir`
1383+
local command=$(helm template \
1384+
-s templates/client-daemonset.yaml \
1385+
--set 'client.enabled=true' \
1386+
--set 'global.acls.manageSystemACLs=true' \
1387+
. | tee /dev/stderr |
1388+
yq -r '.spec.template.spec.initContainers[] | select(.name == "client-acl-init") | .command' | tee /dev/stderr)
1389+
1390+
# "startupStagger" only appears in the rendered script through the preamble's
# validation error message, so its absence means the preamble was not rendered.
local actual=$(echo $command | jq -r ' . | any(contains("startupStagger"))' | tee /dev/stderr)
1391+
[ "${actual}" = "false" ]
1392+
}
1393+
13591394
@test "client/DaemonSet: init container is created when global.acls.manageSystemACLs=true and has correct command with Partitions enabled" {
13601395
cd `chart_dir`
13611396
local object=$(helm template \

charts/consul/values.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1737,6 +1737,16 @@ client:
17371737
# @recurse: false
17381738
tlsInit: null
17391739

1740+
aclInit:
1741+
startupStagger:
1742+
# When true, adds a randomized sleep before running acl-init to avoid login storms
1743+
# against the Consul servers in large clusters. Defaults set to 0 for no sleep.
1744+
enabled: false
1745+
# Minimum seconds to sleep before attempting the ACL login.
1746+
minSeconds: 0
1747+
# Maximum seconds to sleep before attempting the ACL login.
1748+
maxSeconds: 0
1749+
17401750
# A raw string of extra [JSON configuration](https://developer.hashicorp.com/consul/docs/agent/config/config-files) for Consul
17411751
# clients. This will be saved as-is into a ConfigMap that is read by the Consul
17421752
# client agents. This can be used to add additional configuration that

0 commit comments

Comments
 (0)