Skip to content

Commit 75b8362

Browse files
authored
fix: apt install with mirrors (#1972)
* fix: apt install with mirrors * fix: longer timeout and error handling * fix: country specific mirror * fix: move to stage 2 * fix: don't run services in chroot when installing package * fix: simplify apt_install_with_fallback
1 parent ff09b10 commit 75b8362

File tree

2 files changed

+156
-39
lines changed

2 files changed

+156
-39
lines changed

ebssurrogate/scripts/chroot-bootstrap-nix.sh

Lines changed: 151 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -14,47 +14,79 @@ export APT_OPTIONS="-oAPT::Install-Recommends=false \
1414
-oAPT::Install-Suggests=false \
1515
-oAcquire::Languages=none"
1616

17+
# Keep package post-install scripts from starting services inside the chroot.
# Without this, installs can hang while cloud-init, dbus, etc. try to start.
# The generated policy-rc.d is a two-line sh script that always exits 101.
printf '%s\n' '#!/bin/sh' 'exit 101' > /usr/sbin/policy-rc.d
chmod +x /usr/sbin/policy-rc.d
24+
1725
# Derive ARCH from dpkg: "amd64" is kept as-is, every other answer is treated
# as "arm64". The command substitution is quoted so the comparison stays
# well-formed even when dpkg prints nothing (or more than one word).
if [ "$(dpkg --print-architecture)" = "amd64" ]; then
    ARCH="amd64"
else
    ARCH="arm64"
fi
2331

24-
# Mirror fallback function for resilient apt-get update
25-
function apt_update_with_fallback {
32+
# Print the base URL (scheme + host) of the first ubuntu-ports mirror found
# in an apt sources file.
#
# Arguments: $1 - sources file to inspect (optional, defaults to
#                 /etc/apt/sources.list)
# Outputs:   e.g. "http://ports.ubuntu.com" on stdout; nothing when the file
#            is unreadable or has no http://<host>/ubuntu-ports/ entry
# Returns:   0 always
function get_current_mirror {
    local sources_file="${1:-/etc/apt/sources.list}"
    # Host part is everything between "http://" and the next "/".
    local mirror_re='(http://[^/]+)/ubuntu-ports/'
    local line

    [[ -r "${sources_file}" ]] || return 0

    # Pure-bash scan: no dependency on GNU grep's -P (PCRE) support.
    while IFS= read -r line || [[ -n "${line}" ]]; do
        if [[ "${line}" =~ ${mirror_re} ]]; then
            printf '%s\n' "${BASH_REMATCH[1]}"
            return 0
        fi
    done < "${sources_file}"

    return 0
}
36+
37+
# Rewrite every http://<host>/ubuntu-ports/ entry of an apt sources file so
# that it points at a different mirror host, then print the first few active
# "deb" lines for the build log.
#
# Arguments: $1 - new mirror host name (no scheme), e.g. ports.ubuntu.com
#            $2 - sources file to rewrite (optional, defaults to
#                 /etc/apt/sources.list)
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success; 1 when the mirror argument is empty/unsafe or the
#            file could not be rewritten (the file is left untouched when the
#            argument is rejected)
function switch_mirror {
    local new_mirror="${1:-}"
    local sources_file="${2:-/etc/apt/sources.list}"
    # Characters that would change the meaning of the sed expression below.
    local unsafe_re='[|&\\[:space:]]'

    # An empty host would rewrite the entries to "http:///ubuntu-ports/" and
    # break every later apt call, so refuse it up front.
    if [[ -z "${new_mirror}" || "${new_mirror}" =~ ${unsafe_re} ]]; then
        echo "ERROR: invalid mirror host: '${new_mirror}'" >&2
        return 1
    fi

    echo "Switching to mirror: ${new_mirror}"
    if ! sed -i "s|http://[^/]*/ubuntu-ports/|http://${new_mirror}/ubuntu-ports/|g" "${sources_file}"; then
        echo "ERROR: could not rewrite ${sources_file}" >&2
        return 1
    fi

    # Show what we're using
    echo "Current sources.list configuration:"
    grep -E '^deb ' "${sources_file}" | head -3
    return 0
}
49+
50+
# Print the mirrors to try, in priority order, as one space-separated line.
#
# The region is taken from the first http://<region>.clouds.ports.ubuntu.com
# entry in the sources file. Priority order:
#   1. Country-specific mirror (most reliable) - only sg.ports.ubuntu.com for
#      ap-southeast-1 is known here
#   2. Regional CDN (can be inconsistent) - only when a region was detected
#   3. Global fallback (ports.ubuntu.com) - always present
#
# Arguments: $1 - sources file to inspect (optional, defaults to
#                 /etc/apt/sources.list)
# Outputs:   mirror host names on stdout, separated by single spaces
# Returns:   0
function get_mirror_list {
    local sources_file="${1:-/etc/apt/sources.list}"
    local region_re='http://([^.]+)\.clouds\.ports\.ubuntu\.com'
    local current_region=""
    local line
    local -a mirrors=()

    # Pure-bash scan: no dependency on GNU grep's -P (PCRE) support, and no
    # command substitution hidden behind `local`.
    if [[ -r "${sources_file}" ]]; then
        while IFS= read -r line || [[ -n "${line}" ]]; do
            if [[ "${line}" =~ ${region_re} ]]; then
                current_region="${BASH_REMATCH[1]}"
                break
            fi
        done < "${sources_file}"
    fi

    # Singapore country mirror for ap-southeast-1
    if [[ "${current_region}" == "ap-southeast-1" ]]; then
        mirrors+=("sg.ports.ubuntu.com")
    fi

    if [[ -n "${current_region}" ]]; then
        mirrors+=("${current_region}.clouds.ports.ubuntu.com")
    fi

    mirrors+=("ports.ubuntu.com")

    echo "${mirrors[@]}"
}
75+
76+
# Mirror fallback function for resilient apt-get update
77+
function apt_update_with_fallback {
78+
local sources_file="/etc/apt/sources.list"
79+
local -a mirror_list=($(get_mirror_list))
80+
local attempt=1
81+
local max_attempts=${#mirror_list[@]}
82+
83+
for mirror in "${mirror_list[@]}"; do
4784
echo "========================================="
4885
echo "Attempting apt-get update with mirror: ${mirror}"
4986
echo "Attempt ${attempt} of ${max_attempts}"
5087
echo "========================================="
5188

52-
# Update sources.list to use current mirror
53-
sed -i "s|http://[^/]*/ubuntu-ports/|http://${mirror}/ubuntu-ports/|g" "${sources_file}"
54-
55-
# Show what we're using
56-
echo "Current sources.list configuration:"
57-
grep -E '^deb ' "${sources_file}" | head -3
89+
switch_mirror "${mirror}"
5890

5991
# Attempt update with timeout (5 minutes)
6092
if timeout 300 apt-get $APT_OPTIONS update 2>&1; then
@@ -90,6 +122,62 @@ function apt_update_with_fallback {
90122
return 1
91123
}
92124

125+
# Run `apt-get <args>` against each candidate mirror until one succeeds.
#
# For every mirror printed by get_mirror_list this function points
# sources.list at it (switch_mirror), refreshes the package lists, and then
# runs apt-get with the caller's arguments. ANY non-zero apt-get status - not
# just 404s - moves on to the next mirror; sources.list is left pointing at
# the last mirror tried.
#
# Globals:   APT_OPTIONS (read) - options for the `apt-get update` step
# Arguments: "$@" - passed verbatim to apt-get, e.g. `install -y pkg ...`
# Outputs:   progress and error banners on stdout
# Returns:   0 as soon as one mirror succeeds, 1 when every mirror failed
function apt_install_with_fallback {
    local -a mirror_list=()
    local mirror
    local attempt=1
    local max_attempts
    local exit_code
    local sleep_time

    # Word splitting is intentional: get_mirror_list prints space-separated hosts.
    # shellcheck disable=SC2207
    mirror_list=($(get_mirror_list))
    max_attempts=${#mirror_list[@]}

    for mirror in "${mirror_list[@]}"; do
        echo "========================================="
        echo "Attempting apt-get install with mirror: ${mirror}"
        echo "Attempt ${attempt} of ${max_attempts}"
        echo "========================================="

        switch_mirror "${mirror}"

        # Re-run apt-get update to get package lists from new mirror.
        # APT_OPTIONS holds several options in one string, so it must split.
        # shellcheck disable=SC2086
        if ! timeout 300 apt-get $APT_OPTIONS update 2>&1; then
            echo "Warning: apt-get update failed for mirror ${mirror}, trying next..."
            attempt=$((attempt + 1))
            continue
        fi

        # Run apt-get install directly (no output capture to avoid buffering/timeout issues)
        exit_code=0
        apt-get "$@" || exit_code=$?

        if [ "${exit_code}" -eq 0 ]; then
            echo "========================================="
            echo "✓ Successfully installed packages using mirror: ${mirror}"
            echo "========================================="
            return 0
        fi

        echo "========================================="
        echo "✗ apt-get failed with exit code: ${exit_code}"
        echo "========================================="

        # Clean apt cache before potential retry
        apt-get clean

        # Linear back-off, skipped after the last mirror.
        if [ "${attempt}" -lt "${max_attempts}" ]; then
            sleep_time=$((attempt * 5))
            echo "Waiting ${sleep_time} seconds before trying next mirror..."
            sleep "${sleep_time}"
        fi

        attempt=$((attempt + 1))
    done

    echo "========================================="
    echo "ERROR: All mirror tiers failed for apt-get install after ${max_attempts} attempts"
    echo "========================================="
    return 1
}
180+
93181

94182

95183
function update_install_packages {
@@ -107,25 +195,31 @@ function update_install_packages {
107195
if [ "${ARCH}" = "amd64" ]; then
108196
echo 'grub-pc grub-pc/install_devices_empty select true' | debconf-set-selections
109197
echo 'grub-pc grub-pc/install_devices select' | debconf-set-selections
110-
# Install various packages needed for a booting system
111-
apt-get install -y \
112-
linux-aws \
113-
grub-pc \
114-
e2fsprogs
198+
# Install various packages needed for a booting system (with mirror fallback)
199+
if ! apt_install_with_fallback install -y linux-aws grub-pc e2fsprogs; then
200+
echo "FATAL: Failed to install boot packages"
201+
exit 1
202+
fi
115203
else
116-
apt-get install -y e2fsprogs
204+
if ! apt_install_with_fallback install -y e2fsprogs; then
205+
echo "FATAL: Failed to install e2fsprogs"
206+
exit 1
207+
fi
117208
fi
118-
# Install standard packages
119-
apt-get install -y \
209+
# Install standard packages (with mirror fallback)
210+
# Note: ec2-hibinit-agent, ec2-instance-connect, hibagent moved to stage 2
211+
# because their post-install scripts try to access EC2 metadata service
212+
# which doesn't work in a chroot and causes long hangs
213+
if ! apt_install_with_fallback install -y \
120214
sudo \
121215
wget \
122216
cloud-init \
123217
acpid \
124-
ec2-hibinit-agent \
125-
ec2-instance-connect \
126-
hibagent \
127218
ncurses-term \
128-
ssh-import-id \
219+
ssh-import-id; then
220+
echo "FATAL: Failed to install standard packages"
221+
exit 1
222+
fi
129223

130224
# apt upgrade
131225
apt-get upgrade -y
@@ -136,7 +230,7 @@ function update_install_packages {
136230
echo "FATAL: Failed to update package lists after adding universe repository"
137231
exit 1
138232
fi
139-
apt-get install -y --no-install-recommends \
233+
if ! apt_install_with_fallback install -y --no-install-recommends \
140234
openssh-server \
141235
git \
142236
ufw \
@@ -146,10 +240,16 @@ function update_install_packages {
146240
locales \
147241
at \
148242
less \
149-
python3-systemd
243+
python3-systemd; then
244+
echo "FATAL: Failed to install universe packages"
245+
exit 1
246+
fi
150247

151248
if [ "${ARCH}" = "arm64" ]; then
152-
apt-get $APT_OPTIONS --yes install linux-aws initramfs-tools dosfstools
249+
if ! apt_install_with_fallback $APT_OPTIONS --yes install linux-aws initramfs-tools dosfstools; then
250+
echo "FATAL: Failed to install arm64 boot packages"
251+
exit 1
252+
fi
153253
fi
154254
}
155255

@@ -199,7 +299,10 @@ function install_packages_for_build {
199299
}
200300

201301
function setup_apparmor {
202-
apt-get install -y apparmor apparmor-utils auditd
302+
if ! apt_install_with_fallback install -y apparmor apparmor-utils auditd; then
303+
echo "FATAL: Failed to install apparmor packages"
304+
exit 1
305+
fi
203306

204307
# Copy apparmor profiles
205308
cp -rv /tmp/apparmor_profiles/* /etc/apparmor.d/
@@ -218,7 +321,10 @@ EOF
218321
# Install GRUB
219322
function install_configure_grub {
220323
if [ "${ARCH}" = "arm64" ]; then
221-
apt-get $APT_OPTIONS --yes install cloud-guest-utils fdisk grub-efi-arm64 efibootmgr
324+
if ! apt_install_with_fallback $APT_OPTIONS --yes install cloud-guest-utils fdisk grub-efi-arm64 efibootmgr; then
325+
echo "FATAL: Failed to install grub packages for arm64"
326+
exit 1
327+
fi
222328
setup_grub_conf_arm64
223329
rm -rf /etc/grub.d/30_os-prober
224330
sleep 1
@@ -292,6 +398,11 @@ function cleanup_cache {
292398
apt-get clean
293399
}
294400

401+
# Delete the policy-rc.d override so services start normally on boot.
# Succeeds even when the file is already absent (rm -f).
function enable_services {
    local policy_file="/usr/sbin/policy-rc.d"
    rm -f "${policy_file}"
}
405+
295406
update_install_packages
296407
setup_locale
297408
setup_postgesql_env
@@ -306,3 +417,4 @@ disable_sshd_passwd_auth
306417
disable_fsck
307418
#setup_ccache
308419
cleanup_cache
420+
enable_services

scripts/nix-provision.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ function install_packages {
99
# Setup Ansible on host VM
1010
sudo apt-get update && sudo apt-get install -y software-properties-common
1111

12+
# Install EC2-specific packages that were deferred from stage 1
13+
# These packages have post-install scripts that need EC2 metadata service access
14+
# which only works on a real running EC2 instance (not in chroot)
15+
sudo apt-get install -y ec2-hibinit-agent ec2-instance-connect hibagent
16+
1217
# Manually add GPG key with explicit keyserver
1318
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 93C4A3FD7BB9C367
1419

0 commit comments

Comments
 (0)