From dbd28045856902ff0a8191c20cdf33a762da1792 Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Fri, 26 Jun 2026 13:01:54 +0200 Subject: [PATCH] feat: enable parallel e2e test execution for faster local dev feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run standard and experimental e2e tests simultaneously on separate KIND clusters via `make -j2 test-e2e test-experimental-e2e`, cutting local e2e wall-clock time by ~37% (25m → 16m). This is especially valuable when working with AI coding agents that benefit from faster feedback loops. Key changes: - Use Make pattern rules (kind-cluster-%, kind-load-%, etc.) with the stem identifying the variant. Cluster names, kubeconfig paths, and coverage names are derived by convention from the stem. Only variant-specific deviations need explicit overrides. - Remove .NOTPARALLEL directive (no longer needed with unique targets) - Isolate per-cluster kubeconfig files under .kubeconfig/ - Use separate KIND cluster names and prometheus host ports (30900 vs 30901) to avoid resource conflicts - Make PROMETHEUS_URL configurable via env var in summary reporter; skip summary generation when unset - Conditionally serialize kind-deploy when both targets run together to avoid races on shared files Linux prerequisite for parallel runs: sudo sysctl fs.inotify.max_user_instances=512 Co-Authored-By: Claude --- .gitignore | 3 + AGENTS.md | 5 + Makefile | 176 +++++++++++++++++++++-------- kind-config/kind-config-2node.yaml | 2 +- test/e2e/README.md | 11 +- test/internal/summary/summary.go | 8 +- 6 files changed, 150 insertions(+), 55 deletions(-) diff --git a/.gitignore b/.gitignore index ddbd768e80..3748a581e0 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,6 @@ site # Test profiling artifacts test-profiles/ + +# Per-cluster kubeconfig files for e2e tests +.kubeconfig/ diff --git a/AGENTS.md b/AGENTS.md index 1a62ddcb5c..e751f90aa5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -95,6 
+95,11 @@ make test-e2e # Standard features make test-experimental-e2e # Experimental features make test-extension-developer-e2e # Extension developer workflow +# E2E tests in parallel (runs standard and experimental on separate KIND clusters) +# Recommended when machine has enough resources (4+ CPU cores, 8+ GB RAM) for faster feedback +make -j2 test-e2e test-experimental-e2e +# Linux prerequisite: sudo sysctl fs.inotify.max_user_instances=512 + # Regression tests make test-regression diff --git a/Makefile b/Makefile index 9c0f9719e9..7971595f14 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,8 @@ ifeq ($(origin KIND_CONFIG), undefined) KIND_CONFIG := ./kind-config/kind-config.yaml endif +KUBECONFIG_DIR := $(ROOT_DIR)/.kubeconfig + ifneq (, $(shell command -v docker 2>/dev/null)) CONTAINER_RUNTIME := docker else ifneq (, $(shell command -v podman 2>/dev/null)) @@ -96,9 +98,6 @@ EXPERIMENTAL_MANIFEST := $(MANIFEST_HOME)/experimental.yaml EXPERIMENTAL_E2E_MANIFEST := $(MANIFEST_HOME)/experimental-e2e.yaml CATALOGS_MANIFEST := $(MANIFEST_HOME)/default-catalogs.yaml -# Disable -j flag for make -.NOTPARALLEL: - .DEFAULT_GOAL := build #SECTION General @@ -275,25 +274,6 @@ $(eval $(call install-sh,standard,operator-controller-standard.yaml)) .PHONY: test test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests. -.PHONY: e2e -e2e: E2E_TIMEOUT ?= 20m -e2e: GODOG_ARGS ?= -e2e: #EXHELP Run the e2e tests. 
-ifeq ($(strip $(GODOG_ARGS)),) - trap 'exit 130' INT; \ - set +e; \ - go test -count=1 -v ./test/e2e/features_test.go -timeout $(E2E_TIMEOUT) -args --godog.tags="~@Serial" --godog.concurrency=100; \ - parallelExit=$$?; \ - go test -count=1 -v ./test/e2e/features_test.go -timeout $(E2E_TIMEOUT) -args --godog.tags="@Serial" --godog.concurrency=1; \ - serialExit=$$?; \ - if [[ $$parallelExit -ne 0 ]] || [[ $$serialExit -ne 0 ]]; then \ - echo "e2e tests failed: parallel=$$parallelExit serial=$$serialExit"; \ - exit 1; \ - fi -else - go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT) -args $(GODOG_ARGS) -endif - export CLUSTER_REGISTRY_HOST := docker-registry.operator-controller-e2e.svc:5000 .PHONY: extension-developer-e2e extension-developer-e2e: export OPERATOR_SDK := $(OPERATOR_SDK) @@ -333,47 +313,146 @@ test-regression: #HELP Run regression test # may be helpful for debugging purposes after a test run. # # for example: ARTIFACT_PATH=/tmp/artifacts make test-e2e -.PHONY: test-e2e -test-e2e: SOURCE_MANIFEST := $(STANDARD_E2E_MANIFEST) -test-e2e: KIND_CLUSTER_NAME := operator-controller-e2e +# +# E2E targets use pattern rules (kind-cluster-%, kind-load-%, etc.) with the stem +# identifying the variant (e2e, experimental-e2e). Each variant runs on its own KIND +# cluster, enabling parallel execution via make -j2 test-e2e test-experimental-e2e. 
+#
+# Parallel runs require sufficient inotify instances for multiple KIND clusters:
+#   sudo sysctl fs.inotify.max_user_instances=512
+# To persist: echo "fs.inotify.max_user_instances=512" | sudo tee /etc/sysctl.d/99-kind.conf
+#
+ifneq (,$(and $(findstring -j,$(MAKEFLAGS)),$(filter test-e2e,$(MAKECMDGOALS)),$(filter test-experimental-e2e,$(MAKECMDGOALS)),$(findstring Linux,$(shell uname))))
+$(info NOTE: Running both standard and experimental e2e tests in parallel requires fs.inotify.max_user_instances>=512 (current: $(shell sysctl -n fs.inotify.max_user_instances)))
+endif
+
+# Variant-specific overrides on top-level targets propagate down through the entire chain of pattern-rule prerequisites
+.PHONY: test-e2e test-experimental-e2e
 test-e2e: GO_BUILD_EXTRA_FLAGS := -cover
-test-e2e: COVERAGE_NAME := e2e
-test-e2e: export MANIFEST := $(STANDARD_RELEASE_MANIFEST)
-test-e2e: export INSTALL_DEFAULT_CATALOGS := false
-test-e2e: run-internal prometheus e2e e2e-coverage kind-clean #HELP Run e2e test suite on local kind cluster
-
-.PHONY: test-experimental-e2e
-test-experimental-e2e: SOURCE_MANIFEST := $(EXPERIMENTAL_E2E_MANIFEST)
-test-experimental-e2e: KIND_CLUSTER_NAME := operator-controller-e2e
-test-experimental-e2e: KIND_CONFIG := ./kind-config/kind-config-2node.yaml
+test-e2e: E2E_SOURCE_MANIFEST := $(STANDARD_E2E_MANIFEST)
+test-e2e: E2E_RELEASE_MANIFEST := $(STANDARD_RELEASE_MANIFEST)
 test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover
-test-experimental-e2e: COVERAGE_NAME := experimental-e2e
-test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
-test-experimental-e2e: export INSTALL_DEFAULT_CATALOGS := false
-test-experimental-e2e: PROMETHEUS_VALUES := testdata/prometheus/values-experimental.yaml
+test-experimental-e2e: KIND_CONFIG := ./kind-config/kind-config-2node.yaml
+test-experimental-e2e: E2E_SOURCE_MANIFEST := $(EXPERIMENTAL_E2E_MANIFEST)
+test-experimental-e2e: E2E_RELEASE_MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
+test-experimental-e2e: E2E_PROMETHEUS_VALUES := testdata/prometheus/values-experimental.yaml
 test-experimental-e2e: E2E_TIMEOUT ?= 25m
-test-experimental-e2e: run-internal prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster
+
+# Conventions: cluster name = operator-controller-$*, kubeconfig = $(KUBECONFIG_DIR)/operator-controller-$*.kubeconfig ($* is the pattern stem, so E2E_KUBECONFIG must stay recursively expanded)
+E2E_KUBECONFIG = $(KUBECONFIG_DIR)/operator-controller-$*.kubeconfig
 
 CATALOGD_CERT_SECRET = catalogd-service-cert-$(VERSION)
 
-.PHONY: prometheus
-prometheus: PROMETHEUS_NAMESPACE := olmv1-system
-prometheus: PROMETHEUS_CHART_VERSION := 86.2.2
-prometheus: $(HELM) #EXHELP Deploy Prometheus into specified namespace
+# Recreate the variant's KIND cluster from $(KIND_CONFIG) and export its kubeconfig to $(E2E_KUBECONFIG).
+.PHONY: kind-cluster-%
+kind-cluster-%: $(KIND) docker-build
+	@KIND_NODE_IMAGE=$$(K8S_VERSION=$(K8S_VERSION) $(VALIDATE_KINDEST_NODE_SCRIPT)) || exit 1; \
+	$(KIND) delete cluster --name operator-controller-$* 2>/dev/null || true; \
+	$(KIND) create cluster --name operator-controller-$* --config $(KIND_CONFIG) --image "$$KIND_NODE_IMAGE"; \
+	mkdir -p $(KUBECONFIG_DIR); \
+	KUBECONFIG=$(E2E_KUBECONFIG) $(KIND) export kubeconfig --name operator-controller-$*; \
+	KUBECONFIG=$(E2E_KUBECONFIG) kubectl wait --for=condition=Ready nodes --all --timeout=2m
+
+.PHONY: kind-load-%
+kind-load-%: kind-cluster-%
+	$(KIND) load docker-image $(OPCON_IMG) --name operator-controller-$*
+	$(KIND) load docker-image $(CATD_IMG) --name operator-controller-$*
+
+# Render $(E2E_RELEASE_MANIFEST) from $(E2E_SOURCE_MANIFEST), then pipe the envsubst-expanded scripts/install.tpl.sh to bash.
+.PHONY: kind-deploy-%
+kind-deploy-%: kind-load-% manifests
+	@echo "Using $(E2E_SOURCE_MANIFEST) as source manifest"
+	sed "s/cert-git-version/cert-$(VERSION)/g" $(E2E_SOURCE_MANIFEST) > $(E2E_RELEASE_MANIFEST)
+	cp $(CATALOGS_MANIFEST) $(RELEASE_CATALOGS)
+	export KUBECONFIG=$(E2E_KUBECONFIG) \
+		DEFAULT_CATALOG=$(RELEASE_CATALOGS) \
+		CERT_MGR_VERSION=$(CERT_MGR_VERSION) \
+		INSTALL_DEFAULT_CATALOGS=false \
+		MANIFEST=$(E2E_RELEASE_MANIFEST); \
+	envsubst '$$DEFAULT_CATALOG,$$CERT_MGR_VERSION,$$INSTALL_DEFAULT_CATALOGS,$$MANIFEST' < scripts/install.tpl.sh | bash -s
+
+# Dump every listable namespaced resource from olmv1-system and $(CATD_NAMESPACE) and score the stream with kube-score.
+.PHONY: lint-deployed-%
+lint-deployed-%: kind-deploy-% $(KUBE_SCORE)
+	(export KUBECONFIG=$(E2E_KUBECONFIG); \
+	for ns in $$(printf "olmv1-system\n%s\n" "$(CATD_NAMESPACE)" | uniq); do \
+		for resource in $$(kubectl api-resources --verbs=list --namespaced -o name); do \
+			kubectl get $$resource -n $$ns -o yaml ; \
+			echo "---" ; \
+		done \
+	done) | $(KUBE_SCORE) score - \
+		--ignore-test container-resources \
+		--ignore-test container-image-pull-policy \
+		--ignore-test container-ephemeral-storage-request-and-limit \
+		--ignore-test container-security-context-user-group-id
+
+.PHONY: wait-%
+wait-%: lint-deployed-%
+	KUBECONFIG=$(E2E_KUBECONFIG) kubectl wait --for=condition=Available --namespace=$(CATD_NAMESPACE) deployment/catalogd-controller-manager --timeout=60s
+	KUBECONFIG=$(E2E_KUBECONFIG) kubectl wait --for=condition=Ready --namespace=$(CATD_NAMESPACE) certificate/catalogd-service-cert
+
+# Install kube-prometheus-stack into the variant's cluster; the ifeq below skips it when E2E_SUMMARY_OUTPUT is empty at parse time.
+.PHONY: prometheus-%
+prometheus-%: wait-% $(HELM)
 ifeq ($(strip $(E2E_SUMMARY_OUTPUT)),)
 	@echo "E2E_SUMMARY_OUTPUT unset; skipping prometheus deployment"
 else
-	$(HELM) upgrade --install prometheus oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack \
+	KUBECONFIG=$(E2E_KUBECONFIG) $(HELM) upgrade --install prometheus oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack \
 		--namespace $(PROMETHEUS_NAMESPACE) --create-namespace \
 		--version $(PROMETHEUS_CHART_VERSION) \
 		-f testdata/prometheus/values.yaml \
-		$(if $(PROMETHEUS_VALUES),-f $(PROMETHEUS_VALUES)) \
+		$(if $(E2E_PROMETHEUS_VALUES),-f $(E2E_PROMETHEUS_VALUES)) \
 		--set-string 'prometheus.additionalServiceMonitors[1].endpoints[0].tlsConfig.ca.secret.name=$(CATALOGD_CERT_SECRET)' \
 		--set-string 'prometheus.additionalServiceMonitors[1].endpoints[0].tlsConfig.cert.secret.name=$(CATALOGD_CERT_SECRET)' \
 		--set-string 'prometheus.additionalServiceMonitors[1].endpoints[0].tlsConfig.keySecret.name=$(CATALOGD_CERT_SECRET)' \
 		--wait --timeout 5m
 endif
 
+# Run the godog suite against the variant's cluster. The Prometheus host port is looked up from the
+# NodePort 30900 mapping in $(KIND_CONFIG); when no mapping is found, PROMETHEUS_URL is exported empty
+# (the summary reporter treats an empty value as unset) instead of as a malformed "http://localhost:" URL.
+.PHONY: e2e-run-%
+e2e-run-%: E2E_TIMEOUT ?= 20m
+e2e-run-%: GODOG_ARGS ?=
+e2e-run-%: prometheus-%
+ifeq ($(strip $(GODOG_ARGS)),)
+	E2E_PROM_PORT=$$(grep -A1 'containerPort: 30900' $(KIND_CONFIG) | grep hostPort | awk '{print $$2}' || true); \
+	export KUBECONFIG=$(E2E_KUBECONFIG) MANIFEST=$(E2E_RELEASE_MANIFEST) INSTALL_DEFAULT_CATALOGS=false \
+		PROMETHEUS_URL=$${E2E_PROM_PORT:+http://localhost:$$E2E_PROM_PORT}; \
+	trap 'exit 130' INT; \
+	set +e; \
+	go test -count=1 -v ./test/e2e/features_test.go -timeout $(E2E_TIMEOUT) -args --godog.tags="~@Serial" --godog.concurrency=100; \
+	parallelExit=$$?; \
+	go test -count=1 -v ./test/e2e/features_test.go -timeout $(E2E_TIMEOUT) -args --godog.tags="@Serial" --godog.concurrency=1; \
+	serialExit=$$?; \
+	if [[ $$parallelExit -ne 0 ]] || [[ $$serialExit -ne 0 ]]; then \
+		echo "e2e tests failed: parallel=$$parallelExit serial=$$serialExit"; \
+		exit 1; \
+	fi
+else
+	E2E_PROM_PORT=$$(grep -A1 'containerPort: 30900' $(KIND_CONFIG) | grep hostPort | awk '{print $$2}' || true); \
+	export KUBECONFIG=$(E2E_KUBECONFIG) MANIFEST=$(E2E_RELEASE_MANIFEST) INSTALL_DEFAULT_CATALOGS=false \
+		PROMETHEUS_URL=$${E2E_PROM_PORT:+http://localhost:$$E2E_PROM_PORT}; \
+	go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT) -args $(GODOG_ARGS)
+endif
+
+.PHONY: e2e-coverage-%
+e2e-coverage-%: e2e-run-%
+	KUBECONFIG=$(E2E_KUBECONFIG) COVERAGE_NAME=$* ./hack/test/e2e-coverage.sh
+
+# Delete the variant's cluster; as the last link of the chain it runs only after every earlier step has succeeded.
+.PHONY: kind-clean-%
+kind-clean-%: e2e-coverage-%
+	$(KIND) delete cluster --name operator-controller-$*
+
+test-e2e: kind-clean-e2e #HELP Run e2e test suite on local kind cluster
+test-experimental-e2e: kind-clean-experimental-e2e #HELP Run experimental e2e test suite on local kind cluster
+
+# When both targets are requested as goals, serialize deployment: both kind-deploy-% instances write $(RELEASE_CATALOGS)
+ifneq (,$(and $(filter test-e2e,$(MAKECMDGOALS)),$(filter test-experimental-e2e,$(MAKECMDGOALS))))
+kind-deploy-experimental-e2e: | kind-deploy-e2e
+endif
+
 .PHONY: test-extension-developer-e2e
 test-extension-developer-e2e: SOURCE_MANIFEST := $(STANDARD_E2E_MANIFEST)
 test-extension-developer-e2e: KIND_CLUSTER_NAME := operator-controller-ext-dev-e2e
@@ -424,10 +503,6 @@ test-st2ex-e2e: export TEST_CLUSTER_CATALOG_NAME := test-catalog
 test-st2ex-e2e: export TEST_CLUSTER_EXTENSION_NAME := test-package
 test-st2ex-e2e: experimental/install.sh standard/install.sh $(TEST_UPGRADE_E2E_TASKS) #HELP Run swichover (standard -> experimental) e2e tests on a local kind cluster
 
-.PHONY: e2e-coverage
-e2e-coverage:
-	COVERAGE_NAME=$(COVERAGE_NAME) ./hack/test/e2e-coverage.sh
-
 TEST_PROFILE_BIN := bin/test-profile
 .PHONY: build-test-profiler
 build-test-profiler: #EXHELP Build the test profiling tool
@@ -560,6 +635,9 @@ run-experimental: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
 run-experimental: run-internal #HELP Build the operator-controller then deploy it with the experimental manifest into a new kind cluster.
CATD_NAMESPACE := olmv1-system +PROMETHEUS_NAMESPACE := olmv1-system +PROMETHEUS_CHART_VERSION := 86.2.2 + .PHONY: wait wait: kubectl wait --for=condition=Available --namespace=$(CATD_NAMESPACE) deployment/catalogd-controller-manager --timeout=60s diff --git a/kind-config/kind-config-2node.yaml b/kind-config/kind-config-2node.yaml index d87ec06b53..66a67f9852 100644 --- a/kind-config/kind-config-2node.yaml +++ b/kind-config/kind-config-2node.yaml @@ -5,7 +5,7 @@ nodes: extraPortMappings: # prometheus metrics service's NodePort - containerPort: 30900 - hostPort: 30900 + hostPort: 30901 listenAddress: "127.0.0.1" protocol: tcp kubeadmConfigPatches: diff --git a/test/e2e/README.md b/test/e2e/README.md index dac3c68808..8997948dbf 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -247,13 +247,17 @@ The `Makefile` automatically separates scenarios when run without additional `GO ### Run All Tests ```bash -make test-e2e +make test-e2e # Standard features +make test-experimental-e2e # Experimental features ``` -or +### Run Standard and Experimental in Parallel + +Both suites can run simultaneously on separate KIND clusters (see the Makefile +comment for Linux prerequisites): ```bash -make test-experimental-e2e +make -j2 test-e2e test-experimental-e2e ``` Custom godog arguments can be modified by setting the following: @@ -312,6 +316,7 @@ go test test/e2e/features_test.go --log.debug --k8s.cli=oc - `KUBECONFIG`: Path to kubeconfig file (defaults to `~/.kube/config`) - `E2E_SUMMARY_OUTPUT`: Path to write test summary (optional) +- `PROMETHEUS_URL`: Prometheus endpoint for summary generation (set automatically by `make`; skips summary if unset) - `CLUSTER_REGISTRY_HOST`: In-cluster registry host for pulling catalog images ## Design Patterns diff --git a/test/internal/summary/summary.go b/test/internal/summary/summary.go index 4eea7006ea..0592dac7f6 100644 --- a/test/internal/summary/summary.go +++ b/test/internal/summary/summary.go @@ -19,7 +19,6 @@ var ( 
summaryTemplate = "summary.md.tmpl" alertsTemplate = "alert.md.tmpl" chartTemplate = "mermaid_chart.md.tmpl" - defaultPromUrl = "http://localhost:30900" ) type summaryAlerts struct { @@ -177,9 +176,14 @@ func PrintSummary(path string) error { fmt.Printf("No summary output path specified; skipping") return nil } + promURL := os.Getenv("PROMETHEUS_URL") + if promURL == "" { + fmt.Printf("PROMETHEUS_URL not set; skipping summary generation") + return nil + } client, err := api.NewClient(api.Config{ - Address: defaultPromUrl, + Address: promURL, }) if err != nil { fmt.Printf("warning: failed to initialize promQL client: %v", err)