Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/helm-validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,37 @@ jobs:
--set authProxy.enabled=false \
--set metrics.disableAuth=true

helm-lint-crds:
name: Lint CRDs Chart
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install Helm
uses: azure/setup-helm@v3

- name: Lint CRDs chart
run: helm lint --strict helm/temporal-worker-controller-crds

helm-template-crds:
name: Template CRDs Chart
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install Helm
uses: azure/setup-helm@v3

- name: Template CRDs chart
run: helm template test-release helm/temporal-worker-controller-crds

helm-validate-succeed:
name: All Helm Validations Succeed
needs:
- helm-lint
- helm-template
- helm-lint-crds
- helm-template-crds
runs-on: ubuntu-latest
if: always()
env:
Expand Down
20 changes: 13 additions & 7 deletions .github/workflows/helm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,15 @@ jobs:
# Update Chart.yaml with new version
sed -i "s/version: $CURRENT_VERSION/version: $NEW_VERSION/g" helm/temporal-worker-controller/Chart.yaml

# Also bump CRDs chart version
sed -i "s/version: $CURRENT_VERSION/version: $NEW_VERSION/g" helm/temporal-worker-controller-crds/Chart.yaml

# Set output variable for use in later steps
echo "version=$NEW_VERSION" >> "$GITHUB_OUTPUT"

# Commit the change
# Commit both Chart.yaml files
git add helm/temporal-worker-controller/Chart.yaml
git add helm/temporal-worker-controller-crds/Chart.yaml
git commit -m "Bump chart version to $NEW_VERSION [skip ci]"
git push

Expand All @@ -92,16 +96,18 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PAT}}

- name: Package and Push Helm chart
- name: Package and Push Helm charts
run: |
# Use version from previous step
VERSION=${{ steps.bump_version.outputs.version }}
echo "Chart version: $VERSION"

# Package the chart
helm package ./helm/temporal-worker-controller
# Package and push the CRDs chart
helm package ./helm/temporal-worker-controller-crds
helm push temporal-worker-controller-crds-${VERSION}.tgz oci://docker.io/temporalio
echo "✅ CRDs chart pushed successfully to oci://docker.io/temporalio/temporal-worker-controller-crds:${VERSION}"

# Push to Docker Hub
# Package and push the controller chart
helm package ./helm/temporal-worker-controller
helm push temporal-worker-controller-${VERSION}.tgz oci://docker.io/temporalio

echo "✅ Chart pushed successfully to oci://docker.io/temporalio/temporal-worker-controller:${VERSION}"
echo "✅ Controller chart pushed successfully to oci://docker.io/temporalio/temporal-worker-controller:${VERSION}"
20 changes: 13 additions & 7 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,24 +115,30 @@ jobs:
sed -i "s/version: .*/version: $NEW_VERSION/" helm/temporal-worker-controller/Chart.yaml
sed -i "s/appVersion: .*/appVersion: ${GITHUB_REF_NAME#v}/" helm/temporal-worker-controller/Chart.yaml

# Also bump CRDs chart version
sed -i "s/version: .*/version: $NEW_VERSION/" helm/temporal-worker-controller-crds/Chart.yaml

# Set output variable for use in later steps
echo "version=$NEW_VERSION" >> "$GITHUB_OUTPUT"

# Commit the change
# Commit both Chart.yaml files
git add helm/temporal-worker-controller/Chart.yaml
git add helm/temporal-worker-controller-crds/Chart.yaml
git commit -m "Bump chart version to $NEW_VERSION [skip ci]"
git push

- name: Package and Push Helm chart
- name: Package and Push Helm charts
run: |
# Use version from previous step
VERSION=${{ steps.bump_version.outputs.version }}
echo "Chart version: $VERSION"

# Package the chart
helm package ./helm/temporal-worker-controller
# Package and push the CRDs chart
helm package ./helm/temporal-worker-controller-crds
helm push temporal-worker-controller-crds-${VERSION}.tgz oci://docker.io/temporalio
echo "✅ CRDs chart pushed successfully to oci://docker.io/temporalio/temporal-worker-controller-crds:${VERSION}"

# Push to Docker Hub
# Package and push the controller chart
helm package ./helm/temporal-worker-controller
helm push temporal-worker-controller-${VERSION}.tgz oci://docker.io/temporalio

echo "✅ Chart pushed successfully to oci://docker.io/temporalio/temporal-worker-controller:${VERSION}"
echo "✅ Controller chart pushed successfully to oci://docker.io/temporalio/temporal-worker-controller:${VERSION}"
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ help: ## Display this help.
.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
GOWORK=off GO111MODULE=on $(CONTROLLER_GEN) rbac:roleName=manager-role crd:allowDangerousTypes=true,maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths=./api/... paths=./internal/... paths=./cmd/... \
output:crd:artifacts:config=helm/temporal-worker-controller/crds
output:crd:artifacts:config=helm/temporal-worker-controller-crds/templates

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
Expand Down Expand Up @@ -268,15 +268,16 @@ endif

.PHONY: install
install: manifests ## Install CRDs into the K8s cluster specified in ~/.kube/config.
$(KUBECTL) apply --context $(K8S_CONTEXT) -f helm/temporal-worker-controller/crds
$(KUBECTL) apply --context $(K8S_CONTEXT) -f helm/temporal-worker-controller-crds/templates

.PHONY: uninstall
uninstall: manifests ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUBECTL) delete --context $(K8S_CONTEXT) --ignore-not-found=$(ignore-not-found) -f helm/temporal-worker-controller/crds
$(KUBECTL) delete --context $(K8S_CONTEXT) --ignore-not-found=$(ignore-not-found) -f helm/temporal-worker-controller-crds/templates

.PHONY: deploy
deploy: manifests helm ## Deploy controller to the K8s cluster specified in ~/.kube/config.
helm install temporal-worker-controller ./helm/temporal-worker-controller --create-namespace --namespace temporal-system
helm install temporal-worker-controller-crds ./helm/temporal-worker-controller-crds --create-namespace --namespace temporal-system
helm install temporal-worker-controller ./helm/temporal-worker-controller --namespace temporal-system

.PHONY: undeploy
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
Expand Down
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,25 @@ When you update the image, the controller automatically:

### 🔧 Installation

CRDs are shipped as a separate Helm chart so they can be upgraded independently of the controller. Install the CRDs chart first, then the controller chart:

```bash
# Install using Helm in your preferred namespace
# 1. Install CRDs
helm install temporal-worker-controller-crds \
oci://docker.io/temporalio/temporal-worker-controller-crds \
--version <version> \
--namespace <your-namespace> \
--create-namespace

# 2. Install the controller
helm install temporal-worker-controller \
oci://docker.io/temporalio/temporal-worker-controller \
--version <version> \
--namespace <your-namespace>
```

See [docs/crd-management.md](docs/crd-management.md) for upgrade, rollback, and migration instructions.

### Next Steps

**New to deploying workers with this controller?** → Start with our [Migration Guide](docs/migration-to-versioned.md) to learn how to safely transition from traditional deployments.
Expand Down Expand Up @@ -137,6 +149,7 @@ The Temporal Worker Controller eliminates this operational overhead by automatin
| [Configuration](docs/configuration.md) | Complete configuration reference |
| [Concepts](docs/concepts.md) | Key concepts and terminology |
| [Limits](docs/limits.md) | Technical constraints and limitations |
| [CRD Management](docs/crd-management.md) | CRD upgrade, rollback, and migration guide |

## 🔧 Worker Configuration

Expand Down
13 changes: 8 additions & 5 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,25 @@ This documentation structure is designed to support various types of technical d

## Index

### [Migration Guide](migration-to-versioned.md)
Comprehensive guide for migrating from existing unversioned worker deployment systems to the Temporal Worker Controller. Includes step-by-step instructions, configuration mapping, and common patterns.
See [Migration to Unversioned](migration-to-unversioned.md) for how to migrate back to an unversioned deployment system.
### [Architecture](architecture.md)
High-level overview of the Temporal Worker Controller architecture.

### [Concepts](concepts.md)
Conceptual guides for the Temporal Worker Controller system.

### [Configuration](configuration.md)
Configuration options for the Temporal Worker Controller.

### [Architecture](architecture.md)
High-level overview of the Temporal Worker Controller architecture.
### [CRD Management](crd-management.md)
How to install and upgrade the Temporal Worker Controller Custom Resource Definitions (CRDs).

### [Limits](limits.md)
Technical constraints and limitations of the Temporal Worker Controller system, including maximum field lengths and other operational boundaries.

### [Migration Guide](migration-to-versioned.md)
Comprehensive guide for migrating from existing unversioned worker deployment systems to the Temporal Worker Controller. Includes step-by-step instructions, configuration mapping, and common patterns.
See [Migration to Unversioned](migration-to-unversioned.md) for how to migrate back to an unversioned deployment system.

### [Ownership](ownership.md)
How the controller gets permission to manage a Worker Deployment, how a human client can take or give back control.

Expand Down
136 changes: 136 additions & 0 deletions docs/crd-management.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# CRD Management
Comment thread
carlydf marked this conversation as resolved.

## Why a Separate CRDs Chart?

Helm's `crds/` directory installs CRDs on `helm install` but **silently ignores them on `helm upgrade`** and does not delete them on `helm uninstall`. This means users have no supported mechanism to upgrade CRDs when upgrading the controller chart via the standard Helm workflow.

To provide an explicit, version-tracked CRD upgrade path, the temporal-worker-controller ships CRDs as a separate Helm chart: `temporal-worker-controller-crds`. This is the same pattern used by [Karpenter](https://karpenter.sh/docs/upgrading/upgrade-guide/) (`karpenter-crd`) and [prometheus-operator-crds](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-operator-crds).

Benefits:
- CRDs and the controller can be upgraded and rolled back independently
- Clear versioning: both charts always use the same version number
- CRDs won't be accidentally uninstalled via `helm uninstall` of the `temporal-worker-controller` chart
- CRDs can be uninstalled separately with `helm uninstall temporal-worker-controller-crds`. WARNING: Uninstalling CRDs will delete all Custom Resources in your cluster that use those CRDs.

## Compatibility Commitment

> **CRD chart version N is forward-compatible with controller chart versions N and N−1.**
Comment thread
carlydf marked this conversation as resolved.

- CRD changes are **additive-only** within a minor version (no field removals, no type changes)
- Rolling back the controller one minor version while keeping the current CRDs is always safe
- Upgrading CRDs ahead of the controller (within one minor version) is always safe
- Structural CRD breaking changes (if ever needed) require a new API version (e.g., `v1beta1`) with a migration guide

### What the commitment requires of each release

- All new fields must be marked optional (`+optional`, `omitempty`) — no new required fields may be added within a minor version
- No existing fields may be removed or have their types changed within a minor version
- These rules apply to both spec and status fields

## Initial Installation

Install the CRDs chart first, then the controller chart:

```bash
# 1. Install CRDs
helm install temporal-worker-controller-crds \
oci://docker.io/temporalio/temporal-worker-controller-crds \
--version <version> \
--namespace temporal-system \
--create-namespace

# 2. Install the controller
helm install temporal-worker-controller \
oci://docker.io/temporalio/temporal-worker-controller \
--version <version> \
--namespace temporal-system
```

## Upgrading

Always upgrade the CRDs chart before the controller chart:

```bash
# 1. Upgrade CRDs first
helm upgrade temporal-worker-controller-crds \
oci://docker.io/temporalio/temporal-worker-controller-crds \
--version <new-version> \
--namespace temporal-system

# 2. Then upgrade the controller
helm upgrade temporal-worker-controller \
oci://docker.io/temporalio/temporal-worker-controller \
--version <new-version> \
--namespace temporal-system
```

## Rollback

Roll back the controller first; CRDs can optionally be rolled back afterward (usually not needed):

```bash
# 1. Roll back the controller (CRDs remain at current version — safe per the compatibility commitment)
helm rollback temporal-worker-controller --namespace temporal-system

# 2. Optionally roll back CRDs
helm rollback temporal-worker-controller-crds --namespace temporal-system
```

### CRD rollback and field pruning

**Recommendation:** When a controller rollback is needed, prefer keeping CRDs at the newer version and rolling back only the controller. Per the compatibility commitment this is always safe. Only roll back CRDs if you have a specific reason and have verified no objects are using the fields being removed from the schema (see below).

#### How Kubernetes handles rolled-back CRDs

When a CRD schema changes, Kubernetes does not retroactively re-validate or re-prune existing objects. Fields not present in the current schema are silently dropped ("pruned") only when an object is next written through the API server — that is, on any UPDATE or PATCH, such as `kubectl apply`, a GitOps reconciliation cycle, a user editing a field, or any other tooling that touches the object.

#### The specific risk

If objects on the cluster have spec fields that were added in CRD version N+1 (e.g., `spec.newFeature: enabled`), and the CRD is rolled back to N (which does not define `spec.newFeature`):

- Those objects still show the field when you `kubectl get` them — the data is still in etcd.
- On the next write to any of those objects — even an unrelated change like updating `spec.replicas` — the API server silently drops `spec.newFeature`.
- This is **permanent data loss with no error or warning**.

#### Why the controller's write patterns affect the risk

The controller never writes back to the `TemporalWorkerDeployment` (TWD) spec; it only writes to the status subresource and manages child Kubernetes Deployments. This means the controller itself will not directly trigger spec field pruning. However:

- GitOps tools (Flux, ArgoCD), manual `kubectl apply`, or any other tooling that writes to the TWD object will trigger pruning the next time it writes to that object (for GitOps tools, on the next sync cycle).
- Status: the controller fully reconstitutes status from live cluster state on every reconcile. Status fields added in N+1 will disappear after one reconcile cycle regardless of CRD version. This is expected behavior and not meaningful data loss.

#### How to determine if CRD rollback is safe

Before rolling back CRDs, check whether any `TemporalWorkerDeployment` objects on the cluster are using fields that exist in the newer CRD version but not the older one:

```bash
# Replace <field-added-in-newer-version> with the field name(s) introduced in the version you are rolling back from
kubectl get temporalworkerdeployments -A -o yaml | grep <field-added-in-newer-version>
```

If the output is empty, no objects are using those fields and rollback is safe. If the output is non-empty, rolling back the CRD will cause silent data loss on the next write to those objects.

## Migration Guide for Existing Users

If you are upgrading from a chart version that shipped CRDs in the `crds/` directory (Controller Helm Chart v0.12.0 and earlier), follow these steps.

When upgrading to the new chart version, Helm will **not** delete the existing CRDs — they remain on the cluster untouched. The controller continues working normally. The CRDs become temporarily "orphaned" from Helm tracking, which is fine.

### One-Time Migration

```bash
# Step 1: Upgrade the main chart as usual (CRDs on the cluster are untouched)
helm upgrade temporal-worker-controller \
oci://docker.io/temporalio/temporal-worker-controller \
--version <new-version> \
--namespace temporal-system

# Step 2: Install the CRDs chart to take Helm ownership of the existing CRDs
# kubectl apply reconciles any changes; same-version CRDs are a no-op on the cluster
helm install temporal-worker-controller-crds \
oci://docker.io/temporalio/temporal-worker-controller-crds \
--version <new-version> \
--namespace temporal-system
```

After this migration, follow the standard upgrade and rollback instructions above for all future releases.
5 changes: 5 additions & 0 deletions helm/temporal-worker-controller-crds/Chart.yaml
Comment thread
carlydf marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
apiVersion: v2
name: temporal-worker-controller-crds
description: CRDs for the Temporal Worker Controller. Install this chart before the temporal-worker-controller chart.
Comment thread
carlydf marked this conversation as resolved.
type: application
version: 0.12.0
Comment thread
carlydf marked this conversation as resolved.
5 changes: 5 additions & 0 deletions helm/temporal-worker-controller-crds/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Temporal Worker Controller CRDs Chart

See the [documentation on CRD management](../../docs/crd-management.md) for instructions on how to use this chart.

This chart should never have a `values.yaml` file; the included manifests are only CRDs and should not be configured with parameters.
2 changes: 1 addition & 1 deletion internal/tests/internal/env_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func setupTestEnvironment(t *testing.T) (*rest.Config, client.Client, manager.Ma
t.Log("bootstrapping test environment")
testEnv := &envtest.Environment{
CRDDirectoryPaths: []string{
filepath.Join(getRepoRoot(t), "helm", "temporal-worker-controller", "crds"),
filepath.Join(getRepoRoot(t), "helm", "temporal-worker-controller-crds", "templates"),
},
ErrorIfCRDPathMissing: true,
}
Expand Down
Loading