Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 20 additions & 29 deletions .github/workflows/release-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,53 +104,44 @@ jobs:
- arch: amd64
runner: linux-amd64-cpu8
artifact: linux-amd64
task: python:build:linux:amd64
output_path: target/wheels/linux-amd64/*.whl
target: x86_64-unknown-linux-gnu
- arch: arm64
runner: linux-arm64-cpu8
artifact: linux-arm64
task: python:build:linux:arm64
output_path: target/wheels/linux-arm64/*.whl
target: aarch64-unknown-linux-gnu
runs-on: ${{ matrix.runner }}
timeout-minutes: 120
container:
image: ghcr.io/nvidia/openshell/ci:latest
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --privileged
env:
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENSHELL_IMAGE_TAG: dev
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
fetch-depth: 0

- name: Mark workspace safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- name: Set up mise
uses: jdx/mise-action@v2

- name: Sync Python dependencies
run: uv sync

- name: Cache Rust target and registry
uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2
with:
shared-key: python-wheel-linux-${{ matrix.arch }}
cache-directories: .cache/sccache
cache-targets: "true"
- name: Generate Python protobuf stubs
run: uv sync --group dev && mise run python:proto

- name: Build Python wheels
- name: Patch workspace version
if: needs.compute-versions.outputs.cargo_version != ''
run: |
set -euo pipefail
OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" mise run ${{ matrix.task }}
ls -la ${{ matrix.output_path }}
sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "${{ needs.compute-versions.outputs.cargo_version }}"/}' Cargo.toml

- name: Build Python wheel
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: 2_28
args: --release --features bundled-z3 --compatibility manylinux_2_28 --out dist
before-script-linux: |
dnf install -y --setopt=install_weak_deps=False \
clang llvm-devel openssl-devel perl-core perl-IPC-Cmd

- name: Upload wheel artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: python-wheels-${{ matrix.artifact }}
path: ${{ matrix.output_path }}
path: dist/*.whl
retention-days: 5

build-python-wheel-macos:
Expand Down
49 changes: 20 additions & 29 deletions .github/workflows/release-tag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,54 +133,45 @@ jobs:
- arch: amd64
runner: linux-amd64-cpu8
artifact: linux-amd64
task: python:build:linux:amd64
output_path: target/wheels/linux-amd64/*.whl
target: x86_64-unknown-linux-gnu
- arch: arm64
runner: linux-arm64-cpu8
artifact: linux-arm64
task: python:build:linux:arm64
output_path: target/wheels/linux-arm64/*.whl
target: aarch64-unknown-linux-gnu
runs-on: ${{ matrix.runner }}
timeout-minutes: 120
container:
image: ghcr.io/nvidia/openshell/ci:latest
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --privileged
env:
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENSHELL_IMAGE_TAG: ${{ needs.compute-versions.outputs.semver }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
ref: ${{ inputs.tag || github.ref }}
fetch-depth: 0

- name: Mark workspace safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- name: Set up mise
uses: jdx/mise-action@v2

- name: Sync Python dependencies
run: uv sync

- name: Cache Rust target and registry
uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2
with:
shared-key: python-wheel-linux-${{ matrix.arch }}
cache-directories: .cache/sccache
cache-targets: "true"
- name: Generate Python protobuf stubs
run: uv sync --group dev && mise run python:proto

- name: Build Python wheels
- name: Patch workspace version
if: needs.compute-versions.outputs.cargo_version != ''
run: |
set -euo pipefail
OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" mise run ${{ matrix.task }}
ls -la ${{ matrix.output_path }}
sed -i -E '/^\[workspace\.package\]/,/^\[/{s/^version[[:space:]]*=[[:space:]]*".*"/version = "${{ needs.compute-versions.outputs.cargo_version }}"/}' Cargo.toml

- name: Build Python wheel
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.target }}
manylinux: 2_28
args: --release --features bundled-z3 --compatibility manylinux_2_28 --out dist
before-script-linux: |
dnf install -y --setopt=install_weak_deps=False \
clang llvm-devel openssl-devel perl-core perl-IPC-Cmd

- name: Upload wheel artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: python-wheels-${{ matrix.artifact }}
path: ${{ matrix.output_path }}
path: dist/*.whl
retention-days: 5

build-python-wheel-macos:
Expand Down
99 changes: 99 additions & 0 deletions architecture/build-containers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Container Images

OpenShell produces two container images, both published for `linux/amd64` and `linux/arm64`.

## Gateway (`openshell/gateway`)

The gateway runs the control plane API server. It is deployed as a StatefulSet inside the cluster container via a bundled Helm chart.

- **Docker target**: `gateway` in `deploy/docker/Dockerfile.images`
- **Registry**: `ghcr.io/nvidia/openshell/gateway:latest`
- **Pulled when**: Cluster startup (the Helm chart triggers the pull)
- **Entrypoint**: `openshell-gateway --port 8080` (gRPC + HTTP, mTLS)

## Cluster (`openshell/cluster`)

The cluster image is a single-container Kubernetes distribution that bundles the Helm charts, Kubernetes manifests, and the `openshell-sandbox` supervisor binary needed to bootstrap the control plane.

- **Docker target**: `cluster` in `deploy/docker/Dockerfile.images`
- **Registry**: `ghcr.io/nvidia/openshell/cluster:latest`
- **Pulled when**: `openshell gateway start`

The supervisor binary (`openshell-sandbox`) is built before the image build, staged under `deploy/docker/.build/prebuilt-binaries/<arch>/`, and copied into the cluster image at `/opt/openshell/bin/openshell-sandbox`. It is exposed to sandbox pods at runtime via a read-only `hostPath` volume mount — it is not baked into sandbox images.

## Image Build Pipeline

`deploy/docker/Dockerfile.images` no longer compiles Rust. CI calls `.github/workflows/shadow-rust-native-build.yml` through `workflow_call` to build `openshell-gateway` or `openshell-sandbox` natively on the target architecture. `.github/workflows/docker-build.yml` downloads the resulting artifact, stages it at `deploy/docker/.build/prebuilt-binaries/<arch>/`, builds the per-arch image with the local Buildx driver, and merges multi-arch pushes with `docker buildx imagetools create`.

Local Docker builds use `tasks/scripts/stage-prebuilt-binaries.sh` through `tasks/scripts/docker-build-image.sh` before invoking Docker, so clean checkouts do not need to create the staging directory manually.

## Standalone Gateway Binary

OpenShell also publishes a standalone `openshell-gateway` binary as a GitHub release asset.

- **Source crate**: `crates/openshell-server`
- **Artifact name**: `openshell-gateway-<target>.tar.gz`
- **Targets**: `x86_64-unknown-linux-gnu`, `aarch64-unknown-linux-gnu`, `aarch64-apple-darwin`
- **Release workflows**: `.github/workflows/release-dev.yml`, `.github/workflows/release-tag.yml`
- **Installer**: None yet. The binary is a manual-download asset.

Both the standalone artifact and the deployed container image use the `openshell-gateway` binary.

## Python Wheels

OpenShell also publishes Python wheels for `linux/amd64`, `linux/arm64`, and macOS ARM64.

- Released Linux wheels are built per-arch using `PyO3/maturin-action` with `manylinux: 2_28`. The action pulls the PyPA `manylinux_2_28` container, installs Rust from `rust-toolchain.toml`, and runs `maturin build --features bundled-z3 --compatibility manylinux_2_28`. The resulting wheels install on any Linux with glibc >= 2.28 (RHEL 8+, Ubuntu 18.10+, Debian 10+). This follows the same pattern used by ruff and uv.
- For fast local iteration, the `build:python:wheel:linux:{amd64,arm64}` tasks build natively on the host. The resulting wheels are tagged for the host glibc and are not portable.
- The macOS ARM64 wheel is cross-compiled with `deploy/docker/Dockerfile.python-wheels-macos` via `build:python:wheel:macos`.
- Release workflows mirror the CLI layout: a Linux matrix job for amd64/arm64, a separate macOS job, and release jobs that download the per-platform wheel artifacts directly before publishing.

## Sandbox Images

Sandbox images are **not built in this repository**. They are maintained in the [openshell-community](https://github.com/nvidia/openshell-community) repository and pulled from `ghcr.io/nvidia/openshell-community/sandboxes/` at runtime.

The default sandbox image is `ghcr.io/nvidia/openshell-community/sandboxes/base:latest`. To use a named community sandbox:

```bash
openshell sandbox create --from <name>
```

This pulls `ghcr.io/nvidia/openshell-community/sandboxes/<name>:latest`.

## Local Development

`mise run cluster` is the primary development command. It bootstraps a cluster if one doesn't exist, then performs incremental deploys on subsequent runs.

The incremental deploy (`cluster-deploy-fast.sh`) fingerprints local Git changes and only rebuilds components whose files have changed:

| Changed files | Rebuild triggered |
|---|---|
| Cargo manifests, proto definitions, prebuilt staging script | Gateway + supervisor |
| `crates/openshell-server/*`, `crates/openshell-ocsf/*`, `deploy/docker/Dockerfile.images` | Gateway |
| `crates/openshell-sandbox/*`, `crates/openshell-policy/*` | Supervisor |
| `deploy/helm/openshell/*` | Helm upgrade |

When no local changes are detected, the command is a no-op.

**Gateway updates** are pushed to a local registry and the StatefulSet is restarted. **Supervisor updates** are copied directly into the running cluster container via `docker cp` — new sandbox pods pick up the updated binary immediately through the hostPath mount, with no image rebuild or cluster restart required.

Fingerprints are stored in `.cache/cluster-deploy-fast.state`. You can also target specific components explicitly:

```bash
mise run cluster -- gateway # rebuild gateway only
mise run cluster -- supervisor # rebuild supervisor only
mise run cluster -- chart # helm upgrade only
mise run cluster -- all # rebuild everything
```

To validate incremental routing and BuildKit cache reuse locally, run:

```bash
mise run cluster:test:fast-deploy-cache
```

The harness runs isolated scenarios in temporary git worktrees, keeps its own state and cache under `.cache/cluster-deploy-fast-test/`, and writes a Markdown summary with:

- auto-detection checks for gateway-only, supervisor-only, shared, Helm-only, unrelated, and explicit-target changes
- cold vs warm rebuild comparisons for gateway and supervisor code changes
- container-ID invalidation coverage to verify gateway + Helm are retriggered when the cluster container changes
1 change: 1 addition & 0 deletions mise.lock
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ url = "https://ziglang.org/download/0.14.1/zig-aarch64-linux-0.14.1.tar.xz"
[tools.zig."platforms.linux-x64"]
checksum = "sha256:24aeeec8af16c381934a6cd7d95c807a8cb2cf7df9fa40d359aa884195c4716c"
url = "https://ziglang.org/download/0.14.1/zig-x86_64-linux-0.14.1.tar.xz"
provenance = "minisign"

[tools.zig."platforms.macos-arm64"]
checksum = "sha256:39f3dc5e79c22088ce878edc821dedb4ca5a1cd9f5ef915e9b3cc3053e8faefa"
Expand Down
19 changes: 11 additions & 8 deletions tasks/python.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,25 @@ ls -la "$WHEEL_OUTPUT_DIR"/*.whl
hide = true

["build:python:wheel:linux:amd64"]
description = "Build Python wheel for Linux amd64 natively"
description = "Build Python wheel for Linux amd64 natively (host glibc; non-portable)"
depends = ["EXPECTED_HOST_ARCH=amd64 WHEEL_OUTPUT_DIR=target/wheels/linux-amd64 build:python:wheel:linux"]
hide = true

["python:build:linux:amd64"]
description = "Alias for build:python:wheel:linux:amd64"
depends = ["build:python:wheel:linux:amd64"]
hide = true

["build:python:wheel:linux:arm64"]
description = "Build Python wheel for Linux arm64 natively"
description = "Build Python wheel for Linux arm64 natively (host glibc; non-portable)"
depends = ["EXPECTED_HOST_ARCH=arm64 WHEEL_OUTPUT_DIR=target/wheels/linux-arm64 build:python:wheel:linux"]
hide = true

# Legacy release-pipeline aliases. CI now builds release wheels with
# PyO3/maturin-action directly (see .github/workflows/release-*.yml); these
# aliases remain for local iteration and produce wheels tagged for the host
# glibc only.
["python:build:linux:amd64"]
description = "Build Python wheel for Linux amd64 (local dev; CI uses maturin-action)"
depends = ["build:python:wheel:linux:amd64"]
hide = true

["python:build:linux:arm64"]
description = "Alias for build:python:wheel:linux:arm64"
description = "Build Python wheel for Linux arm64 (local dev; CI uses maturin-action)"
depends = ["build:python:wheel:linux:arm64"]
hide = true

Expand Down
Loading