Commit 5c398f6

jetstream authors committed
Merge pull request #255 from AI-Hypercomputer:yuyan-stable-stack2
PiperOrigin-RevId: 750640829
2 parents f7427db + eb2e377

2 files changed: +96 -50 lines

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 84 additions & 14 deletions
@@ -12,36 +12,108 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+# This workflow builds a stable stack for JetStream+Maxtext, runs benchmarks,
+# cleans up resources, and sends notifications.

-name: Tests
+name: Run Maxtext JetStream Tests

 on:
   # pull_request:
   # push:
   #   branches: [ "main" ]
   workflow_dispatch:
   schedule:
-    # Run the job every 4 hours
-    - cron: '0 */24 * * *'
+    # Run the job daily at midnight UTC
+    - cron: '0 0 * * *'

 jobs:
   prelim:
     runs-on: ["self-hosted", "tpu", "v6e-8"]
     steps:
+    - name: Test gsutil installation
+      run: which gsutil >/dev/null 2>&1 || { echo >&2 "gsutil is required but not installed. Aborting"; exit 24; }
+    - name: Cleanup old docker images
+      run: docker system prune --all --force
+    - name: Authenticate gcloud
+      run: gcloud auth configure-docker us-docker.pkg.dev --quiet
+
+  build_stable_stack:
+    name: Build Stable Stack
+    needs: prelim
+    runs-on: ["self-hosted", "tpu", "v6e-8"]
+    env:
+      LOCAL_IMAGE_TAG: jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
+    steps:
     - uses: actions/checkout@v4
+    - name: Build
+      run: |
+        pushd experimental/jetstream-maxtext-stable-stack
+        ./build.sh \
+          LOCAL_IMAGE_TAG="${LOCAL_IMAGE_TAG}"
+        popd
+    - name: Test
+      run: |
+        pushd experimental/jetstream-maxtext-stable-stack
+        ./test.sh \
+          LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
+        popd
+    - name: Upload image
+      run: |
+        UPLOAD_IMAGE_TAG=gcr.io/cloud-tpu-inference-test/${LOCAL_IMAGE_TAG}
+        docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
+        docker push ${UPLOAD_IMAGE_TAG}
+        NIGHTLY_TAG=${UPLOAD_IMAGE_TAG%:*}:nightly
+        NIGHTLY_TAG_DATE=${NIGHTLY_TAG}-$(date +"%Y%m%d")
+        docker tag ${LOCAL_IMAGE_TAG} ${NIGHTLY_TAG}
+        docker tag ${LOCAL_IMAGE_TAG} ${NIGHTLY_TAG_DATE}
+        docker push ${NIGHTLY_TAG}
+        docker push ${NIGHTLY_TAG_DATE}
+
+  benchmark_report:
+    name: Benchmark Report
+    needs: build_stable_stack
+    runs-on: ["self-hosted", "tpu", "v6e-8"]
+    container:
+      # sync with the image uploaded from the build_stable_stack stage
+      image: gcr.io/cloud-tpu-inference-test/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
+      options: "--net=host --privileged"
+    env:
+      OUTPUT_DIR: /workspace/test_dir/
+    steps:
+    - name: Create output directory # Ensure directory exists in container
+      run: mkdir -p ${OUTPUT_DIR}
     - name: Test MOEBenchmarks
-      run: bash .github/workflows/test_moe_benchmarks.sh
-      # run: bash .github/workflows/test_moe_8x22b_microbenchmark.sh
-    # - name: Test MOE long context chunked prefill - 8k
-    #   run: bash .github/workflows/benchmark_chunked_prefill.sh
-
+      # The report is generated in OUTPUT_DIR, as set in the env above
+      run: bash JetStream/.github/workflows/test_moe_benchmarks.sh
+    - name: Upload build artifact
+      uses: actions/upload-artifact@v4
+      with:
+        name: benchmark_report
+        path: ${{ env.OUTPUT_DIR }}
+
+  clean_up:
+    if: ${{ always() }} # always execute, regardless of previous jobs or steps.
+    needs: [build_stable_stack, benchmark_report]
+    name: "Clean up"
+    runs-on: ["self-hosted"]
+    permissions:
+      contents: read
+      issues: write # for failed-build-issue
+    steps:
+    - name: Delete TPU image
+      # sync with the image uploaded from the build_stable_stack stage
+      run: gcloud container images delete gcr.io/cloud-tpu-inference-test/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
+
   notify:
     name: Notify test build # creates an issue or modifies last open existing issue for failed build
-    needs: [prelim]
+    needs: [build_stable_stack, benchmark_report]
     runs-on: ["self-hosted", "tpu", "v6e-8"]
    steps:
+    - name: Download benchmark artifact
+      uses: actions/download-artifact@v4
+      with:
+        name: benchmark_report
+        path: ./benchmark_report
     - name: Check whether one of the jobs failed
       if: ${{ failure() }}
       uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
@@ -61,7 +133,5 @@ jobs:

         from: JetStream Runs
         secure: true
-        attachments: ~/test_dir/moe_8x7b.txt,~/test_dir/moe_8x22b.txt,~/test_dir/moe_8x22b_long_context_8k_prefill.txt,~/test_dir/moe_8x7b_jetstream.txt
+        attachments: ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
         body: workflow for ${{github.repository}} completed successfully!
-    - name: Cleanup
-      run: rm -rf ~/test_dir
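
A note on the tagging scheme in the new "Upload image" step: NIGHTLY_TAG is derived with plain bash parameter expansion, where ${UPLOAD_IMAGE_TAG%:*} strips the shortest trailing ':*' match (the run-specific ':github_<run_id>' suffix), leaving the bare repository path to which ':nightly' and a dated suffix are appended. A minimal standalone sketch of that derivation, using placeholder registry paths rather than the workflow's real ones:

#!/bin/bash
# Placeholder tags for illustration; the workflow uses its own registry path.
LOCAL_IMAGE_TAG="stable-stack/tpu:github_12345"
UPLOAD_IMAGE_TAG="gcr.io/example-project/${LOCAL_IMAGE_TAG}"

# ${VAR%:*} deletes the shortest suffix matching ':*', here ':github_12345'.
NIGHTLY_TAG="${UPLOAD_IMAGE_TAG%:*}:nightly"
NIGHTLY_TAG_DATE="${NIGHTLY_TAG}-$(date +"%Y%m%d")"

echo "${NIGHTLY_TAG}"       # gcr.io/example-project/stable-stack/tpu:nightly
echo "${NIGHTLY_TAG_DATE}"  # e.g. ...:nightly-20250101, depending on the date

The net effect is that the same image is reachable by an immutable run-specific tag, a moving 'nightly' tag, and an archival dated tag.
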
.github/workflows/test_moe_benchmarks.sh

Lines changed: 12 additions & 36 deletions
@@ -1,42 +1,23 @@
 #!/bin/bash
-mkdir ~/test_dir
-cd ~/test_dir
-git clone https://github.com/google/maxtext.git
-
-cd ~/test_dir
-git clone https://github.com/google/JetStream.git
-cd ~/test_dir
-sudo apt-get -y update
-sudo apt-get -y install python3.10-venv
-sudo apt-get -y install jq
-python -m venv .env
-source .env/bin/activate
-
-cd ~/test_dir
-cd JetStream
-pip install -e .
-cd benchmarks
-pip install -r requirements.in
-
-cd ~/test_dir
-cd maxtext/
-pip3 install wheel
-bash setup.sh MODE=stable DEVICE=tpu
+
+OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir}

 pip install nltk==3.8.1
+python -c "import nltk; nltk.download('punkt')"

+cd maxtext

 # moe 8x7b microbenchmark
-LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ~/test_dir/moe_8x7b.txt
-tail -n5 ~/test_dir/moe_8x7b.txt > ~/test_dir/moe_8x7b.tmp && mv ~/test_dir/moe_8x7b.tmp ~/test_dir/moe_8x7b.txt
+LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ${OUTPUT_DIR}/moe_8x7b.txt
+tail -n5 ${OUTPUT_DIR}/moe_8x7b.txt > ${OUTPUT_DIR}/moe_8x7b.tmp && mv ${OUTPUT_DIR}/moe_8x7b.tmp ${OUTPUT_DIR}/moe_8x7b.txt

 # moe 8x22B microbenchmark
-LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ~/test_dir/moe_8x22b.txt
-tail -n5 ~/test_dir/moe_8x22b.txt > ~/test_dir/moe_8x22b.tmp && mv ~/test_dir/moe_8x22b.tmp ~/test_dir/moe_8x22b.txt
+LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ${OUTPUT_DIR}/moe_8x22b.txt
+tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp && mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt

 # moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
-LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ~/test_dir/moe_8x22b_long_context_8k_prefill.txt
-tail -n5 ~/test_dir/moe_8x22b_long_context_8k_prefill.txt > ~/test_dir/moe_8x22b_long_context_8k_prefill.tmp && mv ~/test_dir/moe_8x22b_long_context_8k_prefill.tmp ~/test_dir/moe_8x22b_long_context_8k_prefill.txt
+LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
+tail -n5 ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp && mv ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt


 # moe 8x7B Maxtext Jetstream
@@ -47,13 +28,8 @@ sleep 600

 cd ..

-# copy openorca datset
-gsutil cp gs://jetstream-runner/datasets/open_orca_gpt4_tokenized_llama.calibration_1000.pkl JetStream/benchmarks/
-
-python -c "import nltk; nltk.download('punkt')"
-
-python JetStream/benchmarks/benchmark_serving.py --tokenizer ~/test_dir/maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ~/test_dir/moe_8x7b_jetstream.txt
-tail -n25 ~/test_dir/moe_8x7b_jetstream.txt > ~/test_dir/moe_8x7b_jetstream.tmp && mv ~/test_dir/moe_8x7b_jetstream.tmp ~/test_dir/moe_8x7b_jetstream.txt
+python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
+tail -n25 ${OUTPUT_DIR}/moe_8x7b_jetstream.txt > ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp && mv ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp ${OUTPUT_DIR}/moe_8x7b_jetstream.txt

 # kill Jetstream server
 kill -9 %%
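
Two shell idioms in the rewritten script deserve a note. First, OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir} uses default-value expansion: inside the benchmark_report container the workflow exports OUTPUT_DIR=/workspace/test_dir/, so that value wins, while a bare local run falls back to ./test_dir. Second, each report is truncated in place with a tail-to-temp-then-rename pair, since redirecting tail's output straight back onto its own input file would empty the file before tail reads it. A small sketch under those assumptions (the report name is a placeholder):

#!/bin/bash
# ${VAR:-default} expands to $VAR when it is set and non-empty,
# and to the default otherwise; VAR itself is left untouched.
OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir}
mkdir -p "${OUTPUT_DIR}"

# Stand-in for a benchmark command writing a long log.
seq 1 100 > "${OUTPUT_DIR}/report.txt"

# Keep only the summary tail: write to a temp file, then rename over the
# original, rather than redirecting tail onto its own input.
tail -n5 "${OUTPUT_DIR}/report.txt" > "${OUTPUT_DIR}/report.tmp" \
  && mv "${OUTPUT_DIR}/report.tmp" "${OUTPUT_DIR}/report.txt"

The closing kill -9 %% relies on bash job control: %% names the current job, i.e. the most recently started background job, which here is presumably the JetStream server launched with & earlier in the script, outside the lines this diff touches.
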
