Skip to content

Commit 58abceb

Browse files
author
jetstream authors
committed
Merge pull request #263 from AI-Hypercomputer:llama_runs_benchmarks
PiperOrigin-RevId: 753222805
2 parents 8fe9a20 + 65a7c91 commit 58abceb

File tree

2 files changed

+45
-2
lines changed

2 files changed

+45
-2
lines changed

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ jobs:
7272
env:
7373
OUTPUT_DIR: ./test_dir
7474
steps:
75-
- name: Test MOEBenchmarks
75+
- name: Test MOE Benchmarks
7676
run: |
7777
rm -rf ${OUTPUT_DIR}
7878
mkdir -p ${OUTPUT_DIR}
@@ -87,6 +87,19 @@ jobs:
8787
bash -c "
8888
bash JetStream/.github/workflows/test_moe_benchmarks.sh
8989
"
90+
- name: Test llama 70b Benchmarks
91+
run: |
92+
# sync with the image uploaded from build_stable_stack stage
93+
# Report should be generated in OUTPUT_DIR, depending on ENV
94+
DOCKER_OUTPUT_DIR=/output
95+
docker run \
96+
-v ${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR} \
97+
--env OUTPUT_DIR=${DOCKER_OUTPUT_DIR} \
98+
--privileged --net=host --rm -i \
99+
gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
100+
bash -c "
101+
bash JetStream/.github/workflows/test_llama_benchmarks.sh
102+
"
90103
- name: Upload build artifact
91104
uses: actions/upload-artifact@v4
92105
with:
@@ -161,5 +174,5 @@ jobs:
161174
162175
from: JetStream Runs
163176
secure: true
164-
attachments: ${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt
177+
attachments: ${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt,${{ env.BENCHMARK_REPORT_DIR }}/llama_70b_jetstream.txt
165178
body: workflow for ${{github.repository}} completed successfully!
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
# Benchmark Llama2-70B (int8-quantized) serving via a MaxText/JetStream
# server and write the trimmed benchmark report to
# ${OUTPUT_DIR}/llama_70b_jetstream.txt for the CI workflow to pick up.

OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir}
# Ensure the report directory exists when the script is run standalone
# (the CI workflow pre-creates it, a local run may not).
mkdir -p "${OUTPUT_DIR}"

pip install nltk==3.8.1
python -c "import nltk; nltk.download('punkt')"

# Abort early if the checkout layout is unexpected; otherwise every
# subsequent relative path would silently resolve in the wrong directory.
cd maxtext || exit 1

export TOKENIZER_PATH=assets/tokenizer.llama2
export MAX_PREFILL_PREDICT_LENGTH=1024
export MAX_TARGET_LENGTH=2048
export MODEL_NAME=llama2-70b
export ICI_FSDP_PARALLELISM=1
export ICI_AUTOREGRESSIVE_PARALLELISM=1
export ICI_TENSOR_PARALLELISM=-1
export SCAN_LAYERS=false
export WEIGHT_DTYPE=bfloat16
export PER_DEVICE_BATCH_SIZE=54
export LOAD_PARAMETERS_PATH=gs://jetstream-runner/llama-70B-int8/int8_

# Start the server in the background and record its PID so shutdown does
# not depend on shell job control (jobspecs like %% are fragile in
# non-interactive shells).
python MaxText/maxengine_server.py MaxText/configs/base.yml tokenizer_path=${TOKENIZER_PATH} load_parameters_path=${LOAD_PARAMETERS_PATH} max_prefill_predict_length=${MAX_PREFILL_PREDICT_LENGTH} max_target_length=${MAX_TARGET_LENGTH} model_name=${MODEL_NAME} ici_fsdp_parallelism=${ICI_FSDP_PARALLELISM} ici_autoregressive_parallelism=${ICI_AUTOREGRESSIVE_PARALLELISM} ici_tensor_parallelism=${ICI_TENSOR_PARALLELISM} scan_layers=${SCAN_LAYERS} weight_dtype=${WEIGHT_DTYPE} per_device_batch_size=${PER_DEVICE_BATCH_SIZE} checkpoint_is_quantized=True quantization=int8 quantize_kvcache=True enable_jax_profiler=True &
SERVER_PID=$!

# Fixed grace period for the server to load and warm the 70B checkpoint.
sleep 800

# NOTE(review): we already cd'd into maxtext above, yet the tokenizer path
# below is maxtext/assets/... — confirm benchmark_serving.py resolves this
# relative to the repo root rather than the current directory.
python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.llama2 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/llama_70b_jetstream.txt
# Keep only the summary tail of the report for the emailed attachment.
tail -n25 ${OUTPUT_DIR}/llama_70b_jetstream.txt > ${OUTPUT_DIR}/llama_70b_jetstream.tmp && mv ${OUTPUT_DIR}/llama_70b_jetstream.tmp ${OUTPUT_DIR}/llama_70b_jetstream.txt

# kill Jetstream server
kill -9 "${SERVER_PID}"

0 commit comments

Comments
 (0)