Skip to content

Add gpu_multi_process_run.sh #99

Add gpu_multi_process_run.sh

Add gpu_multi_process_run.sh #99

Workflow file for this run

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Build Tests
# Triggers: every push to main, and pull-request activity.
on:
  push:
    branches: ["main"]
  # No `types` filter is given, so GitHub's defaults apply: the workflow runs
  # for the opened, synchronize and reopened activity types.
  pull_request:
# Workflow-level environment, visible to every step of every job below.
env:
  # Cluster name used by all xpk commands in this workflow.
  # Names must be unique across tests that run in parallel.
  TPU_CLUSTER_NAME: build-test-2-v4-8-nodepool
jobs:
  # End-to-end build test: create a 2-slice v4-8 XPK cluster, run a trivial
  # workload on it, list/inspect/delete that workload, then delete the cluster.
  cluster-create-and-delete:
    # NOTE(review): GitHub has announced retirement of the hosted ubuntu-20.04
    # image; confirm this label still resolves to a runner before relying on it.
    runs-on: [ubuntu-20.04]
    # We currently support only one build or nightly test running at a time.
    # cancel-in-progress is false, so a newer run in this group does not cancel
    # the run that is already in progress.
    concurrency:
      group: build-test-cluster-group
      cancel-in-progress: false
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted on purpose: an unquoted 3.10 would be parsed as the float 3.1.
          python-version: '3.10'
      - uses: google-github-actions/auth@v2
        with:
          credentials_json: '${{ secrets.GCP_SA_KEY }}'
      - uses: google-github-actions/setup-gcloud@v2
        with:
          version: '>= 363.0.0'
          install_components: 'beta,gke-gcloud-auth-plugin'
      - name: Verify gcp setup
        run: gcloud info
      # The `>-` folded scalars below join their lines with single spaces and
      # strip the trailing newline, so each command reaches the shell as the
      # same single line it was before being wrapped.
      - name: Create an XPK Cluster with 2x v4-8 nodepools
        run: >-
          python xpk.py cluster create
          --cluster $TPU_CLUSTER_NAME
          --device-type=v4-8
          --num-slices=2
          --zone=us-central2-b
          --default-pool-cpu-machine-type=n1-standard-16
          --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}'
          --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
      - name: Authenticate Docker
        run: gcloud auth configure-docker --quiet
      # Writes test.sh into the working directory; the workload below runs it
      # via `bash test.sh`.
      - name: Create test script to execute in workloads
        run: >-
          echo -e '#!/bin/bash \n echo "Hello world from a test script!"'
          > test.sh
      - name: Run a base-docker-image workload
        run: >-
          python xpk.py workload create
          --cluster $TPU_CLUSTER_NAME
          --workload nightly-test-xpk-basic
          --command "bash test.sh"
          --tpu-type=v4-8
          --num-slices=2
          --zone=us-central2-b
      - name: List out the workloads on the cluster
        run: >-
          python3 xpk.py workload list
          --cluster $TPU_CLUSTER_NAME
          --zone=us-central2-b
      - name: Run xpk inspector with the workload created above
        run: >-
          python3 xpk.py inspector
          --cluster $TPU_CLUSTER_NAME
          --zone=us-central2-b
          --workload nightly-test-xpk-basic
      - name: Delete the workload on the cluster
        run: >-
          python3 xpk.py workload delete
          --workload nightly-test-xpk-basic
          --cluster $TPU_CLUSTER_NAME
          --zone=us-central2-b
      # always() makes this step run even when an earlier step failed, so a
      # failed test does not leave the cluster behind.
      - name: Delete the cluster created
        if: always()
        run: >-
          python xpk.py cluster delete
          --cluster $TPU_CLUSTER_NAME
          --zone=us-central2-b