Skip to content

Commit

Permalink
Reduce PR testing matrix. (#2436)
Browse files Browse the repository at this point in the history
* Remove file annotation from verbose matrix warnings.

* Allow 'min', 'max', 'minmax' values for matrix `std` tags.

* Error when no supported stds available.

* Reduce PR testing matrix.

1. Temporarily remove current nightly matrix pending NVKS bringup.
2. Move current per-PR matrix to nightly.
3. Reduce the number of jobs in the PR matrix while maintaining decent coverage.

Before: (total jobs: 437)
|  320 | `linux-amd64-cpu16`
|   66 | `linux-amd64-gpu-v100-latest-1`
|   28 | `linux-arm64-cpu16`
|   23 | `windows-amd64-cpu16`

After: (total jobs: 183)
|  126 | `linux-amd64-cpu16`
|   26 | `linux-amd64-gpu-v100-latest-1`
|   21 | `windows-amd64-cpu16`
|   10 | `linux-arm64-cpu16`

* Restore old build matrix.
  • Loading branch information
alliepiper committed Sep 19, 2024
1 parent b07f036 commit ee94bb9
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 30 deletions.
32 changes: 23 additions & 9 deletions .github/actions/workflow-build/build-workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,8 @@ def lookup_supported_stds(matrix_job):
if 'project' in matrix_job:
project = get_project(matrix_job['project'])
stds = stds & set(project['stds'])
if len(stds) == 0:
raise Exception(error_message_with_matrix_job(matrix_job, "No supported stds found."))
return sorted(list(stds))


Expand Down Expand Up @@ -626,18 +628,18 @@ def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig):
matching_consumers = merged_consumers[producer_index]

producer_name = producer['name']
print(f"::notice file=ci/matrix.yaml::Merging consumers for duplicate producer '{producer_name}' in '{group_name}'",
print(f"::notice::Merging consumers for duplicate producer '{producer_name}' in '{group_name}'",
file=sys.stderr)
consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers])
print(f"::notice file=ci/matrix.yaml::Original consumers: {consumer_names}", file=sys.stderr)
print(f"::notice::Original consumers: {consumer_names}", file=sys.stderr)
consumer_names = ", ".join([consumer['name'] for consumer in consumers])
print(f"::notice file=ci/matrix.yaml::Duplicate consumers: {consumer_names}", file=sys.stderr)
print(f"::notice::Duplicate consumers: {consumer_names}", file=sys.stderr)
# Merge if unique:
for consumer in consumers:
if not dispatch_job_in_container(consumer, matching_consumers):
matching_consumers.append(consumer)
consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers])
print(f"::notice file=ci/matrix.yaml::Merged consumers: {consumer_names}", file=sys.stderr)
print(f"::notice::Merged consumers: {consumer_names}", file=sys.stderr)
else:
merged_producers.append(producer)
merged_consumers.append(consumers)
Expand All @@ -653,7 +655,7 @@ def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig):
unique_standalone_jobs = []
for job_json in standalone_jobs:
if dispatch_job_in_container(job_json, unique_standalone_jobs):
print(f"::notice file=ci/matrix.yaml::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'",
print(f"::notice::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'",
file=sys.stderr)
else:
unique_standalone_jobs.append(job_json)
Expand All @@ -663,12 +665,12 @@ def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig):
for two_stage_job in two_stage_jobs:
for producer in two_stage_job['producers']:
if remove_dispatch_job_from_container(producer, unique_standalone_jobs):
print(f"::notice file=ci/matrix.yaml::Removing standalone job '{producer['name']}' " +
print(f"::notice::Removing standalone job '{producer['name']}' " +
f"as it appears as a producer in '{group_name}'",
file=sys.stderr)
for consumer in two_stage_job['consumers']:
if remove_dispatch_job_from_container(producer, unique_standalone_jobs):
print(f"::notice file=ci/matrix.yaml::Removing standalone job '{consumer['name']}' " +
print(f"::notice::Removing standalone job '{consumer['name']}' " +
f"as it appears as a consumer in '{group_name}'",
file=sys.stderr)
standalone_jobs = list(unique_standalone_jobs)
Expand Down Expand Up @@ -864,8 +866,20 @@ def set_derived_tags(matrix_job):
gpu = get_gpu(matrix_job['gpu'])
matrix_job['sm'] = gpu['sm']

if 'std' in matrix_job and matrix_job['std'] == 'all':
matrix_job['std'] = lookup_supported_stds(matrix_job)
if 'std' in matrix_job:
if matrix_job['std'] == 'all':
matrix_job['std'] = lookup_supported_stds(matrix_job)
elif matrix_job['std'] == 'min':
matrix_job['std'] = min(lookup_supported_stds(matrix_job))
elif matrix_job['std'] == 'max':
matrix_job['std'] = max(lookup_supported_stds(matrix_job))
elif matrix_job['std'] == 'minmax':
stds = lookup_supported_stds(matrix_job)
if len(stds) == 1:
matrix_job['std'] = stds[0]
else:
matrix_job['std'] = [min(stds), max(stds)]


# Add all deps before applying project job maps:
for job in matrix_job['jobs']:
Expand Down
57 changes: 36 additions & 21 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@ workflows:
# Old CTK
- {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']}
- {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'}
# Current CTK
# Current CTK build-only
- {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']}
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
# Current CTK testing:
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'minmax', cxx: ['gcc']}
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['clang', 'msvc']}
# Split up cub tests:
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'minmax', cxx: ['gcc']}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']}
# Modded builds:
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'all', cxx: ['gcc'], sm: '90a'}
Expand All @@ -36,33 +42,41 @@ workflows:
- {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 17, cxx: ['gcc12'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 17, cxx: ['gcc13'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'min', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'max', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'max', cxx: ['clang18']}
# Python jobs:
- {jobs: ['test'], project: 'pycuda', ctk: ['12.5']}
# cccl-infra:
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9']}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']}
# Edge-case jobs
- {jobs: ['limited'], project: 'cub', std: 17}

nightly:
- {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11]}
- {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17]}
- {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
- {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
- {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
- {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11]}
# H100 runners are currently flakey, only build since those use CPU-only runners:
- {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
- {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
# Increased test coverage compared to nightlies:
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}
# Edge-case jobs
- {jobs: ['limited'], project: 'cub', std: 17}

# nvrtc:
- {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}
# # These are waiting on the NVKS nodes:
# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11]}
# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17]}
# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11]}
# # H100 runners are currently flakey, only build since those use CPU-only runners:
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
#
# # nvrtc:
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}

# Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
exclude:
Expand Down Expand Up @@ -256,6 +270,7 @@ tags:
project: { default: ['libcudacxx', 'cub', 'thrust'] }
# C++ standard
# If set to 'all', all stds supported by the ctk/compilers/project are used.
# If set to 'min', 'max', or 'minmax', the minimum, maximum, or both stds are used.
# If set, will be passed to script with `-std <std>`.
std: { required: false }
# GPU architecture
Expand Down

0 comments on commit ee94bb9

Please sign in to comment.