From 2babc988c128a7ef5fb7cada650bb03c99b0ccd3 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 23 Apr 2024 00:28:22 +0000 Subject: [PATCH 1/4] Don't fail on bad-alloc for large memory test. --- cub/test/catch2_test_device_select_if.cu | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cub/test/catch2_test_device_select_if.cu b/cub/test/catch2_test_device_select_if.cu index 38a071e004..e38f9957d6 100644 --- a/cub/test/catch2_test_device_select_if.cu +++ b/cub/test/catch2_test_device_select_if.cu @@ -394,6 +394,7 @@ CUB_TEST("DeviceSelect::If works with a different output type", "[device][select } CUB_TEST("DeviceSelect::If works for very large number of items", "[device][select_if]", offset_types) +try { using type = std::int64_t; using offset_t = typename c2h::get<0, TestType>; @@ -434,8 +435,13 @@ CUB_TEST("DeviceSelect::If works for very large number of items", "[device][sele bool all_results_correct = thrust::equal(out.cbegin(), out.cend(), expected_out_it); REQUIRE(all_results_correct == true); } +catch (std::bad_alloc&) +{ + // Exceeding memory is not a failure. +} CUB_TEST("DeviceSelect::If works for very large number of output items", "[device][select_if]", offset_types) +try { using type = std::uint8_t; using offset_t = typename c2h::get<0, TestType>; @@ -472,3 +478,7 @@ CUB_TEST("DeviceSelect::If works for very large number of output items", "[devic REQUIRE(num_selected_out[0] == num_items); REQUIRE(in == out); } +catch (std::bad_alloc&) +{ + // Exceeding memory is not a failure. +} From e40e51e9973661f6a0174e7b92aa0b299d321b18 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 20 Apr 2024 03:25:45 +0000 Subject: [PATCH 2/4] Rebuild CI infra, add nightly workflow. --- .devcontainer/make_devcontainers.sh | 10 +- .github/actions/compute-matrix/action.yml | 25 - .../actions/compute-matrix/compute-matrix.sh | 82 -- .github/actions/workflow-build/action.yml | 95 +++ .../actions/workflow-build/build-workflow.py | 794 ++++++++++++++++++ .github/actions/workflow-results/action.yml | 96 +++ .../actions/workflow-results/final-summary.py | 50 ++ .../prepare-execution-summary.py | 247 ++++++ .../workflow-results/verify-job-success.py | 30 + .github/workflows/build-and-test-linux.yml | 47 -- .github/workflows/build-and-test-windows.yml | 48 -- .github/workflows/ci-workflow-nightly.yml | 107 +++ .../workflows/ci-workflow-pull-request.yml | 134 +++ .github/workflows/dispatch-build-and-test.yml | 51 -- .github/workflows/pr.yml | 242 ------ .github/workflows/run-as-coder.yml | 68 -- .github/workflows/workflow-dispatch-job.yml | 166 ++++ .../workflows/workflow-dispatch-two-stage.yml | 64 ++ .github/workflows/workflow-dispatch.yml | 53 ++ .gitignore | 2 +- CMakePresets.json | 2 +- ci-overview.md | 2 + ci/build_common.sh | 7 +- ci/infra_cccl.sh | 20 + ci/inspect_changes.sh | 95 ++- ci/matrix.yaml | 301 +++++-- ...odegen.sh => verify_codegen_libcudacxx.sh} | 0 27 files changed, 2181 insertions(+), 657 deletions(-) delete mode 100644 .github/actions/compute-matrix/action.yml delete mode 100755 .github/actions/compute-matrix/compute-matrix.sh create mode 100644 .github/actions/workflow-build/action.yml create mode 100755 .github/actions/workflow-build/build-workflow.py create mode 100644 .github/actions/workflow-results/action.yml create mode 100755 .github/actions/workflow-results/final-summary.py create mode 100755 .github/actions/workflow-results/prepare-execution-summary.py create mode 100755 .github/actions/workflow-results/verify-job-success.py delete mode 100644 
.github/workflows/build-and-test-linux.yml delete mode 100644 .github/workflows/build-and-test-windows.yml create mode 100644 .github/workflows/ci-workflow-nightly.yml create mode 100644 .github/workflows/ci-workflow-pull-request.yml delete mode 100644 .github/workflows/dispatch-build-and-test.yml delete mode 100644 .github/workflows/pr.yml delete mode 100644 .github/workflows/run-as-coder.yml create mode 100644 .github/workflows/workflow-dispatch-job.yml create mode 100644 .github/workflows/workflow-dispatch-two-stage.yml create mode 100644 .github/workflows/workflow-dispatch.yml create mode 100755 ci/infra_cccl.sh rename ci/{verify_codegen.sh => verify_codegen_libcudacxx.sh} (100%) diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index f868cc14f1..083dc443d2 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -74,6 +74,7 @@ while [[ $# -gt 0 ]]; do done MATRIX_FILE="../ci/matrix.yaml" +COMPUTE_MATRIX="../.github/actions/workflow-build/build-workflow.py" # Enable verbose mode if requested if [ "$VERBOSE" = true ]; then @@ -82,16 +83,17 @@ if [ "$VERBOSE" = true ]; then fi # Read matrix.yaml and convert it to json -matrix_json=$(yq -o json ${MATRIX_FILE}) +matrix_json=$(python3 ${COMPUTE_MATRIX} ${MATRIX_FILE} --devcontainer-info) -# Exclude Windows environments -readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))') +if [ "$VERBOSE" = true ]; then + echo "$matrix_json" +fi # Get the devcontainer image version and define image tag root readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version') # Get unique combinations of cuda version, compiler name/version, and Ubuntu version -readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]') +readonly combinations=$(echo "$matrix_json" | jq -c '.combinations[]') # Update the base devcontainer with the default values # The root devcontainer.json file is used as the default container as well as a template for all diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml deleted file mode 100644 index b8155e7aa6..0000000000 --- a/.github/actions/compute-matrix/action.yml +++ /dev/null @@ -1,25 +0,0 @@ - -name: Compute Matrix -description: "Compute the matrix for a given matrix type from the specified matrix file" - -inputs: - matrix_query: - description: "The jq query used to specify the desired matrix. 
e.g., .pull_request.nvcc" - required: true - matrix_file: - description: 'The file containing the matrix' - required: true -outputs: - matrix: - description: 'The requested matrix' - value: ${{ steps.compute-matrix.outputs.MATRIX }} - -runs: - using: "composite" - steps: - - name: Compute matrix - id: compute-matrix - run: | - MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) - echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT - shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh deleted file mode 100755 index 1629836d21..0000000000 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -write_output() { - local key="$1" - local value="$2" - echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" -} - -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. as $o | {lib: $o.lib[]} + del($o.lib))' -} - -# Filter out the libraries that are dirty -filter_libs() { - all_libs=("libcudacxx" "thrust" "cub") - dirty_libs=() - for lib in "${all_libs[@]}"; do - dirty_var_name="${lib^^}_DIRTY" - # If the variable named in dirty_var_name is not set, set it to false: - : "${!dirty_var_name:=false}" - # Declare a nameref to the variable named in dirty_var_name - declare -n lib_dirty="$dirty_var_name" - # echo "${lib^^}_DIRTY: ${lib_dirty}" >> /dev/stderr - if [ "${lib_dirty}" = "true" ]; then - dirty_libs+=("$lib") - fi - done - # echo "Dirty libraries: ${dirty_libs[@]}" >> /dev/stderr - - # Construct a regex to filter out the dirty libraries - dirty_lib_regex=$(IFS="|"; echo "${dirty_libs[*]}") - dirty_lib_regex="^(${dirty_lib_regex})\$" - jq_filter="map(select(.lib | test(\"$dirty_lib_regex\")))" - jq -cr "$jq_filter" -} - -extract_matrix() { - local file="$1" - local type="$2" - local matrix=$(yq -o=json "$file" | jq -cr ".$type") - write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" - local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" - write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" - write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" - - write_output "NVRTC_MATRIX" "$(echo "$matrix" | jq '.nvrtc' | explode_std_versions)" - - local clang_cuda_matrix="$(echo "$matrix" | jq -cr '.["clang-cuda"]' | explode_std_versions | explode_libs | filter_libs)" - write_output "CLANG_CUDA_MATRIX" "$clang_cuda_matrix" - write_output "CCCL_INFRA_MATRIX" "$(echo "$matrix" | jq -cr '.["cccl-infra"]' )" -} - -main() { - if [ "$1" == "-v" ]; then - set -x - shift - fi - - if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then - echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" - echo " -v : Enable verbose output" - echo " MATRIX_FILE : The path to the matrix file." - echo " MATRIX_TYPE : The desired matrix. 
Supported values: 'pull_request'" - exit 1 - fi - - echo "Input matrix file:" >&2 - cat "$1" >&2 - echo "Matrix Type: $2" >&2 - - extract_matrix "$1" "$2" -} - -main "$@" diff --git a/.github/actions/workflow-build/action.yml b/.github/actions/workflow-build/action.yml new file mode 100644 index 0000000000..36819b6229 --- /dev/null +++ b/.github/actions/workflow-build/action.yml @@ -0,0 +1,95 @@ +name: "CCCL Build Workflow" +description: "Parses a matrix definition and exports a set of dispatchable build/test/etc jobs." + +inputs: + workflows: + description: "Space separated list of workflows in matrix file to run" + required: true + skip_tests: + description: "Skip running tests" + default: "false" + required: false + inspect_changes_script: + description: "If defined, run this script to determine which projects/deps need to be tested." + default: "" + required: false + inspect_changes_base_sha: + description: "If defined, use this base ref for inspect-changes script." + default: "" + required: false + matrix_file: + description: "Path to the matrix file in the consumer repository." + default: "ci/matrix.yaml" + required: false + matrix_parser: + description: "Path to the matrix parser script (default if blank: build-workflow.py from action dir)" + default: "" + required: false + +outputs: + workflow: + description: "The dispatchable workflow matrix" + value: ${{ steps.build-workflow.outputs.workflow }} + workflow_keys: + description: "The keys of the parsed workflow" + value: ${{ steps.build-workflow.outputs.workflow_keys }} + +runs: + using: "composite" + steps: + + - name: Inspect changes + if: ${{ inputs.inspect_changes_script != '' && inputs.inspect_changes_base_sha != '' }} + id: inspect-changes + shell: bash --noprofile --norc -euo pipefail {0} + env: + base_ref: ${{ inputs.inspect_changes_base_sha }} + run: | + echo "Running inspect-changes script..." + ${{ inputs.inspect_changes_script }} ${base_ref} ${GITHUB_SHA} + echo "Exporting summary..." + mkdir workflow + cp ${GITHUB_STEP_SUMMARY} workflow/changes.md + + - name: Parse matrix file into a workflow + id: build-workflow + shell: bash --noprofile --norc -euo pipefail {0} + env: + skip_tests: ${{ inputs.skip_tests == 'true' && '--skip-tests' || ''}} + dirty_projects_flag: ${{ steps.inspect-changes.outputs.dirty_projects != '' && '--dirty-projects' || ''}} + dirty_projects: ${{ steps.inspect-changes.outputs.dirty_projects }} + matrix_parser: ${{ inputs.matrix_parser && inputs.matrix_parser || '${GITHUB_ACTION_PATH}/build-workflow.py' }} + run: | + echo "Parsing matrix file into a workflow..." + + ${{ env.matrix_parser }} ${{ inputs.matrix_file }} \ + --workflows ${{ inputs.workflows }} \ + ${{ env.skip_tests }} \ + ${{ env.dirty_projects_flag }} ${{ env.dirty_projects }} + + echo "::group::Workflow" + cat workflow/workflow.json + echo "::endgroup::" + + echo "::group::Runners" + cat workflow/runner_summary.json | jq -r '"# \(.heading)\n\n\(.body)"' | tee -a "${GITHUB_STEP_SUMMARY}" + echo "::endgroup::" + + echo "::group::Job List" + cat workflow/job_list.txt + echo "::endgroup::" + + echo "Setting outputs..." 
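          # WORKFLOW holds the dispatch-group JSON consumed by workflow-dispatch.yml;
          # WORKFLOW_KEYS lists the group names used to fan out the per-group job matrix.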
+ echo "::group::GHA Output: WORKFLOW" + printf "WORKFLOW=%s\n" "$(cat workflow/workflow.json | jq -c '.')" | tee -a "${GITHUB_OUTPUT}" + echo "::endgroup::" + + echo "::group::GHA Output: WORKFLOW_KEYS" + printf "WORKFLOW_KEYS=%s\n" "$(cat workflow/workflow_keys.json | jq -c '.')" | tee -a "${GITHUB_OUTPUT}" + echo "::endgroup::" + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: workflow + path: workflow/ diff --git a/.github/actions/workflow-build/build-workflow.py b/.github/actions/workflow-build/build-workflow.py new file mode 100755 index 0000000000..34ee6dfafe --- /dev/null +++ b/.github/actions/workflow-build/build-workflow.py @@ -0,0 +1,794 @@ +#!/usr/bin/env python3 + +""" +Concepts: +- matrix_job: an entry of a workflow matrix, converted from matrix.yaml["workflow"][id] into a JSON object. + Example: + { + "jobs": [ + "test" + ], + "ctk": "11.1", + "gpu": "t4", + "sm": "75-real", + "cxx": { + "name": "llvm", + "version": "9", + "exe": "clang++" + }, + "std": [ + 17 + ], + "project": [ + "libcudacxx", + "cub", + "thrust" + ], + "os": "ubuntu18.04" + } + +Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where +the matrix job is turned into one or more dispatch groups consisting of potentially many jobs. + +- dispatch_group_json: A json object used in conjunction with the ci-dispatch-groups.yml GHA workflow. + Example: + { + "": { + "standalone": [ {}, ... ] + "two_stage": [ {}, ] + } + } + +- two_stage_json: A json object that represents bulk-synchronous producer/consumer jobs, used with ci-dispatch-two-stage.yml. + Example: + { + "id": "", # Used as a compact unique name for the GHA dispatch workflows. + "producers": [ {}, ... ], + "consumers": [ {}, ... ] + } + +- job_json: A json object that represents a single job in a workflow. Used with ci-dispatch-job.yml. + Example: + { + "id": "", # Used as a compact unique name for the GHA dispatch workflows. + "name": "...", + "runner": "...", + "image": "...", + "command": "..." }, + } +""" + +import argparse +import base64 +import copy +import json +import os +import re +import struct +import sys +import yaml + +matrix_yaml = None + + +def generate_guids(): + """ + Simple compact global unique ID generator. + Produces up to 65535 unique IDs between 1-3 characters in length. + Throws an exception once exhausted. 
+ """ + i = 0 + while True: + # Generates a base64 hash of an incrementing 16-bit integer: + hash = base64.b64encode(struct.pack(">H", i)).decode('ascii') + # Strips off up-to 2 leading 'A' characters and a single trailing '=' characters, if they exist: + guid = re.sub(r'^A{0,2}', '', hash).removesuffix("=") + yield guid + i += 1 + if i >= 65535: + raise Exception("GUID generator exhausted.") + + +guid_generator = generate_guids() + + +def write_json_file(filename, json_object): + with open(filename, 'w') as f: + json.dump(json_object, f, indent=2) + + +def write_text_file(filename, text): + with open(filename, 'w') as f: + print(text, file=f) + + +def error_message_with_matrix_job(matrix_job, message): + return f"{matrix_job['origin']['workflow_location']}: {message}\n Input: {matrix_job['origin']['original_matrix_job']}" + + +def get_all_matrix_job_tags_sorted(): + required_tags = set(matrix_yaml['required_tags']) + defaulted_tags = set(matrix_yaml['defaulted_tags']) + optional_tags = set(matrix_yaml['optional_tags']) + all_tags = required_tags | defaulted_tags | optional_tags + + # Sorted using a highly subjective opinion on importance: + # Always first, information dense: + sorted_important_tags = ['project', 'jobs', 'cudacxx', 'cxx', 'ctk', 'gpu', 'std', 'sm', 'cpu'] + + # Always last, derived: + sorted_noise_tags = ['os', 'origin'] + + # In between? + sorted_tags = set(sorted_important_tags + sorted_noise_tags) + sorted_meh_tags = sorted(list(all_tags - sorted_tags)) + + return sorted_important_tags + sorted_meh_tags + sorted_noise_tags + + +def lookup_os(ctk, host_compiler): + key = f'ctk{ctk}-{host_compiler["name"]}{host_compiler["version"]}' + if not key in matrix_yaml['default_os_lookup']: + raise Exception(f"Missing matrix.yaml `default_os_lookup` entry for key `{key}`") + return matrix_yaml['default_os_lookup'][key] + + +def lookup_supported_stds(device_compiler=None, host_compiler=None): + stds = set(matrix_yaml['all_stds']) + if device_compiler: + key = f"{device_compiler['name']}{device_compiler['version']}" + if not key in matrix_yaml['lookup_cudacxx_supported_stds']: + raise Exception(f"Missing matrix.yaml 'lookup_cudacxx_supported_stds' entry for key '{key}'") + stds = stds & set(matrix_yaml['lookup_cudacxx_supported_stds'][key]) + if host_compiler: + key = f"{host_compiler['name']}{host_compiler['version']}" + if not key in matrix_yaml['lookup_cxx_supported_stds']: + raise Exception(f"Missing matrix.yaml 'lookup_cxx_supported_stds' entry for key '{key}'") + stds = stds & set(matrix_yaml['lookup_cxx_supported_stds'][key]) + return sorted(list(stds)) + + +def get_formatted_project_name(project_name): + if project_name in matrix_yaml['formatted_project_names']: + return matrix_yaml['formatted_project_names'][project_name] + return project_name + + +def get_formatted_host_compiler_name(host_compiler): + config_name = host_compiler['name'] + if config_name in matrix_yaml['formatted_cxx_names']: + return matrix_yaml['formatted_cxx_names'][config_name] + return config_name + + +def get_formatted_job_type(job_type): + if job_type in matrix_yaml['formatted_jobs']: + return matrix_yaml['formatted_jobs'][job_type] + # Return with first letter capitalized: + return job_type.capitalize() + + +def is_windows(matrix_job): + return matrix_job['os'].startswith('windows') + + +def generate_dispatch_group_name(matrix_job): + project_name = get_formatted_project_name(matrix_job['project']) + ctk = matrix_job['ctk'] + device_compiler = matrix_job['cudacxx'] + host_compiler_name = 
get_formatted_host_compiler_name(matrix_job['cxx']) + + compiler_info = "" + if device_compiler['name'] == 'nvcc': + compiler_info = f"nvcc {host_compiler_name}" + elif device_compiler['name'] == 'llvm': + compiler_info = f"clang-cuda" + else: + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler_name}" + + return f"{project_name} {compiler_info} CTK{ctk}" + + +def generate_dispatch_job_name(matrix_job, job_type): + std_str = ("C++" + str(matrix_job['std']) + " ") if 'std' in matrix_job else '' + cpu_str = matrix_job['cpu'] + gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_jobs'] else "" + cuda_compile_arch = (" sm{" + matrix_job['sm'] + "}") if 'sm' in matrix_job else "" + cmake_options = (' ' + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" + + host_compiler_name = get_formatted_host_compiler_name(matrix_job['cxx']) + host_compiler_info = f"{host_compiler_name}{matrix_job['cxx']['version']}" + + config_tag = f"{std_str}{host_compiler_info}" + + formatted_job_type = get_formatted_job_type(job_type) + + extra_info = f":{cuda_compile_arch}{cmake_options}" if cuda_compile_arch or cmake_options else "" + + return f"[{config_tag}] {formatted_job_type}({cpu_str}{gpu_str}){extra_info}" + + +def generate_dispatch_job_runner(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu = matrix_job['cpu'] + + if not job_type in matrix_yaml['gpu_required_jobs']: + return f"{runner_os}-{cpu}-cpu16" + + gpu = matrix_job['gpu'] + suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + + return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + + +def generate_dispatch_job_image(matrix_job, job_type): + devcontainer_version = matrix_yaml['devcontainer_version'] + ctk = matrix_job['ctk'] + image_os = matrix_job['os'] + host_compiler = matrix_job['cxx']['name'] + matrix_job['cxx']['version'] + + if is_windows(matrix_job): + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + + +def generate_dispatch_job_command(matrix_job, job_type): + script_path = "ci/windows" if is_windows(matrix_job) else "ci" + script_ext = ".ps1" if is_windows(matrix_job) else ".sh" + script_job_type = job_type + script_project = matrix_job['project'] + script_name = f"{script_path}/{script_job_type}_{script_project}{script_ext}" + + std_str = str(matrix_job['std']) if 'std' in matrix_job else '' + + host_compiler_exe = matrix_job['cxx']['exe'] + device_compiler_name = matrix_job['cudacxx']['name'] + device_compiler_exe = matrix_job['cudacxx']['exe'] + + cuda_compile_arch = matrix_job['sm'] if 'sm' in matrix_job else '' + cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' + + command = f"\"{script_name}\"" + if std_str: + command += f" -std \"{std_str}\"" + if cuda_compile_arch: + command += f" -arch \"{cuda_compile_arch}\"" + if device_compiler_name != 'nvcc': + command += f" -cuda \"{device_compiler_exe}\"" + if cmake_options: + cmake_args = " ".join([f"{key}={value}" for key, value in cmake_options.items()]) + command += f" -cmake-options \"{cmake_args}\"" + + return command + + +def generate_dispatch_job_origin(matrix_job, job_type): + origin = matrix_job['origin'].copy() + + matrix_job = matrix_job.copy() + del matrix_job['origin'] + + matrix_job['jobs'] = job_type + + if 'cxx' in matrix_job: + host_compiler = 
matrix_job['cxx'] + formatted_name = get_formatted_host_compiler_name(host_compiler) + matrix_job['cxx_name'] = formatted_name + matrix_job['cxx_full'] = formatted_name + host_compiler['version'] + del matrix_job['cxx'] + + if 'cudacxx' in matrix_job: + device_compiler = matrix_job['cudacxx'] + formatted_name = 'clang-cuda' if device_compiler['name'] == 'llvm' else device_compiler['name'] + matrix_job['cudacxx_name'] = formatted_name + matrix_job['cudacxx_full'] = formatted_name + device_compiler['version'] + del matrix_job['cudacxx'] + + origin['matrix_job'] = matrix_job + + return origin + + +def generate_dispatch_job_json(matrix_job, job_type): + return { + 'name': generate_dispatch_job_name(matrix_job, job_type), + 'runner': generate_dispatch_job_runner(matrix_job, job_type), + 'image': generate_dispatch_job_image(matrix_job, job_type), + 'command': generate_dispatch_job_command(matrix_job, job_type), + 'origin': generate_dispatch_job_origin(matrix_job, job_type) + } + + +# Create a single build producer, and a separate consumer for each test_job_type: +def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): + build_json = generate_dispatch_job_json(matrix_job, build_job_type) + + test_json = [] + for test_job_type in test_job_types: + test_json.append(generate_dispatch_job_json(matrix_job, test_job_type)) + + return { + "producers": [build_json], + "consumers": test_json + } + + +def generate_dispatch_group_jobs(matrix_job): + dispatch_group_jobs = { + "standalone": [], + "two_stage": [] + } + + job_types = set(matrix_job['jobs']) + + build_required = set(matrix_yaml['build_required_jobs']) & job_types + has_build_and_test = len(build_required) > 0 + job_types -= build_required + + has_standalone_build = 'build' in job_types and not has_build_and_test + job_types -= {'build'} + + if has_standalone_build: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, "build")) + elif has_build_and_test: + dispatch_group_jobs['two_stage'].append( + generate_dispatch_build_and_test_json(matrix_job, "build", build_required)) + + # Remaining jobs are assumed to be standalone (e.g. nvrtc): + for job_type in job_types: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) + + return dispatch_group_jobs + + +def matrix_job_to_dispatch_group(matrix_job, group_prefix=""): + return {group_prefix + generate_dispatch_group_name(matrix_job): + generate_dispatch_group_jobs(matrix_job)} + + +def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): + for group_name, group_json in new_dispatch_groups.items(): + if group_name not in accum_dispatch_groups: + accum_dispatch_groups[group_name] = group_json + else: + # iterate standalone and two_stage: + for key, value in group_json.items(): + accum_dispatch_groups[group_name][key] += value + + +def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig): + workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig) + + # Merge consumers for any two_stage arrays that have the same producer(s). Print a warning. 
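+    # For example (illustrative job names), two entries sharing one producer
+    # collapse into a single entry with the union of their consumers:
+    #   {producers: [build], consumers: [test_a]}
+    #   {producers: [build], consumers: [test_b]}
+    #     -> {producers: [build], consumers: [test_a, test_b]}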
+ for group_name, group_json in workflow_dispatch_groups.items(): + if not 'two_stage' in group_json: + continue + two_stage_json = group_json['two_stage'] + merged_producers = [] + merged_consumers = [] + for two_stage in two_stage_json: + producers = two_stage['producers'] + consumers = two_stage['consumers'] + if producers in merged_producers: + producer_index = merged_producers.index(producers) + matching_consumers = merged_consumers[producer_index] + + producer_names = ", ".join([producer['name'] for producer in producers]) + print(f"::notice file=ci/matrix.yaml::Merging consumers for duplicate producer '{producer_names}' in '{group_name}'", + file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Original consumers: {consumer_names}", file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in consumers]) + print(f"::notice file=ci/matrix.yaml::Duplicate consumers: {consumer_names}", file=sys.stderr) + # Merge if unique: + for consumer in consumers: + if consumer not in matching_consumers: + matching_consumers.append(consumer) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Merged consumers: {consumer_names}", file=sys.stderr) + else: + merged_producers.append(producers) + merged_consumers.append(consumers) + # Update with the merged lists: + two_stage_json = [] + for producers, consumers in zip(merged_producers, merged_consumers): + two_stage_json.append({'producers': producers, 'consumers': consumers}) + group_json['two_stage'] = two_stage_json + + # Check for any duplicate jobs in standalone arrays. Warn and remove duplicates. + for group_name, group_json in workflow_dispatch_groups.items(): + standalone_jobs = group_json['standalone'] if 'standalone' in group_json else [] + unique_standalone_jobs = [] + for job_json in standalone_jobs: + if job_json in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'", + file=sys.stderr) + else: + unique_standalone_jobs.append(job_json) + + # If any producer/consumer jobs exist in standalone arrays, warn and remove the standalones. + two_stage_jobs = group_json['two_stage'] if 'two_stage' in group_json else [] + for two_stage_job in two_stage_jobs: + for producer in two_stage_job['producers']: + if producer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{producer['name']}' " + + f"as it appears as a producer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(producer) + for consumer in two_stage_job['consumers']: + if consumer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{consumer['name']}' " + + f"as it appears as a consumer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(consumer) + standalone_jobs = list(unique_standalone_jobs) + + # If any producer or consumer job appears more than once, warn and leave as-is. 
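+    # (A duplicate cannot be removed here without dropping a dependency edge,
+    # so it is only reported; each copy will be dispatched and executed.)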
+ all_two_stage_jobs = [] + duplicate_jobs = {} + for two_stage_job in two_stage_jobs: + for job in two_stage_job['producers'] + two_stage_job['consumers']: + if job in all_two_stage_jobs: + duplicate_jobs[job['name']] = duplicate_jobs.get(job['name'], 1) + 1 + else: + all_two_stage_jobs.append(job) + for job_name, count in duplicate_jobs.items(): + print(f"::warning file=ci/matrix.yaml::" + + f"Job '{job_name}' appears {count} times in '{group_name}'.", + f"Cannot remove duplicate while resolving dependencies. This job WILL execute {count} times.", + file=sys.stderr) + + # Remove all named values that contain an empty list of jobs: + for group_name, group_json in workflow_dispatch_groups.items(): + if not group_json['standalone'] and not group_json['two_stage']: + del workflow_dispatch_groups[group_name] + elif not group_json['standalone']: + del group_json['standalone'] + elif not group_json['two_stage']: + del group_json['two_stage'] + + # Natural sort impl (handles embedded numbers in strings, case insensitive) + def natural_sort_key(key): + return [(int(text) if text.isdigit() else text.lower()) for text in re.split('(\d+)', key)] + + # Sort the dispatch groups by name: + workflow_dispatch_groups = dict(sorted(workflow_dispatch_groups.items(), key=lambda x: natural_sort_key(x[0]))) + + # Sort the jobs within each dispatch group: + for group_name, group_json in workflow_dispatch_groups.items(): + if 'standalone' in group_json: + group_json['standalone'] = sorted(group_json['standalone'], key=lambda x: natural_sort_key(x['name'])) + if 'two_stage' in group_json: + group_json['two_stage'] = sorted( + group_json['two_stage'], key=lambda x: natural_sort_key(x['producers'][0]['name'])) + + # Check to see if any .two_stage.producers arrays have more than 1 job, which is not supported. + # See ci-dispatch-two-stage.yml for details. + for group_name, group_json in workflow_dispatch_groups.items(): + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + num_producers = len(two_stage_json['producers']) + if num_producers > 1: + producer_names = "" + for job in two_stage_json['producers']: + producer_names += f" - {job['name']}\n" + error_message = f"ci-dispatch-two-stage.yml currently only supports a single producer. " + error_message += f"Found {num_producers} producers in '{group_name}':\n{producer_names}" + print(f"::error file=ci/matrix.yaml::{error_message}", file=sys.stderr) + raise Exception(error_message) + + # Assign unique IDs in appropriate locations. + # These are used to give "hidden" dispatch jobs a short, unique name, + # otherwise GHA generates a long, cluttered name. 
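+    # The IDs come from generate_guids() above, so they stay 1-3 characters long.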
+    for group_name, group_json in workflow_dispatch_groups.items():
+        if 'standalone' in group_json:
+            for job_json in group_json['standalone']:
+                job_json['id'] = next(guid_generator)
+        if 'two_stage' in group_json:
+            for two_stage_json in group_json['two_stage']:
+                two_stage_json['id'] = next(guid_generator)
+                for job_json in two_stage_json['producers'] + two_stage_json['consumers']:
+                    job_json['id'] = next(guid_generator)
+
+    return workflow_dispatch_groups
+
+
+def find_workflow_line_number(workflow_name):
+    regex = re.compile(f"^( )*{workflow_name}:", re.IGNORECASE)
+    line_number = 0
+    with open(matrix_yaml['filename'], 'r') as f:
+        for line in f:
+            line_number += 1
+            if regex.match(line):
+                return line_number
+    raise Exception(
+        f"Workflow '{workflow_name}' not found in {matrix_yaml['filename']} (could not match regex: {regex})")
+
+
+def get_matrix_job_origin(matrix_job, workflow_name, workflow_location):
+    filename = matrix_yaml['filename']
+    original_matrix_job = json.dumps(matrix_job, indent=None, separators=(', ', ': '))
+    original_matrix_job = original_matrix_job.replace('"', '')
+    return {
+        'filename': filename,
+        'workflow_name': workflow_name,
+        'workflow_location': workflow_location,
+        'original_matrix_job': original_matrix_job
+    }
+
+
+def remove_skip_test_jobs(matrix_jobs):
+    '''Remove jobs defined in `matrix_file.skip_test_jobs`.'''
+    new_matrix_jobs = []
+    for matrix_job in matrix_jobs:
+        jobs = matrix_job['jobs']
+        new_jobs = set()
+        for job in jobs:
+            if job in matrix_yaml['skip_test_jobs']:
+                # If a skipped test job is a build_required_job, replace it with the 'build' job.
+                if job in matrix_yaml['build_required_jobs']:
+                    # Replace with the prerequisite build job:
+                    new_jobs.add('build')
+                # If a skipped test job is not a build_required_job, ignore it.
+                else:
+                    pass # Ignore the job
+            else:
+                new_jobs.add(job)
+        # If no jobs remain, skip this matrix job.
+        if new_jobs:
+            new_matrix_job = copy.deepcopy(matrix_job)
+            new_matrix_job['jobs'] = list(new_jobs)
+            new_matrix_jobs.append(new_matrix_job)
+    return new_matrix_jobs
+
+
+def validate_required_tags(matrix_job):
+    for tag in matrix_yaml['required_tags']:
+        if tag not in matrix_job:
+            raise Exception(error_message_with_matrix_job(matrix_job, f"Missing required tag '{tag}'"))
+
+    all_tags = get_all_matrix_job_tags_sorted()
+    for tag in matrix_job:
+        if tag not in all_tags:
+            raise Exception(error_message_with_matrix_job(matrix_job, f"Unknown tag '{tag}'"))
+
+    if 'gpu' in matrix_job and matrix_job['gpu'] not in matrix_yaml['gpus']:
+        raise Exception(error_message_with_matrix_job(matrix_job, f"Unknown gpu '{matrix_job['gpu']}'"))
+
+
+def set_default_tags(matrix_job):
+    generic_defaults = set(matrix_yaml['defaulted_tags'])
+    generic_defaults -= set(['os']) # handled specially.
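+    # 'os' is derived rather than defaulted: set_derived_tags() computes it
+    # from the ctk + host-compiler pair via lookup_os() when a job omits it.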
+
+    for tag in generic_defaults:
+        if tag not in matrix_job:
+            matrix_job[tag] = matrix_yaml['default_'+tag]
+
+
+def set_derived_tags(matrix_job):
+    if 'os' not in matrix_job:
+        matrix_job['os'] = lookup_os(matrix_job['ctk'], matrix_job['cxx'])
+
+    # Expand nvcc device compiler shortcut:
+    if matrix_job['cudacxx'] == 'nvcc':
+        matrix_job['cudacxx'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'}
+
+    if 'sm' in matrix_job and matrix_job['sm'] == 'gpu':
+        if not 'gpu' in matrix_job:
+            raise Exception(error_message_with_matrix_job(matrix_job, f"\"sm: 'gpu'\" requires tag 'gpu'."))
+        if not matrix_job['gpu'] in matrix_yaml['gpu_sm']:
+            raise Exception(error_message_with_matrix_job(matrix_job,
+                                                          f"Missing matrix.yaml 'gpu_sm' entry for gpu '{matrix_job['gpu']}'"))
+        matrix_job['sm'] = matrix_yaml['gpu_sm'][matrix_job['gpu']]
+
+    if 'std' in matrix_job and matrix_job['std'] == 'all':
+        host_compiler = matrix_job['cxx'] if 'cxx' in matrix_job else None
+        device_compiler = matrix_job['cudacxx'] if 'cudacxx' in matrix_job else None
+        matrix_job['std'] = lookup_supported_stds(device_compiler, host_compiler)
+
+
+def next_explode_tag(matrix_job):
+    for tag in matrix_job:
+        if not tag in matrix_yaml['non_exploded_tags'] and isinstance(matrix_job[tag], list):
+            return tag
+    return None
+
+
+def explode_tags(matrix_job, explode_tag=None):
+    if not explode_tag:
+        explode_tag = next_explode_tag(matrix_job)
+
+    if not explode_tag:
+        return [matrix_job]
+
+    result = []
+    for value in matrix_job[explode_tag]:
+        new_job = copy.deepcopy(matrix_job)
+        new_job[explode_tag] = value
+        result.extend(explode_tags(new_job))
+
+    return result
+
+
+def preprocess_matrix_jobs(matrix_jobs):
+    result = []
+    for matrix_job in matrix_jobs:
+        validate_required_tags(matrix_job)
+        set_default_tags(matrix_job)
+        for job in explode_tags(matrix_job):
+            set_derived_tags(job)
+            # The derived tags may need to be exploded again:
+            result.extend(explode_tags(job))
+    return result
+
+
+def parse_workflow_matrix_jobs(args, workflow_name):
+    if not workflow_name in matrix_yaml['workflows']:
+        raise Exception(f"Workflow '{workflow_name}' not found in matrix file '{matrix_yaml['filename']}'")
+
+    matrix_jobs = matrix_yaml['workflows'][workflow_name]
+    workflow_line_number = find_workflow_line_number(workflow_name)
+
+    # Tag with the original matrix info, location, etc. for error messages and post-processing.
+    # Do this first so the original tags / order / idx match the input object exactly.
+    for idx, matrix_job in enumerate(matrix_jobs):
+        workflow_location = f"{matrix_yaml['filename']}:{workflow_line_number} (job {idx + 1})"
+        matrix_job['origin'] = get_matrix_job_origin(matrix_job, workflow_name, workflow_location)
+
+    # Fill in default values, explode lists.
+    matrix_jobs = preprocess_matrix_jobs(matrix_jobs)
+
+    if args.skip_tests:
+        matrix_jobs = remove_skip_test_jobs(matrix_jobs)
+    if args.dirty_projects:
+        matrix_jobs = [job for job in matrix_jobs if job['project'] in args.dirty_projects]
+
+    # Sort the tags by, *ahem*, "importance":
+    sorted_tags = get_all_matrix_job_tags_sorted()
+    matrix_jobs = [{tag: matrix_job[tag] for tag in sorted_tags if tag in matrix_job} for matrix_job in matrix_jobs]
+
+    return matrix_jobs
+
+
+def parse_workflow_dispatch_groups(args, workflow_name):
+    # Add origin information to each matrix job, explode, filter, add defaults, etc.
+    # The resulting matrix_jobs list is a complete and standardized list of jobs for the dispatch_group builder.
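+    # E.g. a single matrix entry with `std: [17, 20]` reaches this point as two
+    # fully-specified jobs, one per std, after explode_tags() has run.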
+ matrix_jobs = parse_workflow_matrix_jobs(args, workflow_name) + + # If we're printing multiple workflows, add a prefix to the group name to differentiate them. + group_prefix = f"[{workflow_name}] " if len(args.workflows) > 1 else "" + + # Convert the matrix jobs into a dispatch group object: + workflow_dispatch_groups = {} + for matrix_job in matrix_jobs: + matrix_job_dispatch_group = matrix_job_to_dispatch_group(matrix_job, group_prefix) + merge_dispatch_groups(workflow_dispatch_groups, matrix_job_dispatch_group) + + return finalize_workflow_dispatch_groups(workflow_dispatch_groups) + + +def write_outputs(final_workflow): + job_list = [] + runner_counts = {} + id_to_full_job_name = {} + + total_jobs = 0 + + def process_job_array(group_name, array_name, parent_json): + nonlocal job_list + nonlocal runner_counts + nonlocal total_jobs + + job_array = parent_json[array_name] if array_name in parent_json else [] + for job_json in job_array: + total_jobs += 1 + job_list.append(f"{total_jobs:4} id: {job_json['id']:<4} {array_name:13} {job_json['name']}") + id_to_full_job_name[job_json['id']] = f"{group_name} {job_json['name']}" + runner = job_json['runner'] + runner_counts[runner] = runner_counts.get(runner, 0) + 1 + + for group_name, group_json in final_workflow.items(): + job_list.append(f"{'':4} {group_name}:") + process_job_array(group_name, 'standalone', group_json) + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + process_job_array(group_name, 'producers', two_stage_json) + process_job_array(group_name, 'consumers', two_stage_json) + + # Sort by descending counts: + runner_counts = {k: v for k, v in sorted(runner_counts.items(), key=lambda item: item[1], reverse=True)} + + runner_heading = f"🏃‍ Runner counts (total jobs: {total_jobs})" + + runner_counts_table = f"| {'#':^4} | Runner\n" + runner_counts_table += "|------|------\n" + for runner, count in runner_counts.items(): + runner_counts_table += f"| {count:4} | `{runner}`\n" + + runner_json = {"heading": runner_heading, "body": runner_counts_table} + + os.makedirs("workflow", exist_ok=True) + write_json_file("workflow/workflow.json", final_workflow) + write_json_file("workflow/workflow_keys.json", list(final_workflow.keys())) + write_json_file("workflow/job_ids.json", id_to_full_job_name) + write_text_file("workflow/job_list.txt", "\n".join(job_list)) + write_json_file("workflow/runner_summary.json", runner_json) + + +def print_gha_workflow(args): + final_workflow = {} + for workflow_name in args.workflows: + workflow_dispatch_groups = parse_workflow_dispatch_groups(args, workflow_name) + merge_dispatch_groups(final_workflow, workflow_dispatch_groups) + + write_outputs(final_workflow) + + +def print_devcontainer_info(args): + devcontainer_version = matrix_yaml['devcontainer_version'] + + matrix_jobs = [] + for workflow in matrix_yaml['workflows']: + matrix_jobs.extend(parse_workflow_matrix_jobs(args, workflow)) + + # Remove all but the following keys from the matrix jobs: + keep_keys = ['ctk', 'cxx', 'os'] + combinations = [{key: job[key] for key in keep_keys} for job in matrix_jobs] + + # Remove duplicates and filter out windows jobs: + unique_combinations = [] + for combo in combinations: + if not is_windows(combo) and combo not in unique_combinations: + unique_combinations.append(combo) + + for combo in unique_combinations: + combo['compiler_name'] = combo['cxx']['name'] + combo['compiler_version'] = combo['cxx']['version'] + combo['compiler_exe'] = combo['cxx']['exe'] + del combo['cxx'] + + 
combo['cuda'] = combo['ctk']
+        del combo['ctk']
+
+    devcontainer_json = {'devcontainer_version': devcontainer_version, 'combinations': unique_combinations}
+
+    # Pretty print the devcontainer json to stdout:
+    print(json.dumps(devcontainer_json, indent=2))
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Compute matrix for workflow')
+    parser.add_argument('matrix_file', help='Path to the matrix YAML file')
+    parser_mode_group = parser.add_argument_group('Output Mode', "Must specify one of these options.")
+    parser_mode = parser_mode_group.add_mutually_exclusive_group(required=True)
+    parser_mode.add_argument('--workflows', nargs='+',
+                             help='Print GHA workflow with jobs from [pull_request, nightly, weekly, etc]')
+    parser_mode.add_argument('--devcontainer-info', action='store_true',
+                             help='Print devcontainer info instead of GHA workflows.')
+    parser.add_argument('--dirty-projects', nargs='*', help='Filter jobs to only these projects')
+    parser.add_argument('--skip-tests', action='store_true',
+                        help='Remove jobs defined in `matrix_file.skip_test_jobs`.')
+    args = parser.parse_args()
+
+    # Check if the matrix file exists
+    if not os.path.isfile(args.matrix_file):
+        print(f"Error: Matrix file '{args.matrix_file}' does not exist.")
+        sys.exit(1)
+
+    with open(args.matrix_file, 'r') as f:
+        global matrix_yaml
+        matrix_yaml = yaml.safe_load(f)
+        matrix_yaml['filename'] = args.matrix_file
+
+    if args.workflows:
+        print_gha_workflow(args)
+    elif args.devcontainer_info:
+        print_devcontainer_info(args)
+    else:
+        parser.print_usage()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/.github/actions/workflow-results/action.yml b/.github/actions/workflow-results/action.yml
new file mode 100644
index 0000000000..5c08a55fdd
--- /dev/null
+++ b/.github/actions/workflow-results/action.yml
@@ -0,0 +1,96 @@
+name: "CCCL Workflow Sentinel"
+description: "Check the results of the dispatched jobs and comment on the PR."
+
+inputs:
+  github_token:
+    description: "The GitHub token to use for commenting on the PR. No comment will be made if not provided."
+    required: false
+  pr_number:
+    description: "The PR number to comment on, if applicable. No comment will be made if not provided."
+    required: false
+
+outputs:
+  success:
+    description: "Whether all of the dispatched jobs succeeded."
+    value: ${{ steps.check-dispatch.outputs.success }}
+
+runs:
+  using: "composite"
+  steps:
+
+    - name: Download workflow artifacts
+      uses: actions/download-artifact@v3
+      with:
+        name: workflow
+        path: workflow/
+
+    - name: Download job success artifacts
+      continue-on-error: true # This may fail if no jobs succeed. The checks below will catch this.
+      uses: actions/download-artifact@v3
+      with:
+        name: dispatch-job-success
+        path: dispatch-job-success/
+
+    - name: Prepare execution summary
+      id: job-summary
+      continue-on-error: true
+      shell: bash --noprofile --norc -euo pipefail {0}
+      run: |
+        echo "Generating job summary..."
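+        # Writes execution/heading.txt and execution/projects/*_summary.json,
+        # which final-summary.py (below) assembles into the PR comment body.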
+ python3 "${GITHUB_ACTION_PATH}/prepare-execution-summary.py" workflow/workflow.json + + - name: Prepare final summary + id: final-summary + continue-on-error: true + shell: bash --noprofile --norc -euo pipefail {0} + run: | + echo "::group::Final Summary" + python3 "${GITHUB_ACTION_PATH}/final-summary.py" | tee final_summary.md + echo "::endgroup::" + + # This allows multiline strings and special characters to be passed through the GHA outputs: + url_encode_string() { + python3 -c "import sys; from urllib.parse import quote; print(quote(sys.stdin.read()))" + } + + echo "::group::GHA Output: SUMMARY" + printf "SUMMARY=%s\n" "$(cat final_summary.md | url_encode_string)" | tee -a "${GITHUB_OUTPUT}" + echo "::endgroup::" + + cp final_summary.md ${GITHUB_STEP_SUMMARY} + + - name: Comment on PR + if: ${{ !cancelled() && inputs.pr_number != '' && inputs.github_token != ''}} + continue-on-error: true + env: + PR_NUMBER: ${{ fromJSON(inputs.pr_number) }} + COMMENT_BODY: ${{ steps.final-summary.outputs.SUMMARY }} + uses: actions/github-script@v4 + with: + github-token: ${{ inputs.github_token }} + script: | + const pr_number = process.env.PR_NUMBER; + const owner = 'NVIDIA'; + const repo = 'cccl'; + // Decode URL-encoded string for proper display in comments + const commentBody = decodeURIComponent(process.env.COMMENT_BODY); + console.log('::group::Commenting on PR #' + pr_number + ' with the following message:') + console.log(commentBody); + console.log('::endgroup::'); + github.issues.createComment({ + owner: owner, + repo: repo, + issue_number: pr_number, + body: commentBody + }); + + - name: Check for job success + id: check-dispatch + shell: bash --noprofile --norc -euo pipefail {0} + run: | + if "${GITHUB_ACTION_PATH}/verify-job-success.py" workflow/job_ids.json; then + echo "success=true" >> "${GITHUB_OUTPUT}" + else + echo "success=false" >> "${GITHUB_OUTPUT}" + exit 1 + fi diff --git a/.github/actions/workflow-results/final-summary.py b/.github/actions/workflow-results/final-summary.py new file mode 100755 index 0000000000..3057724d81 --- /dev/null +++ b/.github/actions/workflow-results/final-summary.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +import json +import os +import re +import sys + + +def read_file(filepath): + with open(filepath, 'r') as f: + return f.read().rstrip("\n ") + +def print_file_if_present(filepath): + if os.path.exists(filepath): + print(read_file(filepath) + "\n\n") + + +def print_summary_file(filepath, heading_level): + summary_json = json.load(open(filepath)) + print(f"
<details><summary><h{heading_level}>{summary_json['heading']}</h{heading_level}></summary>\n")
+    print(summary_json["body"] + "\n")
+    print("</details>\n")
+
+
+def main():
+    # List of all projects detected in 'execution/projects/{project}_summary.json':
+    projects = []
+    project_file_regex="(.*)_summary.json"
+    for filename in os.listdir("execution/projects"):
+        match = re.match(project_file_regex, filename)
+        if match:
+            projects.append(match.group(1))
+
+    print(f"<details><summary><h2>{read_file('execution/heading.txt')}</h2></summary>\n")
+
+    print("<ul>")
+    for project in projects:
+        print("<li>")
+        print_summary_file(f"execution/projects/{project}_summary.json", 3)
+    print("</ul>\n")
+
+    print_summary_file("workflow/runner_summary.json", 2)
+    print_file_if_present('workflow/changes.md')
+
+    print("</details>
") + + + +if __name__ == '__main__': + main() diff --git a/.github/actions/workflow-results/prepare-execution-summary.py b/.github/actions/workflow-results/prepare-execution-summary.py new file mode 100755 index 0000000000..26b8e82363 --- /dev/null +++ b/.github/actions/workflow-results/prepare-execution-summary.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 + + +import argparse +import json +import os +import re +import sys + + +def job_succeeded(job): + # The job was successful if the artifact file 'dispatch-job-success/dispatch-job-success-' exists: + return os.path.exists(f'dispatch-job-success/{job["id"]}') + + +def natural_sort_key(key): + # Natural sort impl (handles embedded numbers in strings, case insensitive) + return [(int(text) if text.isdigit() else text.lower()) for text in re.split('(\d+)', key)] + + +# Print the prepared text summary to the file at the given path +def write_text(filepath, summary): + with open(filepath, 'w') as f: + print(summary, file=f) + + +# Print the prepared JSON object to the file at the given path +def write_json(filepath, json_object): + with open(filepath, 'w') as f: + json.dump(json_object, f, indent=4) + + +def extract_jobs(workflow): + jobs = [] + for group_name, group in workflow.items(): + if "standalone" in group: + jobs += group["standalone"] + if "two_stage" in group: + for two_stage in group["two_stage"]: + jobs += two_stage["producers"] + jobs += two_stage["consumers"] + return jobs + + +def build_summary(jobs): + summary = {'passed': 0, 'failed': 0, 'projects': {}} + projects = summary['projects'] + + for job in jobs: + success = job_succeeded(job) + + if success: + summary['passed'] += 1 + else: + summary['failed'] += 1 + + matrix_job = job["origin"]["matrix_job"] + + project = matrix_job["project"] + if not project in projects: + projects[project] = {'passed': 0, 'failed': 0, 'tags': {}} + + if success: + projects[project]['passed'] += 1 + else: + projects[project]['failed'] += 1 + + tags = projects[project]['tags'] + for tag in matrix_job.keys(): + if tag == 'project': + continue + + if not tag in tags: + tags[tag] = {'passed': 0, 'failed': 0, 'values': {}} + + value = str(matrix_job[tag]) + values = tags[tag]['values'] + + if not value in values: + values[value] = {'passed': 0, 'failed': 0} + + if success: + tags[tag]['passed'] += 1 + values[value]['passed'] += 1 + else: + tags[tag]['failed'] += 1 + values[value]['failed'] += 1 + + # Natural sort the value strings within each tag: + for project, project_summary in projects.items(): + for tag, tag_summary in project_summary['tags'].items(): + tag_summary['values'] = dict(sorted(tag_summary['values'].items(), + key=lambda item: natural_sort_key(item[0]))) + + # Sort the tags within each project so that: + # - "Likely culprits" come first. These are tags that have multiple values, but only one has failures. + # - The remaining tags with failures come next. + # - Tags with no failures come last. 
+ def rank_tag(tag_summary): + num_failing_values = sum(1 for value_summary in tag_summary['values'].values() if value_summary['failed'] > 0) + + if len(tag_summary['values']) > 1 and num_failing_values == 1: + return 0 + elif len(tag_summary['values']) > 1 and tag_summary['failed'] > 0: + return 1 + elif tag_summary['failed'] > 0: + return 2 + return 3 + for project, project_summary in projects.items(): + project_summary['tags'] = dict(sorted(project_summary['tags'].items(), + key=lambda item: (rank_tag(item[1]), item[0]))) + + return summary + + +def get_summary_heading(summary): + passed = summary['passed'] + failed = summary['failed'] + total = passed + failed + + if passed == 0: + flag = '🟥' + elif failed > 0: + flag = '🟨' + else: + flag = '🟩' + + return f'{flag} CI Results [ Failed: {failed} | Passed: {passed} | Total: {total} ]' + + +def get_project_heading(project, project_summary): + if project_summary['passed'] == 0: + flag = '🟥' + elif project_summary['failed'] > 0: + flag = '🟨' + else: + flag = '🟩' + + passed = project_summary['passed'] + failed = project_summary['failed'] + total = project_summary['failed'] + project_summary['passed'] + + return f'{flag} Project {project} [ Failed: {failed} | Passed: {passed} | Total: {total} ]' + + +def get_tag_line(tag, tag_summary): + passed = tag_summary['passed'] + failed = tag_summary['failed'] + values = tag_summary['values'] + + # Find the value with an failure rate that matches the tag's failure rate: + suspicious = None + if len(values) > 1 and failed > 0: + for value, value_summary in values.items(): + if value_summary['failed'] == failed: + suspicious = value_summary + suspicious['name'] = value + break + + # Did any jobs with this value pass? + likely_culprit = suspicious if suspicious and suspicious['passed'] == 0 else None + + note = '' + if likely_culprit: + flag = '🚨' + note = f': {likely_culprit["name"]} {flag}' + elif suspicious: + flag = '🔍' + note = f': {suspicious["name"]} {flag}' + elif passed == 0: + flag = '🟥' + elif failed > 0: + flag = '🟨' + else: + flag = '🟩' + + return f'{flag} {tag}{note}' + + +def get_value_line(value, value_summary, tag_summary): + passed = value_summary['passed'] + failed = value_summary['failed'] + total = passed + failed + + parent_size = len(tag_summary['values']) + parent_failed = tag_summary['failed'] + + is_suspicious = failed > 0 and failed == parent_failed and parent_size > 1 + is_likely_culprit = is_suspicious and passed == 0 + + if is_likely_culprit: + flag = '🔥' + elif is_suspicious: + flag = '🔍' + elif passed == 0: + flag = '🟥' + elif failed > 0: + flag = '🟨' + else: + flag = '🟩' + + percent = int(100 * failed / total) + left_aligned = f"{flag} {value} ({percent}% Fail)" + return f' {left_aligned:<30} Failed: {failed:^3} -- Passed: {passed:^3} -- Total: {total:^3}' + + +def get_project_summary_body(project, project_summary): + body = ['```'] + for tag, tag_summary in project_summary['tags'].items(): + body.append(get_tag_line(tag, tag_summary)) + for value, value_summary in tag_summary['values'].items(): + body.append(get_value_line(value, value_summary, tag_summary)) + body.append('```') + return "\n".join(body) + + +def write_project_summary(project, project_summary): + heading = get_project_heading(project, project_summary) + body = get_project_summary_body(project, project_summary) + + summary = {'heading': heading, 'body': body} + + write_json(f'execution/projects/{project}_summary.json', summary) + + +def write_workflow_summary(workflow): + summary = 
build_summary(extract_jobs(workflow)) + + os.makedirs('execution/projects', exist_ok=True) + + write_text('execution/heading.txt', get_summary_heading(summary)) + + for project, project_summary in summary['projects'].items(): + write_project_summary(project, project_summary) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('workflow', type=argparse.FileType('r')) + args = parser.parse_args() + + workflow = json.load(args.workflow) + write_workflow_summary(workflow) + + +if __name__ == '__main__': + main() diff --git a/.github/actions/workflow-results/verify-job-success.py b/.github/actions/workflow-results/verify-job-success.py new file mode 100755 index 0000000000..d2e69f0677 --- /dev/null +++ b/.github/actions/workflow-results/verify-job-success.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import sys + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("job_id_map", type=argparse.FileType('r')) + args = parser.parse_args() + + job_id_map = json.load(args.job_id_map) + + # For each job id, verify that the file 'dispatch-job-success/' exists + success = True + for job_id, job_name in job_id_map.items(): + success_file = f'dispatch-job-success/{job_id}' + print(f'Verifying job with id "{job_id}": "{job_name}"') + if not os.path.exists(success_file): + print(f'Failed: Artifact "dispatch-job-success/{job_id}" not found') + success = False + + if not success: + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/.github/workflows/build-and-test-linux.yml b/.github/workflows/build-and-test-linux.yml deleted file mode 100644 index 6c5ba40061..0000000000 --- a/.github/workflows/build-and-test-linux.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: build and test - -defaults: - run: - shell: bash -exo pipefail {0} - -on: - workflow_call: - inputs: - cpu: {type: string, required: true} - test_name: {type: string, required: false} - build_script: {type: string, required: false} - test_script: {type: string, required: false} - container_image: {type: string, required: false} - run_tests: {type: boolean, required: false, default: true} - -permissions: - contents: read - -jobs: - build: - name: Build ${{inputs.test_name}} - permissions: - id-token: write - contents: read - uses: ./.github/workflows/run-as-coder.yml - with: - name: Build ${{inputs.test_name}} - runner: linux-${{inputs.cpu}}-cpu16 - image: ${{ inputs.container_image }} - command: | - ${{ inputs.build_script }} - - test: - needs: build - permissions: - id-token: write - contents: read - if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}} - name: Test ${{inputs.test_name}} - uses: ./.github/workflows/run-as-coder.yml - with: - name: Test ${{inputs.test_name}} - runner: linux-${{inputs.cpu}}-gpu-v100-latest-1 - image: ${{inputs.container_image}} - command: | - ${{ inputs.test_script }} diff --git a/.github/workflows/build-and-test-windows.yml b/.github/workflows/build-and-test-windows.yml deleted file mode 100644 index afcb78d835..0000000000 --- a/.github/workflows/build-and-test-windows.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Build Windows - -on: - workflow_call: - inputs: - test_name: {type: string, required: false} - build_script: {type: string, required: false} - container_image: {type: string, required: false} - -jobs: - prepare: - name: Build ${{inputs.test_name}} - runs-on: windows-amd64-cpu16 - permissions: - id-token: write - contents: read - env: - SCCACHE_BUCKET: 
rapids-sccache-devs - SCCACHE_REGION: us-east-2 - SCCACHE_IDLE_TIMEOUT: 0 - SCCACHE_S3_USE_SSL: true - SCCACHE_S3_NO_CREDENTIALS: false - steps: - - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA - aws-region: us-east-2 - role-duration-seconds: 43200 # 12 hours - - name: Fetch ${{ inputs.container_image }} - shell: powershell - run: docker pull ${{ inputs.container_image }} - - name: Run the tests - shell: powershell - run: >- - docker run ${{ inputs.container_image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') - [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') - [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') - git clone https://github.com/NVIDIA/cccl.git; - cd cccl; - git fetch --all; - git checkout ${{github.ref_name}}; - ${{inputs.build_script}};" diff --git a/.github/workflows/ci-workflow-nightly.yml b/.github/workflows/ci-workflow-nightly.yml new file mode 100644 index 0000000000..ed1bb149b3 --- /dev/null +++ b/.github/workflows/ci-workflow-nightly.yml @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This is the main workflow that runs on every PR and push to main +name: nightly + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + schedule: + - cron: '0 7 * * *' # 7AM UTC, 12AM PST, 3AM EST + +concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + +jobs: + + build-workflow: + name: Build workflow from matrix + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + workflow: ${{ steps.build-workflow.outputs.workflow }} + workflow_keys: ${{ steps.build-workflow.outputs.workflow_keys }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + - name: Build workflow + id: build-workflow + uses: ./.github/actions/workflow-build + with: + workflows: nightly + + run-workflow: + name: ${{ matrix.name }} + needs: build-workflow + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.build-workflow.outputs.workflow_keys) }} + uses: ./.github/workflows/workflow-dispatch.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} + + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + if: ${{ always() || !cancelled() }} + needs: + - build-workflow + - run-workflow + permissions: + contents: read + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Check workflow success + id: check-workflow + uses: ./.github/actions/workflow-results + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check results + run: | + status="passed" + + check_result() { + name=$1 + expected=$2 + result=$3 + + echo "Checking if $name job result ('$result') is '$expected'..." + if [[ "$result" != "$expected" ]]; then + echo "$name job failed" + + status="failed" + fi + } + + # Note that run-workflow is different: + check_result "build-workflow" "success" "${{needs.build-workflow.result}}" + check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + + if [[ "$status" == "failed" ]]; then + exit 1 + fi diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml new file mode 100644 index 0000000000..3ff29cfeb2 --- /dev/null +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -0,0 +1,134 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This is the main workflow that runs on every PR and push to main +name: pull_request + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + push: + branches: + - "pull-request/[0-9]+" + +concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +jobs: + + build-workflow: + name: Build workflow from matrix + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + outputs: + workflow: ${{ steps.build-workflow.outputs.workflow }} + workflow_keys: ${{ steps.build-workflow.outputs.workflow_keys }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + - name: Lookup PR info + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Build workflow + id: build-workflow + uses: ./.github/actions/workflow-build + with: + skip_tests: ${{ toJSON(contains(github.event.head_commit.message, '[skip-tests]')) }} + inspect_changes_script: ${{ toJSON(!contains(github.event.head_commit.message, '[all-projects]') && 'ci/inspect_changes.sh' || '') }} + inspect_changes_base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + workflows: >- + ${{ !contains(github.event.head_commit.message, '[workflow:!pull_request]') && 'pull_request' || '' }} + ${{ contains(github.event.head_commit.message, '[workflow:nightly]') && 'nightly' || '' }} + ${{ contains(github.event.head_commit.message, '[workflow:test]') && 'test' || '' }} + + run-workflow: + name: ${{ matrix.name }} + needs: build-workflow + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.build-workflow.outputs.workflow_keys) }} + uses: ./.github/workflows/workflow-dispatch.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} + + verify-devcontainers: + name: Verify Dev Containers + permissions: + id-token: write + contents: read + uses: ./.github/workflows/verify-devcontainers.yml + + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + if: ${{ always() && !cancelled() }} + needs: + - build-workflow + - run-workflow + - verify-devcontainers + permissions: + contents: read + pull-requests: write # Posts a comment back to the PR. + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Get Base Branch from PR + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + + - name: Check workflow success + id: check-workflow + uses: ./.github/actions/workflow-results + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + pr_number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }} + + - name: Check results + run: | + status="passed" + + check_result() { + name=$1 + expected=$2 + result=$3 + + echo "Checking if $name job result ('$result') is '$expected'..." 
+ if [[ "$result" != "$expected" ]]; then + echo "$name job failed" + + status="failed" + fi + } + + # Note that run-workflow is different: + check_result "build-workflow" "success" "${{needs.build-workflow.result}}" + check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + check_result "verify-devcontainers" "success" "${{needs.verify-devcontainers.result}}" + + if [[ "$status" == "failed" ]]; then + exit 1 + fi diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml deleted file mode 100644 index 7b5ed4ef27..0000000000 --- a/.github/workflows/dispatch-build-and-test.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Dispatch build and test - -on: - workflow_call: - inputs: - project_name: {type: string, required: true} - per_cuda_compiler_matrix: {type: string, required: true} - devcontainer_version: {type: string, required: true} - is_windows: {type: boolean, required: true} - -permissions: - contents: read - -jobs: - # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration - # ensures that the build/test steps can overlap across different configurations. For example, - # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11. - build_and_test_linux: - name: build and test linux - permissions: - id-token: write - contents: read - if: ${{ !inputs.is_windows }} - uses: ./.github/workflows/build-and-test-linux.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} - with: - cpu: ${{ matrix.cpu }} - test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}} - build_script: './ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"' - test_script: './ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"' - container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows-2022' }} - - build_and_test_windows: - name: build and test windows - permissions: - id-token: write - contents: read - if: ${{ inputs.is_windows }} - uses: ./.github/workflows/build-and-test-windows.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} - with: - test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} - build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}}" - container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}} diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml deleted file mode 100644 index 3dcee0cf6c..0000000000 --- a/.github/workflows/pr.yml +++ /dev/null @@ -1,242 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This is the main workflow that runs on every PR and push to main -name: pr - -defaults: - run: - shell: bash -euo pipefail {0} - -on: - push: - branches: - - "pull-request/[0-9]+" - -# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts. -concurrency: - group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} - cancel-in-progress: true - -permissions: - contents: read - pull-requests: read - -jobs: - inspect-changes: - name: "Inspect Changes" - runs-on: ubuntu-latest - outputs: - LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} - CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} - THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} - steps: - - name: Get Base Branch from PR - id: get-pr-info - uses: nv-gha-runners/get-pr-info@main - - name: Checkout repo - uses: actions/checkout@v3 - - name: Identify dirty subprojects - id: set-outputs - run: | - ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} - env: - BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} - - compute-matrix: - name: Compute matrix - runs-on: ubuntu-latest - needs: - - inspect-changes - outputs: - DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} - PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} - PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} - NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} - CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} - CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Compute matrix outputs - id: set-outputs - run: | - .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request - env: - THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} - CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} - LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} - - nvrtc: - name: libcudacxx NVRTC CUDA${{matrix.cuda}} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/run-as-coder.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} - with: - name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} - - thrust: - name: Thrust CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} - uses: 
./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "thrust" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - cub: - name: CUB CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "cub" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - libcudacxx: - name: libcudacxx CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "libcudacxx" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - clang-cuda: - name: ${{matrix.lib}} Clang CUDA - permissions: - id-token: write - contents: read - needs: compute-matrix - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-cpu16 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" - - cccl-infra: - name: CCCL Infrastructure - permissions: - id-token: write - contents: read - needs: compute-matrix - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - cmake -S . 
--preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} - ctest --preset=cccl-infra - - verify-devcontainers: - name: Verify Dev Containers - permissions: - id-token: write - contents: read - uses: ./.github/workflows/verify-devcontainers.yml - - verify-codegen: - name: Verify Codegen in libcudacxx - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run libcudacxx codegen verification - id: verify-codegen - run: | - sudo apt-get update - sudo apt-get install ninja-build - export CXX="g++" - ./ci/verify_codegen.sh - - # This job is the final job that runs after all other jobs and is used for branch protection status checks. - # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks - # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 - ci: - runs-on: ubuntu-latest - name: CI - if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success - needs: - - clang-cuda - - cub - - libcudacxx - - nvrtc - - thrust - - cccl-infra - - verify-devcontainers - - verify-codegen - steps: - - name: Check status of all precursor jobs - if: >- - ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} - run: exit 1 diff --git a/.github/workflows/run-as-coder.yml b/.github/workflows/run-as-coder.yml deleted file mode 100644 index 5430e6c0ae..0000000000 --- a/.github/workflows/run-as-coder.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: Run as coder user - -defaults: - run: - shell: bash -exo pipefail {0} - - -on: - workflow_call: - inputs: - name: {type: string, required: true} - image: {type: string, required: true} - runner: {type: string, required: true} - command: {type: string, required: true} - env: { type: string, required: false, default: "" } - -permissions: - contents: read - -jobs: - run-as-coder: - name: ${{inputs.name}} - permissions: - id-token: write - contents: read - runs-on: ${{inputs.runner}} - container: - options: -u root - image: ${{inputs.image}} - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - with: - path: cccl - persist-credentials: false - - name: Move files to coder user home directory - run: | - cp -R cccl /home/coder/cccl - chown -R coder:coder /home/coder/ - - name: Add NVCC problem matcher - run: | - echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" - - name: Configure credentials and environment variables for sccache - uses: ./cccl/.github/actions/configure_cccl_sccache - - name: Run command - shell: su coder {0} - run: | - set -eo pipefail - cd ~/cccl - echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" - echo -e "\e[1;34m${{inputs.command}}\e[0m" - eval "${{inputs.command}}" || exit_code=$? - if [ ! -z "$exit_code" ]; then - echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" - echo "::error:: To replicate this failure locally, follow the steps below:" - echo "1. Clone the repository, and navigate to the correct branch and commit:" - echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" - echo "" - echo "2. 
Run the failed command inside the same Docker container used by the CI:" - echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" - echo "" - echo "For additional information, see:" - echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" - echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" - exit $exit_code - fi diff --git a/.github/workflows/workflow-dispatch-job.yml b/.github/workflows/workflow-dispatch-job.yml new file mode 100644 index 0000000000..641e054659 --- /dev/null +++ b/.github/workflows/workflow-dispatch-job.yml @@ -0,0 +1,166 @@ +name: "Workflow/Dispatch/Job" + +# Important note about depending on this workflow: The `result` will be a failure, even if successful. +# +# This reusable workflow dispatches to a number of internal jobs. Only one job will run, +# and some may be in error states due to empty matrices (which are used instead of `if` to keep +# skipped dispatch jobs out of the GHA UI). The `continue-on-error` flag should prevent these +# errors from failing the workflow, but this does not work. +# +# Thus, the `result` of this workflow will always be a failure, even if the job itself is successful. +# +# Instead, the results from each job is uploaded as an artifact. See the workflow_results action for more details. +# To depend on this job, you should use the `success` output instead: +# +# ``` +# dependent_job: +# needs: dispatch-job +# if: ${{ !cancelled() && needs.dispatch-job.outputs.success }} +# ``` + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + workflow_call: + outputs: + success: + value: ${{ contains(toJSON(jobs.*.outputs.success), 'true') }} + inputs: + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + id: {type: string, required: true} + env: {type: string, required: false} + dummy_matrix: {type: string, required: false, default: '[{"valid": true}]'} + +permissions: + contents: read + +jobs: + linux: + name: ${{inputs.name}} + continue-on-error: ${{ ! startsWith(inputs.runner, 'linux') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'linux') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + container: + options: -u root + image: ${{inputs.image}} + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + path: cccl + persist-credentials: false + - name: Move files to coder user home directory + run: | + cp -R cccl /home/coder/cccl + chown -R coder:coder /home/coder/ + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./cccl/.github/actions/configure_cccl_sccache + - name: Run command + shell: su coder {0} + env: + # Dereferencing the command from and env var instead of a GHA input avoids issues with escaping + # semicolons and other special characters (e.g. `-arch "60;70;80"`). 
+ COMMAND: ${{inputs.command}} + run: | + set -eo pipefail + cd ~/cccl + echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" + echo -e "\e[1;34m${COMMAND}\e[0m" + eval "${COMMAND}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${COMMAND}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + - name: Mark job as successful + id: done + run: | + echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} + mkdir dispatch-job-success + touch dispatch-job-success/${{inputs.id}} + - name: Upload dispatch-job-success + uses: actions/upload-artifact@v3 + with: + name: dispatch-job-success + path: dispatch-job-success/${{inputs.id}} + + windows: + name: ${{inputs.name}} + continue-on-error: ${{ ! startsWith(inputs.runner, 'windows') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'windows') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.image }} + run: docker pull ${{ inputs.image }} + - name: Run Command + run: >- + docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/cccl.git; + cd cccl; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.command}}" + - name: Mark job as successful + id: done + run: | + echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} + mkdir dispatch-job-success + touch dispatch-job-success/${{inputs.id}} + - name: Upload 
dispatch-job-success + uses: actions/upload-artifact@v3 + with: + name: dispatch-job-success + path: dispatch-job-success/${{inputs.id}} diff --git a/.github/workflows/workflow-dispatch-two-stage.yml b/.github/workflows/workflow-dispatch-two-stage.yml new file mode 100644 index 0000000000..65b6d1eba6 --- /dev/null +++ b/.github/workflows/workflow-dispatch-two-stage.yml @@ -0,0 +1,64 @@ +name: "Workflow/Dispatch/TwoStage" + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + workflow_call: + inputs: + producers: {type: string, required: true} + consumers: {type: string, required: true} + +permissions: + contents: read + +jobs: + # It is impossible to accumulate output variables across a matrix, + # and we cannot rely on the results of the dispatch-job workflow to determine success. + # See the note in ci-dispatch-job.yml for more information. + # + # Since we cannot accumulate results from multiple producers, only support a single producer for now. + # This is enforced by compute-matrix.py. + producers: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. + name: "p.${{ matrix.id }}" + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.producers)}} + uses: ./.github/workflows/workflow-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + id: ${{ matrix.id }} + + consumers: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. + name: "c.${{ matrix.id }}" + needs: producers + # dispatch-job's result is always false, check the outputs instead. See ci-dispatch-job.yml for more information. + if: ${{ !cancelled() && fromJson(needs.producers.outputs.success) }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.consumers)}} + uses: ./.github/workflows/workflow-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + id: ${{ matrix.id }} diff --git a/.github/workflows/workflow-dispatch.yml b/.github/workflows/workflow-dispatch.yml new file mode 100644 index 0000000000..adab0f6f36 --- /dev/null +++ b/.github/workflows/workflow-dispatch.yml @@ -0,0 +1,53 @@ +name: "Workflow/Dispatch/Group" + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + workflow_call: + inputs: + name: {type: string, required: true} + jobs: {type: string, required: true} + +permissions: + contents: read + +jobs: + + standlone-jobs: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. 
+ name: "s.${{ matrix.id }}" + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['standalone']}} + uses: ./.github/workflows/workflow-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + id: ${{ matrix.id }} + + two-stage-jobs: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. + name: "t.${{ matrix.id }}" + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['two_stage']}} + uses: ./.github/workflows/workflow-dispatch-two-stage.yml + with: + producers: ${{ toJSON(matrix.producers) }} + consumers: ${{ toJSON(matrix.consumers) }} diff --git a/.gitignore b/.gitignore index ceb7d48ce5..4f09d2f5e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .idea/ -*build*/ +build*/ .cache .aws .config diff --git a/CMakePresets.json b/CMakePresets.json index dbbc80a4d9..290024f565 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -60,7 +60,7 @@ "name": "all-dev-debug", "displayName": "all-dev debug", "inherits": "all-dev", - "cacheVariables":{ + "cacheVariables": { "CCCL_ENABLE_BENCHMARKS": false, "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CUDA_FLAGS": "-G" diff --git a/ci-overview.md b/ci-overview.md index 56314fbab3..d91781ad52 100644 --- a/ci-overview.md +++ b/ci-overview.md @@ -33,6 +33,8 @@ Special commands are provided that can be included in commit messages to direct - **Example:** `git commit -m "[skip ci] Update README."` - `[skip-tests]`: Skips CI jobs that execute tests, but runs all other jobs. Useful to avoid time-consuming tests when changes are unlikely to affect them. +- `[all-projects]`: CI normally skips projects that don't have changes in themselves or their dependencies. This forces all projects to build. +- `[workflow:]`: Execute jobs from the named workflow. Example: `[workflow:nightly]` runs all jobs defined in `matrix.yaml`'s `workflows.nightly` list. Use these commands judiciously. While they offer flexibility, they should be used appropriately to maintain the codebase's integrity and quality. 
diff --git a/ci/build_common.sh b/ci/build_common.sh index 239d463ead..a06af83641 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -216,10 +216,13 @@ function test_preset() { local BUILD_NAME=$1 local PRESET=$2 - local GROUP_NAME="🚀 Test ${BUILD_NAME}" + local GPU_REQUIRED=${3:-"true"} - fail_if_no_gpu + if [ "${GPU_REQUIRED}" == "true" ]; then + fail_if_no_gpu + fi + local GROUP_NAME="🚀 Test ${BUILD_NAME}" ctest_log_dir="${BUILD_DIR}/log/ctest" ctest_log="${ctest_log_dir}/${PRESET}" diff --git a/ci/infra_cccl.sh b/ci/infra_cccl.sh new file mode 100755 index 0000000000..475799ace2 --- /dev/null +++ b/ci/infra_cccl.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +source "$(dirname "$0")/build_common.sh" + +print_environment_details + +PRESET="cccl-infra" + +CMAKE_OPTIONS="" + +GPU_REQUIRED="false" + +if [ -n "${GITHUB_SHA:-}" ]; then + CMAKE_OPTIONS="$CMAKE_OPTIONS -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA}" +fi + +configure_preset "CCCL Infra" "$PRESET" "$CMAKE_OPTIONS" +test_preset "CCCL Infra" "$PRESET" "$GPU_REQUIRED" + +print_time_summary diff --git a/ci/inspect_changes.sh b/ci/inspect_changes.sh index 59500a7055..342ce22493 100755 --- a/ci/inspect_changes.sh +++ b/ci/inspect_changes.sh @@ -21,6 +21,7 @@ base_sha=$(git merge-base $head_sha $base_sha) # Define a list of subproject directories: subprojects=( + cccl libcudacxx cub thrust @@ -28,17 +29,33 @@ subprojects=( # ...and their dependencies: declare -A dependencies=( + [cccl]="" [libcudacxx]="cccl" [cub]="cccl libcudacxx thrust" [thrust]="cccl libcudacxx cub" ) +declare -A project_names=( + [cccl]="CCCL Infrastructure" + [libcudacxx]="libcu++" + [cub]="CUB" + [thrust]="Thrust" +) + write_output() { local key="$1" local value="$2" echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" } +tee_to_step_summary() { + if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then + tee -a "${GITHUB_STEP_SUMMARY}" + else + cat + fi +} + dirty_files() { git diff --name-only "${base_sha}" "${head_sha}" } @@ -90,26 +107,14 @@ add_dependencies() { return 0 } -# write_subproject_status -# Write the output _DIRTY={true|false} -write_subproject_status() { - local subproject="$1" - local dirty_flag=${subproject^^}_DIRTY - - if [[ ${!dirty_flag} -ne 0 ]]; then - write_output "${dirty_flag}" "true" - else - write_output "${dirty_flag}" "false" - fi -} - main() { # Print the list of subprojects and all of their dependencies: echo "Subprojects: ${subprojects[*]}" echo echo "Dependencies:" for subproject in "${subprojects[@]}"; do - echo " - ${subproject} -> ${dependencies[$subproject]}" + printf " - %-27s -> %s\n" "$subproject (${project_names[$subproject]})" "${dependencies[$subproject]}" + done echo @@ -117,36 +122,74 @@ main() { echo "HEAD SHA: ${head_sha}" echo + check="+/-" + no_check=" " + get_checkmark() { + if [[ $1 -eq 0 ]]; then + echo "$no_check" + else + echo "$check" + fi + } + # Print the list of files that have changed: - echo "Dirty files:" + echo "::group::Dirty files" dirty_files | sed 's/^/ - /' - echo "" + echo "::endgroup::" + echo + + + echo "

<details><summary><h3>👃 Inspect Changes</h3></summary>
" | tee_to_step_summary + echo | tee_to_step_summary + + echo -e "### Modifications in project?\n" | tee_to_step_summary + echo "| | Project" | tee_to_step_summary + echo "|-----|---------" | tee_to_step_summary - echo "Modifications in project?" # Assign the return value of `inspect_cccl` to the variable `CCCL_DIRTY`: inspect_cccl CCCL_DIRTY=$? - echo "$(if [[ ${CCCL_DIRTY} -eq 0 ]]; then echo " "; else echo "X"; fi) - CCCL Infrastructure" + checkmark="$(get_checkmark ${CCCL_DIRTY})" + echo "| ${checkmark} | ${project_names[cccl]}" | tee_to_step_summary # Check for changes in each subprojects directory: for subproject in "${subprojects[@]}"; do + if [[ ${subproject} == "cccl" ]]; then + # Special case handled above. + continue + fi + inspect_subdir $subproject - declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + local dirty=$? + declare ${subproject^^}_DIRTY=${dirty} + checkmark="$(get_checkmark ${dirty})" + echo "| ${checkmark} | ${project_names[$subproject]}" | tee_to_step_summary done - echo + echo | tee_to_step_summary + + echo -e "### Modifications in project or dependencies?\n" | tee_to_step_summary + echo "| | Project" | tee_to_step_summary + echo "|-----|---------" | tee_to_step_summary - echo "Modifications in project or dependencies?" for subproject in "${subprojects[@]}"; do add_dependencies ${subproject} - declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + local dirty=$? + declare ${subproject^^}_DIRTY=${dirty} + checkmark="$(get_checkmark ${dirty})" + echo "| ${checkmark} | ${project_names[$subproject]}" | tee_to_step_summary done - echo + echo "
" | tee_to_step_summary + + declare -a dirty_subprojects=() for subproject in "${subprojects[@]}"; do - write_subproject_status ${subproject} + var_name="${subproject^^}_DIRTY" + if [[ ${!var_name} -ne 0 ]]; then + dirty_subprojects+=("$subproject") + fi done + + write_output "DIRTY_PROJECTS" "${dirty_subprojects[*]}" } main "$@" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 589de44bd3..42d2cb88c3 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,12 +1,6 @@ - -cuda_prev_min: &cuda_prev_min '11.1' -cuda_prev_max: &cuda_prev_max '11.8' -cuda_curr: &cuda_curr '12.4' - -# The GPUs to test on -gpus: - - 'a100' - - 'v100' +ctk_11_1: &ctk_11_1 '11.1' +ctk_11_8: &ctk_11_8 '11.8' +ctk_curr: &ctk_curr '12.4' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '24.06' @@ -42,54 +36,241 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # oneAPI configs oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } -# Each environment below will generate a unique build/test job -# See the "compute-matrix" job in the workflow for how this is parsed and used -# cuda: The CUDA Toolkit version -# os: The operating system used -# cpu: The CPU architecture -# compiler: The compiler to use -# name: The compiler name -# version: The compiler version -# exe: The unverionsed compiler binary name -# std: The C++ standards to build for -# This field is unique as it will generate an independent build/test job for each value - -# Configurations that will run for every PR -pull_request: - nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc6, std: [11, 14], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 'amd64', compiler: *msvc2017, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: 
*cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [11, 14, 17], jobs: ['build']} - nvrtc: - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', std: [11, 14, 17, 20]} - clang-cuda: - - {lib: ['thrust', 'cub', 'libcudacxx'], cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest, std: [17, 20]} - cccl-infra: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} +# GHA Workflow job matrices: +workflows: + pull_request: + # default_projects: nvcc + - {jobs: ['build'], std: 'all', ctk: *ctk_11_1, cxx: [*gcc6, *gcc7, *gcc8, *gcc9, *llvm9, *msvc2017]} + - {jobs: ['build'], std: 'all', ctk: *ctk_11_8, cxx: [*gcc11], sm: '60;70;80;90'} + - {jobs: ['build'], std: 'all', cxx: [*gcc7, *gcc8, *gcc9, *gcc10, *gcc11]} + - {jobs: ['build'], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14, *llvm15]} + - {jobs: ['test'], std: 'all', cxx: [*gcc12, *llvm16]} + - {jobs: ['build'], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'} + - {jobs: ['build'], std: 'all', cxx: [*gcc12], sm: '90a'} + - {jobs: ['build'], std: 'all', cxx: [*oneapi]} + - {jobs: ['build'], std: 'all', cxx: [*msvc2019, *msvc2022]} + # default_projects: clang-cuda + - {jobs: ['build'], std: [17, 20], cudacxx: *llvm-newest, cxx: *llvm-newest} + # nvrtc: + - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'} + # verify-codegen: + - {jobs: ['verify_codegen'], project: 'libcudacxx'} + # cccl-infra: + - {jobs: ['infra'], project: 'cccl', ctk: *ctk_11_1, cxx: [*gcc-oldest, *llvm-oldest]} + - {jobs: ['infra'], project: 'cccl', ctk: *ctk_curr, cxx: [*gcc-newest, *llvm-newest]} + nightly: + # libcudacxx build fails, CUB tests fail: + - {jobs: ['build'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11], project: ['cub']} + - {jobs: ['test'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11], project: ['thrust']} + # - {jobs: ['test'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11] } + + # libcudacxx build fails, CUB tests fail: + - {jobs: ['build'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17], project: ['cub']} + - {jobs: ['test'], ctk: 
*ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17], project: ['thrust']} + # - {jobs: ['test'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17] } + + # CUB + libcudacxx tests fails: + - {jobs: ['build'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17], project: ['libcudacxx', 'cub']} + - {jobs: ['test'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17], project: ['thrust']} + # - {jobs: ['test'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17] } + + # libcudacxx tests fail: + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14], project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11], project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17], project: ['libcudacxx']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17], project: ['cub', 'thrust']} + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14] } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all' } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11] } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20] } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17] } + + # nvrtc: + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', sm: 'gpu', cxx: *gcc12, std: [20], project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc12, std: [20], project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['libcudacxx']} + +# +# Resources for compute_matrix.py. These can be modified to add new jobs, etc. +# +# Jobs are executed by running scripts: +# - Linux: 'ci/_.sh` +# - Windows: `ci/windows/_.bat` + +# A matrix entry must have the following tag. +required_tags: + - 'jobs' # A list of job types to run (e.g. 'build', 'test', 'nvrtc', 'infra', 'verify_codegen', ...) for + # the specified configuration(s). + +# If a matrix entry omits these tags, a default value (defined later in `default_`) is used. +defaulted_tags: + - 'ctk' # CUDA ToolKit version. Will be exploded if a list. + - 'cpu' # CPU architecture. Will be exploded if a list. + - 'gpu' # GPU model. Will be exploded if a list. + - 'cxx' # Host compiler {name, version, exe}. Will be exploded if a list. + - 'cudacxx' # Device compiler as {name, version, exe} or 'nvcc' to use nvcc from the specified `ctk`. + # Will be exploded if a list. + - 'project' # Project name (e.g. 
libcudacxx, cub, thrust, cccl). Will be exploded if a list. + - 'os' # Operating system. Will be exploded if a list. + +# These tags will only exist if needed: +optional_tags: + - 'std' # C++ standard. Passed to script with `-std `. Will be exploded if a list. + # If set to 'all', all stds supported by the host/device compiler are used. + - 'sm' # `CMAKE_CUDA_ARCHITECTURES` Passed to script with `-arch `. + # Defaults to use the settings in the CMakePresets.json file. + # Set to 'gpu' to only target the GPU in the `gpu` tag. + # Can pass multiple architectures via "60;70-real;80-virtual" + # Will be exploded if a list (e.g. `sm: ['60;70;80;90', '90a']` creates two jobs) + - 'cmake_options' # Additional CMake options to pass to the build. Passed to script with `-cmake_options ""`. + # Will be exploded if a list. + +# `default_`: Used when the tag is omitted. +default_ctk: *ctk_curr +default_cudacxx: 'nvcc' +default_cxx: *gcc12 +default_cpu: 'amd64' +default_gpu: 'v100' +default_project: + - 'libcudacxx' + - 'cub' + - 'thrust' +# Special handling: lookup os from ctk/cxx info +# See `matrix.yml` at https://github.com/rapidsai/devcontainers +default_os_lookup: + 'ctk11.1-gcc6': 'ubuntu18.04' + 'ctk11.1-gcc7': 'ubuntu18.04' + 'ctk11.1-gcc8': 'ubuntu18.04' + 'ctk11.1-gcc9': 'ubuntu18.04' + 'ctk11.1-llvm9': 'ubuntu18.04' + 'ctk11.1-cl14.16': 'windows2022' + 'ctk11.8-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc7': 'ubuntu20.04' + 'ctk12.4-gcc8': 'ubuntu20.04' + 'ctk12.4-gcc9': 'ubuntu20.04' + 'ctk12.4-gcc10': 'ubuntu20.04' + 'ctk12.4-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc12': 'ubuntu22.04' + 'ctk12.4-llvm9': 'ubuntu20.04' + 'ctk12.4-llvm10': 'ubuntu20.04' + 'ctk12.4-llvm11': 'ubuntu20.04' + 'ctk12.4-llvm12': 'ubuntu20.04' + 'ctk12.4-llvm13': 'ubuntu20.04' + 'ctk12.4-llvm14': 'ubuntu20.04' + 'ctk12.4-llvm15': 'ubuntu22.04' + 'ctk12.4-llvm16': 'ubuntu22.04' + 'ctk12.4-cl14.29': 'windows2022' + 'ctk12.4-cl14.39': 'windows2022' + 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' + +# Lookup supported C++ standards for a given compiler when `std: 'all'`. +all_stds: [11, 14, 17, 20] +lookup_cxx_supported_stds: + 'gcc6': [11, 14 ] + 'gcc7': [11, 14, 17 ] + 'gcc8': [11, 14, 17 ] + 'gcc9': [11, 14, 17 ] + 'gcc10': [11, 14, 17, 20] + 'gcc11': [11, 14, 17, 20] + 'gcc12': [11, 14, 17, 20] + 'llvm9': [11, 14, 17 ] + 'llvm10': [11, 14, 17 ] + 'llvm11': [11, 14, 17, 20] + 'llvm12': [11, 14, 17, 20] + 'llvm13': [11, 14, 17, 20] + 'llvm14': [11, 14, 17, 20] + 'llvm15': [11, 14, 17, 20] + 'llvm16': [11, 14, 17, 20] + 'cl14.16': [ 14 ] + 'cl14.29': [ 14, 17 ] + 'cl14.39': [ 14, 17, 20] + 'oneapi2023.2.0': [11, 14, 17 ] +lookup_cudacxx_supported_stds: + 'nvcc11.1': [11, 14, 17 ] + 'nvcc11.8': [11, 14, 17 ] + 'nvcc12.4': [11, 14, 17, 20] + 'llvm16': [11, 14, 17, 20] + +# Tags that aren't exploded: +non_exploded_tags: + - 'jobs' # Keeping jobs as a list allows for dependency handling of build->test steps. + +# Jobs that have an implied prerequisite 'build' job: +build_required_jobs: + - 'test' + +# Jobs that require a GPU +gpu_required_jobs: + - 'test' + - 'nvrtc' + - 'infra' # cccl infra's example project test launches a kernel + +# When --skip-tests is given to compute-matrix.py, these jobs are ignored. +skip_test_jobs: + - 'test' + - 'nvrtc' + - 'infra' + +# Human readable name for jobs. Default behavior is to capitalize the first letter. +formatted_jobs: + 'nvrtc': 'NVRTC' + 'verify_codegen': 'VerifyCodegen' + +# Human readable name for projects. Default behavior uses the project name as-is. 
+formatted_project_names: + 'libcudacxx': 'libcu++' + 'cub': 'CUB' + 'thrust': 'Thrust' + 'cccl': 'CCCL' + +# Human readable name for compilers. Default behavior uses the "compiler.name" tag as-is. +formatted_cxx_names: + 'llvm': 'clang' + 'oneapi': 'Intel' + 'cl': 'MSVC' + +# All known GPUs +gpus: + - 'v100' # 40 runners + - 't4' # 8 runners + - 'rtx2080' # 8 runners + - 'rtxa6000' # 12 runners + - 'l4' # 48 runners + - 'rtx4090' # 10 runners + - 'h100' # 16 runners + +# SM versions of GPUs +gpu_sm: + 'v100': '70' + 't4': '75' + 'rtx2080': '75' + 'rtxa6000': '86' + 'l4': '89' + 'rtx4090': '89' + 'h100': '90' + +# Memory size of GPUs +gpu_mem_gb: + 'v100': '32' + 't4': '16' + 'rtx2080': '8' + 'rtxa6000': '48' + 'l4': '24' + 'rtx4090': '24' + 'h100': '80' + +# GPUs that require `-testing` at the end of the runner pool name. +testing_pool_gpus: + - 't4' + - 'rtx2080' + - 'rtxa6000' + - 'l4' + - 'rtx4090' + - 'h100' diff --git a/ci/verify_codegen.sh b/ci/verify_codegen_libcudacxx.sh similarity index 100% rename from ci/verify_codegen.sh rename to ci/verify_codegen_libcudacxx.sh From 09e8a8e96dbaf2981873ce58800ed0d6683028e0 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 3 May 2024 14:10:04 -0400 Subject: [PATCH 3/4] Branch protection WAR: #605 Reprise --- .github/workflows/ci-workflow-nightly.yml | 29 ++++++++----- .../workflows/ci-workflow-pull-request.yml | 41 ++++++++++++------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci-workflow-nightly.yml b/.github/workflows/ci-workflow-nightly.yml index ed1bb149b3..f6543149b4 100644 --- a/.github/workflows/ci-workflow-nightly.yml +++ b/.github/workflows/ci-workflow-nightly.yml @@ -61,10 +61,11 @@ jobs: name: ${{ matrix.name }} jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} - # Check all other job statuses. This job gates branch protection checks. - ci: - name: CI - if: ${{ always() || !cancelled() }} + # This job acts as a sentry and will fail if any leaf job in the workflow tree fails, as + # run-workflow always succeeds. Use this job when checking for successful matrix workflow job completion. + verify-workflow: + name: Verify and summarize workflow results + if: ${{ always() && !cancelled() }} needs: - build-workflow - run-workflow @@ -78,9 +79,19 @@ jobs: - name: Check workflow success id: check-workflow uses: ./.github/actions/workflow-results - with: - github_token: ${{ secrets.GITHUB_TOKEN }} + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + # !! Important: This job is used for branch protection checks. + # !! Need to use always() instead of !cancelled() because skipped jobs count as success + # !! for Github branch protection checks. Yes, really: by default, branch protections + # !! can be bypassed by cancelling CI. See NVIDIA/cccl#605. 
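+    # !! Because of always(), a cancelled or skipped verify-workflow reports a
+    # !! result other than "success", so the check below still fails the gate.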
+ if: ${{ always() }} + needs: + - verify-workflow + runs-on: ubuntu-latest + steps: - name: Check results run: | status="passed" @@ -98,10 +109,8 @@ jobs: fi } - # Note that run-workflow is different: - check_result "build-workflow" "success" "${{needs.build-workflow.result}}" - check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + check_result "verify-workflow" "success" "${{needs.verify-workflow.result}}" - if [[ "$status" == "failed" ]]; then + if [[ "$status" != "success" ]]; then exit 1 fi diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml index 3ff29cfeb2..8fc17bd5bf 100644 --- a/.github/workflows/ci-workflow-pull-request.yml +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -73,21 +73,14 @@ jobs: name: ${{ matrix.name }} jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} - verify-devcontainers: - name: Verify Dev Containers - permissions: - id-token: write - contents: read - uses: ./.github/workflows/verify-devcontainers.yml - - # Check all other job statuses. This job gates branch protection checks. - ci: - name: CI + # This job acts as a sentry and will fail if any leaf job in the workflow tree fails, as + # run-workflow always succeeds. Use this job when checking for successful matrix workflow job completion. + verify-workflow: + name: Verify and summarize workflow results if: ${{ always() && !cancelled() }} needs: - build-workflow - run-workflow - - verify-devcontainers permissions: contents: read pull-requests: write # Posts a comment back to the PR. @@ -107,6 +100,26 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} pr_number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }} + verify-devcontainers: + name: Verify Dev Containers + permissions: + id-token: write + contents: read + uses: ./.github/workflows/verify-devcontainers.yml + + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + # !! Important: This job is used for branch protection checks. + # !! Need to use always() instead of !cancelled() because skipped jobs count as success + # !! for Github branch protection checks. Yes, really: by default, branch protections + # !! can be bypassed by cancelling CI. See NVIDIA/cccl#605. + if: ${{ always() }} + needs: + - verify-workflow + - verify-devcontainers + runs-on: ubuntu-latest + steps: - name: Check results run: | status="passed" @@ -124,11 +137,9 @@ jobs: fi } - # Note that run-workflow is different: - check_result "build-workflow" "success" "${{needs.build-workflow.result}}" - check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + check_result "verify-workflow" "success" "${{needs.verify-workflow.result}}" check_result "verify-devcontainers" "success" "${{needs.verify-devcontainers.result}}" - if [[ "$status" == "failed" ]]; then + if [[ "$status" != "success" ]]; then exit 1 fi From 6f04c7eced979c70465074d22dacedd440420325 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 3 May 2024 21:59:48 +0000 Subject: [PATCH 4/4] Fix condition check. [skip-tests] since this is only changing infra. 
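A note on what this one-word fix addresses: the gate added in the previous patch initialized `status="passed"`, but its final test is `[[ "$status" != "success" ]]`, so the `ci` job exited non-zero even when every dependency succeeded. A short Python transliteration (an illustration, not code from this patch) makes the failure mode visible:

```python
# Illustrative sketch: the bash check_result gate from ci-workflow-*.yml,
# transliterated to Python to show the bug being fixed.

def gate(results, initial):
    status = initial  # patch 3 set this to "passed"; this patch uses "success"
    for name, (expected, actual) in results.items():
        if actual != expected:
            print(f"{name} job failed")
            status = "failed"
    # The workflow's final check is `if [[ "$status" != "success" ]]; then exit 1`:
    return status == "success"

all_green = {"verify-workflow": ("success", "success")}
assert gate(all_green, "success")      # after the fix: a green run passes the gate
assert not gate(all_green, "passed")   # before: even a green run failed the gate
```

Keeping the initializer, the failure value, and the final comparison on the same vocabulary is the whole fix; `check_result` itself is unchanged.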
--- .github/workflows/ci-workflow-nightly.yml | 2 +- .github/workflows/ci-workflow-pull-request.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow-nightly.yml b/.github/workflows/ci-workflow-nightly.yml index f6543149b4..0fe41e42fc 100644 --- a/.github/workflows/ci-workflow-nightly.yml +++ b/.github/workflows/ci-workflow-nightly.yml @@ -94,7 +94,7 @@ jobs: steps: - name: Check results run: | - status="passed" + status="success" check_result() { name=$1 diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml index 8fc17bd5bf..ec4c898488 100644 --- a/.github/workflows/ci-workflow-pull-request.yml +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -122,7 +122,7 @@ jobs: steps: - name: Check results run: | - status="passed" + status="success" check_result() { name=$1