diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index f868cc14f17..6572e37b1ec 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -74,6 +74,7 @@ while [[ $# -gt 0 ]]; do done MATRIX_FILE="../ci/matrix.yaml" +COMPUTE_MATRIX="../ci/compute-matrix.py" # Enable verbose mode if requested if [ "$VERBOSE" = true ]; then @@ -82,16 +83,17 @@ if [ "$VERBOSE" = true ]; then fi # Read matrix.yaml and convert it to json -matrix_json=$(yq -o json ${MATRIX_FILE}) +matrix_json=$(python3 ${COMPUTE_MATRIX} ${MATRIX_FILE} --devcontainer-info) -# Exclude Windows environments -readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))') +if [ "$VERBOSE" = true ]; then + echo "$matrix_json" +fi # Get the devcontainer image version and define image tag root readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version') # Get unique combinations of cuda version, compiler name/version, and Ubuntu version -readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]') +readonly combinations=$(echo "$matrix_json" | jq -c '.combinations[]') # Update the base devcontainer with the default values # The root devcontainer.json file is used as the default container as well as a template for all diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml deleted file mode 100644 index b8155e7aa65..00000000000 --- a/.github/actions/compute-matrix/action.yml +++ /dev/null @@ -1,25 +0,0 @@ - -name: Compute Matrix -description: "Compute the matrix for a given matrix type from the specified matrix file" - -inputs: - matrix_query: - description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" - required: true - matrix_file: - description: 'The file containing the matrix' - required: true -outputs: - matrix: - description: 'The requested matrix' - value: ${{ steps.compute-matrix.outputs.MATRIX }} - -runs: - using: "composite" - steps: - - name: Compute matrix - id: compute-matrix - run: | - MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) - echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT - shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh deleted file mode 100755 index 1629836d216..00000000000 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -write_output() { - local key="$1" - local value="$2" - echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" -} - -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. 
as $o | {lib: $o.lib[]} + del($o.lib))' -} - -# Filter out the libraries that are dirty -filter_libs() { - all_libs=("libcudacxx" "thrust" "cub") - dirty_libs=() - for lib in "${all_libs[@]}"; do - dirty_var_name="${lib^^}_DIRTY" - # If the variable named in dirty_var_name is not set, set it to false: - : "${!dirty_var_name:=false}" - # Declare a nameref to the variable named in dirty_var_name - declare -n lib_dirty="$dirty_var_name" - # echo "${lib^^}_DIRTY: ${lib_dirty}" >> /dev/stderr - if [ "${lib_dirty}" = "true" ]; then - dirty_libs+=("$lib") - fi - done - # echo "Dirty libraries: ${dirty_libs[@]}" >> /dev/stderr - - # Construct a regex to filter out the dirty libraries - dirty_lib_regex=$(IFS="|"; echo "${dirty_libs[*]}") - dirty_lib_regex="^(${dirty_lib_regex})\$" - jq_filter="map(select(.lib | test(\"$dirty_lib_regex\")))" - jq -cr "$jq_filter" -} - -extract_matrix() { - local file="$1" - local type="$2" - local matrix=$(yq -o=json "$file" | jq -cr ".$type") - write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" - local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" - write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" - write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" - - write_output "NVRTC_MATRIX" "$(echo "$matrix" | jq '.nvrtc' | explode_std_versions)" - - local clang_cuda_matrix="$(echo "$matrix" | jq -cr '.["clang-cuda"]' | explode_std_versions | explode_libs | filter_libs)" - write_output "CLANG_CUDA_MATRIX" "$clang_cuda_matrix" - write_output "CCCL_INFRA_MATRIX" "$(echo "$matrix" | jq -cr '.["cccl-infra"]' )" -} - -main() { - if [ "$1" == "-v" ]; then - set -x - shift - fi - - if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then - echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" - echo " -v : Enable verbose output" - echo " MATRIX_FILE : The path to the matrix file." - echo " MATRIX_TYPE : The desired matrix. 
Supported values: 'pull_request'" - exit 1 - fi - - echo "Input matrix file:" >&2 - cat "$1" >&2 - echo "Matrix Type: $2" >&2 - - extract_matrix "$1" "$2" -} - -main "$@" diff --git a/.github/workflows/ci-dispatch-group.yml b/.github/workflows/ci-dispatch-group.yml new file mode 100644 index 00000000000..1d5b2e465b1 --- /dev/null +++ b/.github/workflows/ci-dispatch-group.yml @@ -0,0 +1,45 @@ +name: "CI/Dispatch/Group" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + name: {type: string, required: true} + jobs: {type: string, required: true} + +permissions: + contents: read + +jobs: + standlone-jobs: + name: '[Standalone]' + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['standalone']}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + two-stage-jobs: + name: '[TwoStage]' + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['two_stage']}} + uses: ./.github/workflows/ci-dispatch-two-stage.yml + with: + producers: ${{ toJSON(matrix.producers) }} + consumers: ${{ toJSON(matrix.consumers) }} diff --git a/.github/workflows/ci-dispatch-job.yml b/.github/workflows/ci-dispatch-job.yml new file mode 100644 index 00000000000..aec1f06dcfa --- /dev/null +++ b/.github/workflows/ci-dispatch-job.yml @@ -0,0 +1,145 @@ +name: "CI/Dispatch/Job" + +# Important note about depending on this workflow: The `result` will be a failure, even if successful. +# +# This reusable workflow dispatches to a number of internal jobs. Only one job will run, +# and some may be in error states due to empty matrices (which are used instead of `if` to keep +# skipped dispatch jobs out of the GHA UI). The `continue-on-error` flag should prevent these +# errors from failing the workflow, but this does not work. +# +# Thus, the `result` of this workflow will always be a failure, even if the job itself is successful. +# To depend on this job, you should use the `success` output instead: +# +# ``` +# dependent_job: +# needs: dispatch-job +# if: ${{ !cancelled() && needs.dispatch-job.outputs.success }} +# ``` + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + outputs: + success: + value: ${{ contains(toJSON(jobs.*.outputs.success), 'true') }} + inputs: + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + env: {type: string, required: false} + dummy_matrix: {type: string, required: false, default: '[{"valid": true}]'} + +permissions: + contents: read + +jobs: + linux: + name: ${{inputs.name}} + continue-on-error: ${{ ! 
startsWith(inputs.runner, 'linux') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'linux') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + container: + options: -u root + image: ${{inputs.image}} + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + path: cccl + persist-credentials: false + - name: Move files to coder user home directory + run: | + cp -R cccl /home/coder/cccl + chown -R coder:coder /home/coder/ + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./cccl/.github/actions/configure_cccl_sccache + - name: Run command + shell: su coder {0} + env: + COMMAND: ${{inputs.command}} + run: | + set -eo pipefail + cd ~/cccl + echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" + echo -e "\e[1;34m${COMMAND}\e[0m" + eval "${COMMAND}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + - name: Mark job as successful + id: done + run: echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} + + windows: + name: ${{inputs.name}} + continue-on-error: ${{ ! 
startsWith(inputs.runner, 'windows') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'windows') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.image }} + run: docker pull ${{ inputs.image }} + - name: Run Command + run: >- + docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/cccl.git; + cd cccl; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.command}}" + - name: Mark job as successful + id: done + run: echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} diff --git a/.github/workflows/ci-dispatch-two-stage.yml b/.github/workflows/ci-dispatch-two-stage.yml new file mode 100644 index 00000000000..9f4e2cb5b22 --- /dev/null +++ b/.github/workflows/ci-dispatch-two-stage.yml @@ -0,0 +1,54 @@ +name: "CI/Dispatch/TwoStage" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + producers: {type: string, required: true} + consumers: {type: string, required: true} + +permissions: + contents: read + +jobs: + producers: + name: '[Producer]' + # It is impossible to accumulate output variables across a matrix, and we cannot rely on the results of the dispatch-job workflow to determine success. + # See the note in ci-dispatch-job.yml for more information. + # + # Since we cannot accumulate results from multiple producers, only support a single producer for now. + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.producers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + consumers: + name: '[Consumer]' + needs: producers + # dispatch-job's result is always false, check the outputs instead. See ci-dispatch-job.yml for more information. 
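+    # That `success` output is aggregated in ci-dispatch-job.yml as
+    # ${{ contains(toJSON(jobs.*.outputs.success), 'true') }}, i.e. it is 'true' only if
+    # whichever platform job actually ran (linux or windows) reached its final
+    # "Mark job as successful" step.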
+ if: ${{ !cancelled() && fromJson(needs.producers.outputs.success) }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.consumers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index 7b5ed4ef272..3f8227152ed 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -4,6 +4,7 @@ on: workflow_call: inputs: project_name: {type: string, required: true} + job_type: {type: string, required: true} per_cuda_compiler_matrix: {type: string, required: true} devcontainer_version: {type: string, required: true} is_windows: {type: boolean, required: true} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000000..36bfe89969d --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the main workflow that runs on every PR and push to main +name: nightly + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + # FIXME: This should be a cron job that runs nightly + push: # Testing only + branches: + - "pull-request/[0-9]+" + # schedule: + # - cron: '0 7 * * *' # 7AM UTC, 12AM PST, 3AM EST + +# Only runs one instance of this workflow at a time. Cancels any in-progress runs when a new one starts. 
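+# The group key below combines the workflow name, the triggering event, and the ref, so a new
+# run only cancels an in-progress run of the same workflow on the same branch and event.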
+concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: read + +jobs: + compute-matrix: + name: Compute matrix + runs-on: ubuntu-latest + outputs: + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} + steps: + - name: Get Base Branch from PR + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Checkout repo + uses: actions/checkout@v3 + - name: Identify dirty subprojects + id: inspect-changes + run: | + ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} + env: + BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + - name: Compute matrix outputs + id: compute-matrix + run: | + ci/compute-matrix.py ci/matrix.yaml --workflow ${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} + + dispatch-groups: + name: ${{ matrix.name }} + needs: + - compute-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} + + # This job is the final job that runs after all other jobs and is used for branch protection status checks. + # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks + # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 + ci: + runs-on: ubuntu-latest + name: CI + if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success + needs: + - dispatch-groups + steps: + - name: Check status of all precursor jobs + if: >- + ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} + run: exit 1 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 3dcee0cf6c6..b1f293a6119 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,7 +14,7 @@ # limitations under the License. 
# This is the main workflow that runs on every PR and push to main -name: pr +name: pull_request defaults: run: @@ -35,13 +35,13 @@ permissions: pull-requests: read jobs: - inspect-changes: - name: "Inspect Changes" + compute-matrix: + name: Compute matrix runs-on: ubuntu-latest outputs: - LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} - CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} - THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} + DEVCONTAINER_VERSION: ${{steps.compute-matrix.outputs.DEVCONTAINER_VERSION}} + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} steps: - name: Get Base Branch from PR id: get-pr-info @@ -49,154 +49,31 @@ jobs: - name: Checkout repo uses: actions/checkout@v3 - name: Identify dirty subprojects - id: set-outputs + id: inspect-changes run: | ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} env: BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} - - compute-matrix: - name: Compute matrix - runs-on: ubuntu-latest - needs: - - inspect-changes - outputs: - DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} - PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} - PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} - NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} - CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} - CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - name: Compute matrix outputs - id: set-outputs + id: compute-matrix run: | - .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request - env: - THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} - CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} - LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} - - nvrtc: - name: libcudacxx NVRTC CUDA${{matrix.cuda}} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/run-as-coder.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} - with: - name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} - - thrust: - name: Thrust CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "thrust" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + ci/compute-matrix.py ci/matrix.yaml --workflow 
${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} - cub: - name: CUB CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "cub" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - libcudacxx: - name: libcudacxx CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read + dispatch-groups: + name: ${{ matrix.name }} needs: - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "libcudacxx" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - clang-cuda: - name: ${{matrix.lib}} Clang CUDA - permissions: - id-token: write - contents: read - needs: compute-matrix - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-cpu16 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" - - cccl-infra: - name: CCCL Infrastructure permissions: id-token: write contents: read - needs: compute-matrix - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} strategy: fail-fast: false matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml with: - name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - cmake -S . 
--preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} - ctest --preset=cccl-infra + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} verify-devcontainers: name: Verify Dev Containers @@ -205,20 +82,6 @@ jobs: contents: read uses: ./.github/workflows/verify-devcontainers.yml - verify-codegen: - name: Verify Codegen in libcudacxx - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run libcudacxx codegen verification - id: verify-codegen - run: | - sudo apt-get update - sudo apt-get install ninja-build - export CXX="g++" - ./ci/verify_codegen.sh - # This job is the final job that runs after all other jobs and is used for branch protection status checks. # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 @@ -227,14 +90,8 @@ jobs: name: CI if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success needs: - - clang-cuda - - cub - - libcudacxx - - nvrtc - - thrust - - cccl-infra + - dispatch-groups - verify-devcontainers - - verify-codegen steps: - name: Check status of all precursor jobs if: >- diff --git a/ci/build_common.sh b/ci/build_common.sh index 239d463eadc..5d36919c4a1 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -216,10 +216,13 @@ function test_preset() { local BUILD_NAME=$1 local PRESET=$2 - local GROUP_NAME="🚀 Test ${BUILD_NAME}" + local GPU_REQUIRED=${3:-"true"} - fail_if_no_gpu + if [ $GPU_REQUIRED -eq "true" ]; then + fail_if_no_gpu + fi + local GROUP_NAME="🚀 Test ${BUILD_NAME}" ctest_log_dir="${BUILD_DIR}/log/ctest" ctest_log="${ctest_log_dir}/${PRESET}" diff --git a/ci/compute-matrix.py b/ci/compute-matrix.py new file mode 100755 index 00000000000..eaa47b2b2de --- /dev/null +++ b/ci/compute-matrix.py @@ -0,0 +1,592 @@ +#!/usr/bin/env python3 + +""" +Concepts: +- matrix_job: an entry of a workflow matrix, converted from matrix.yaml["workflow"][id] into a JSON object. + Example: + { + "job_types": [ + "test" + ], + "ctk": "11.1", + "gpu": "t4", + "cmake_cuda_arch": "75-real", + "host_compiler": { + "name": "llvm", + "version": "9", + "exe": "clang++" + }, + "std": [ + 17 + ], + "project": [ + "libcudacxx", + "cub", + "thrust" + ], + "os": "ubuntu18.04" + } + +Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where +the matrix job is turned into one or more dispatch groups consisting of potentially many jobs. + +- dispatch_group_json: A json object used in conjunction with the ci-dispatch-groups.yml GHA workflow. + Example: + { + "": { + "standalone": [ {}, ... ] + "two_stage": [ {}, ] + } + } + +- two_stage_json: A json object that represents bulk-synchronous producer/consumer jobs, used with ci-dispatch-two-stage.yml. + Example: + { + "producers": [ {}, ... ], + "consumers": [ {}, ... ] + } + +- job_json: A json object that represents a single job in a workflow. Used with ci-dispatch-job.yml. + Example: + { + name: "...", + runner: "...", + image: "...", + command: "..." 
}, + } +""" + +import argparse +import copy +import json +import os +import re +import sys +import yaml + +matrix_yaml = None + + +def write_output(key, value): + print(f"::group::GHA Output: {key}") + print(f"{key}={value}") + print("::endgroup::") + + # Check if the GITHUB_OUTPUT environment variable is set, and write to that file if it is. + output_file = os.environ.get('GITHUB_OUTPUT') + if output_file: + with open(output_file, 'a') as f: + print(f"{key}={value}", file=f) + + +def lookup_os(ctk, host_compiler_name, host_compiler_version): + key = f'ctk{ctk}-{host_compiler_name}{host_compiler_version}' + return matrix_yaml['default_os_lookup'][key] + + +def get_formatted_projected_name(project_name): + if project_name in matrix_yaml['formatted_project_names']: + return matrix_yaml['formatted_project_names'][project_name] + return project_name + + +def get_formatted_host_compiler_name(host_compiler): + config_name = host_compiler['name'] + if config_name in matrix_yaml['formatted_host_compiler_names']: + return matrix_yaml['formatted_host_compiler_names'][config_name] + return config_name + + +def get_formatted_job_type(job_type): + if job_type in matrix_yaml['formatted_job_types']: + return matrix_yaml['formatted_job_types'][job_type] + # Return with first letter capitalized: + return job_type.capitalize() + + +def is_windows(matrix_job): + return matrix_job['os'].startswith('windows') + + +def validate_required_tags(matrix_job): + for tag in matrix_yaml['required_tags']: + if tag not in matrix_job: + raise Exception(f"Missing required tag '{tag}' in matrix job {matrix_job}") + + required_tags = set(matrix_yaml['required_tags']) + defaulted_tags = set(matrix_yaml['defaulted_tags']) + optional_tags = set(matrix_yaml['optional_tags']) + all_tags = required_tags | defaulted_tags | optional_tags + + for tag in matrix_job: + if tag not in all_tags: + raise Exception(f"Unknown tag '{tag}' in matrix job {matrix_job}") + + +def set_default_tags(matrix_job): + generic_defaults = set(matrix_yaml['defaulted_tags']) + generic_defaults -= set(['os']) # handled specially. 
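+    # Any remaining defaulted tag the job omits falls back to the matching
+    # `default_<tag>` entry in matrix.yaml (default_ctk, default_cpu, default_gpu,
+    # default_project, ...); 'os' is derived later in set_derived_tags() via default_os_lookup.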
+ + for tag in generic_defaults: + if tag not in matrix_job: + matrix_job[tag] = matrix_yaml['default_'+tag] + + +def set_derived_tags(matrix_job): + if 'os' not in matrix_job: + matrix_job['os'] = lookup_os(matrix_job['ctk'], + matrix_job['host_compiler']['name'], + matrix_job['host_compiler']['version']) + + # Expand nvcc device compiler shortcut: + if matrix_job['device_compiler'] == 'nvcc': + matrix_job['device_compiler'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'} + + +def generate_dispatch_group_name(matrix_job): + project_name = get_formatted_projected_name(matrix_job['project']) + ctk = matrix_job['ctk'] + device_compiler = matrix_job['device_compiler'] + host_compiler_name = get_formatted_host_compiler_name(matrix_job['host_compiler']) + + compiler_info = "" + if device_compiler['name'] == 'nvcc': + compiler_info = f"nvcc {host_compiler_name}" + elif device_compiler['name'] == 'llvm': + compiler_info = f"clang-cuda" + else: + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler_name}" + + return f"{project_name} {compiler_info} CTK{ctk}" + + +def generate_dispatch_job_name(matrix_job, job_type): + std_str = ("C++" + str(matrix_job['std']) + " ") if 'std' in matrix_job else '' + cpu_str = matrix_job['cpu'] + gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_job_types'] else "" + cuda_compile_arch = (" sm{" + matrix_job['cmake_cuda_arch'] + "}") if 'cmake_cuda_arch' in matrix_job else "" + cmake_options = (' ' + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" + + host_compiler_name = get_formatted_host_compiler_name(matrix_job['host_compiler']) + host_compiler_info = f"{host_compiler_name}{matrix_job['host_compiler']['version']}" + + config_tag = f"{std_str}{host_compiler_info}" + + formatted_job_type = get_formatted_job_type(job_type) + + extra_info = f":{cuda_compile_arch}{cmake_options}" if cuda_compile_arch or cmake_options else "" + + return f"[{config_tag}] {formatted_job_type}({cpu_str}{gpu_str}){extra_info}" + + +def generate_dispatch_job_runner(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu = matrix_job['cpu'] + + if not job_type in matrix_yaml['gpu_required_job_types']: + return f"{runner_os}-{cpu}-cpu16" + + gpu = matrix_job['gpu'] + suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + + return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + + +def generate_dispatch_job_image(matrix_job, job_type): + devcontainer_version = matrix_yaml['devcontainer_version'] + ctk = matrix_job['ctk'] + image_os = matrix_job['os'] + host_compiler = matrix_job['host_compiler']['name'] + \ + matrix_job['host_compiler']['version'] + + if is_windows(matrix_job): + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + + +def generate_dispatch_job_command(matrix_job, job_type): + script_path = "ci/windows" if is_windows(matrix_job) else "ci" + script_ext = ".ps1" if is_windows(matrix_job) else ".sh" + script_job_type = job_type + script_project = matrix_job['project'] + script_name = f"{script_path}/{script_job_type}_{script_project}{script_ext}" + + std_str = str(matrix_job['std']) if 'std' in matrix_job else '' + + host_compiler_exe = matrix_job['host_compiler']['exe'] + device_compiler_name = matrix_job['device_compiler']['name'] + device_compiler_exe = 
matrix_job['device_compiler']['exe'] + + cuda_compile_arch = matrix_job['cmake_cuda_arch'] if 'cmake_cuda_arch' in matrix_job else '' + cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' + + command = f"\"{script_name}\"" + if std_str: + command += f" -std \"{std_str}\"" + if cuda_compile_arch: + command += f" -arch \"{cuda_compile_arch}\"" + if device_compiler_name != 'nvcc': + command += f" -cuda \"{device_compiler_exe}\"" + if cmake_options: + cmake_args = " ".join([f"{key}={value}" for key, value in cmake_options.items()]) + command += f" -cmake-options \"{cmake_args}\"" + + return command + + +def generate_dispatch_job_json(matrix_job, job_type): + return { + 'name': generate_dispatch_job_name(matrix_job, job_type), + 'runner': generate_dispatch_job_runner(matrix_job, job_type), + 'image': generate_dispatch_job_image(matrix_job, job_type), + 'command': generate_dispatch_job_command(matrix_job, job_type) + } + + +# Create a single build producer, and a separate consumer for each test_job_type: +def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): + build_json = generate_dispatch_job_json(matrix_job, build_job_type) + + test_json = [] + for test_job_type in test_job_types: + test_json.append(generate_dispatch_job_json(matrix_job, test_job_type)) + + return { + "producers": [build_json], + "consumers": test_json + } + + +def generate_dispatch_group_jobs(matrix_job): + dispatch_group_jobs = { + "standalone": [], + "two_stage": [] + } + + job_types = set(copy.deepcopy(matrix_job['job_types'])) + + # job_types that appear in build_required_job_types: + build_required = set(matrix_yaml['build_required_job_types']) & job_types + has_build_and_test = len(build_required) > 0 + job_types -= build_required + + has_standalone_build = 'build' in job_types and not has_build_and_test + job_types -= {'build'} + + if has_standalone_build: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, "build")) + elif has_build_and_test: + dispatch_group_jobs['two_stage'].append( + generate_dispatch_build_and_test_json(matrix_job, "build", build_required)) + + # Remaining jobs are assumed to be standalone (e.g. 
nvrtc): + for job_type in job_types: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) + + return dispatch_group_jobs + + +def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): + for group_name, group_json in new_dispatch_groups.items(): + if group_name not in accum_dispatch_groups: + accum_dispatch_groups[group_name] = group_json + else: + # iterate standalone and two_stage: + for key, value in group_json.items(): + accum_dispatch_groups[group_name][key] += value + + +def matrix_job_to_dispatch_group(matrix_job): + return {generate_dispatch_group_name(matrix_job): generate_dispatch_group_jobs(matrix_job)} + + +def explode_tags(matrix_job): + explode_tag = None + for tag in matrix_job: + if tag != "job_types" and isinstance(matrix_job[tag], list): + explode_tag = tag + break + + if not explode_tag: + return [matrix_job] + + result = [] + for value in matrix_job[explode_tag]: + new_job = copy.deepcopy(matrix_job) + new_job[explode_tag] = value + result.extend(explode_tags(new_job)) + + return result + + +def preprocess_matrix_jobs(matrix_jobs): + result = [] + for matrix_job in matrix_jobs: + validate_required_tags(matrix_job) + set_default_tags(matrix_job) + for job in explode_tags(matrix_job): + set_derived_tags(job) + result.append(job) + return result + + +def filter_projects(matrix_jobs, projects): + return [job for job in matrix_jobs if job['project'] in projects] + + +def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig): + workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig) + + # Merge consumers for any two_stage arrays that have the same producer(s). Print a warning. + for group_name, group_json in workflow_dispatch_groups.items(): + if not 'two_stage' in group_json: + continue + two_stage_json = group_json['two_stage'] + merged_producers = [] + merged_consumers = [] + for two_stage in two_stage_json: + producers = two_stage['producers'] + consumers = two_stage['consumers'] + if producers in merged_producers: + producer_index = merged_producers.index(producers) + matching_consumers = merged_consumers[producer_index] + + producer_names = ", ".join([producer['name'] for producer in producers]) + print(f"::notice file=ci/matrix.yaml::Merging consumers for duplicate producer '{producer_names}' in '{group_name}'", + file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Original consumers: {consumer_names}", file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in consumers]) + print(f"::notice file=ci/matrix.yaml::Duplicate consumers: {consumer_names}", file=sys.stderr) + # Merge if unique: + for consumer in consumers: + if consumer not in matching_consumers: + matching_consumers.append(consumer) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Merged consumers: {consumer_names}", file=sys.stderr) + else: + merged_producers.append(producers) + merged_consumers.append(consumers) + # Update with the merged lists: + two_stage_json = [] + for producers, consumers in zip(merged_producers, merged_consumers): + two_stage_json.append({'producers': producers, 'consumers': consumers}) + group_json['two_stage'] = two_stage_json + + # Check for any duplicate jobs in standalone arrays. Warn and remove duplicates. 
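+    # Duplicates are kept only once (first occurrence wins), and any standalone job that
+    # also appears as a producer or consumer of a two_stage entry is dropped so that it
+    # is not dispatched twice.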
+ for group_name, group_json in workflow_dispatch_groups.items(): + standalone_jobs = group_json['standalone'] if 'standalone' in group_json else [] + unique_standalone_jobs = [] + for job_json in standalone_jobs: + if job_json in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'", + file=sys.stderr) + else: + unique_standalone_jobs.append(job_json) + + # If any producer/consumer jobs exist in standalone arrays, warn and remove the standalones. + two_stage_jobs = group_json['two_stage'] if 'two_stage' in group_json else [] + for two_stage_job in two_stage_jobs: + for producer in two_stage_job['producers']: + if producer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{producer['name']}' " + + f"as it appears as a producer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(producer) + for consumer in two_stage_job['consumers']: + if consumer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{consumer['name']}' " + + f"as it appears as a consumer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(consumer) + standalone_jobs = list(unique_standalone_jobs) + + # If any producer or consumer job appears more than once, warn and leave as-is. + all_two_stage_jobs = [] + duplicate_jobs = {} + for two_stage_job in two_stage_jobs: + for job in two_stage_job['producers'] + two_stage_job['consumers']: + if job in all_two_stage_jobs: + duplicate_jobs[job['name']] = duplicate_jobs.get(job['name'], 1) + 1 + else: + all_two_stage_jobs.append(job) + for job_name, count in duplicate_jobs.items(): + print(f"::warning file=ci/matrix.yaml::" + + f"Job '{job_name}' appears {count} times in '{group_name}'.", + f"Cannot remove duplicate while resolving dependencies. This job WILL execute {count} times.", + file=sys.stderr) + + # Remove all named values that contain an empty list of jobs: + for group_name, group_json in workflow_dispatch_groups.items(): + if not group_json['standalone'] and not group_json['two_stage']: + del workflow_dispatch_groups[group_name] + elif not group_json['standalone']: + del group_json['standalone'] + elif not group_json['two_stage']: + del group_json['two_stage'] + + # Natural sort impl (handles embedded numbers in strings, case insensitive) + def natural_sort_key(key): + return [(int(text) if text.isdigit() else text.lower()) for text in re.split('(\d+)', key)] + + # Sort the dispatch groups by name: + workflow_dispatch_groups = dict(sorted(workflow_dispatch_groups.items(), key=lambda x: natural_sort_key(x[0]))) + + # Sort the jobs within each dispatch group: + for group_name, group_json in workflow_dispatch_groups.items(): + if 'standalone' in group_json: + group_json['standalone'] = sorted(group_json['standalone'], key=lambda x: natural_sort_key(x['name'])) + if 'two_stage' in group_json: + group_json['two_stage'] = sorted( + group_json['two_stage'], key=lambda x: natural_sort_key(x['producers'][0]['name'])) + + # Check to see if any .two_stage.producers arrays have more than 1 job, which is not supported. See ci-dispatch-two-stage.yml for details. 
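+    # ci-dispatch-two-stage.yml cannot aggregate `success` outputs across a matrix of
+    # producers, so a two_stage entry with more than one producer is rejected here with
+    # an error rather than dispatched.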
+ for group_name, group_json in workflow_dispatch_groups.items(): + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + num_producers = len(two_stage_json['producers']) + if num_producers > 1: + producer_names = "" + for job in two_stage_json['producers']: + producer_names += f" - {job['name']}\n" + error_message = f"ci-dispatch-two-stage.yml currently only supports a single producer. " + error_message += f"Found {num_producers} producers in '{group_name}':\n{producer_names}" + print(f"::error file=ci/matrix.yaml::{error_message}", file=sys.stderr) + raise Exception(error_message) + + return workflow_dispatch_groups + + +def pretty_print_workflow(final_workflow, outfile): + print(f"::group::Job list", file=outfile) + + def print_job_array(total_jobs, key, group_json): + job_array = group_json[key] if key in group_json else [] + key += ":" + for job_json in job_array: + total_jobs += 1 + print(f"{total_jobs:4} {key:13} {job_json['name']}", file=outfile) + return total_jobs + + total_jobs = 0 + for group_name, group_json in final_workflow.items(): + print(f"{'':4} {group_name}:", file=outfile) + total_jobs = print_job_array(total_jobs, 'standalone', group_json) + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + total_jobs = print_job_array(total_jobs, 'producers', two_stage_json) + total_jobs = print_job_array(total_jobs, 'consumers', two_stage_json) + print(f"::endgroup::", file=outfile) + print(f"Total jobs: {total_jobs}", file=outfile) + + print("::group::Final Workflow JSON", file=outfile) + print(json.dumps(final_workflow, indent=2), file=outfile) + print("::endgroup::", file=outfile) + + +def print_gha_workflow(args): + matrix_jobs = preprocess_matrix_jobs(matrix_yaml['workflows'][args.workflow]) + + # print("::group::Matrix Jobs", file=sys.stderr) + # print("Matrix Jobs:", file=sys.stderr) + # for matrix_job in matrix_jobs: + # print(json.dumps(matrix_job, indent=None, separators=(',', ':')), file=sys.stderr) + # print("::end-group::", file=sys.stderr) + + if args.dirty_projects: + matrix_jobs = filter_projects(matrix_jobs, args.dirty_projects) + + workflow_dispatch_groups = {} + for matrix_job in matrix_jobs: + merge_dispatch_groups(workflow_dispatch_groups, matrix_job_to_dispatch_group(matrix_job)) + + final_workflow = finalize_workflow_dispatch_groups(workflow_dispatch_groups) + + pretty_print_workflow(final_workflow, sys.stderr) + + write_output("WORKFLOW_KEYS", json.dumps(list(final_workflow.keys()), indent=None, separators=(',', ':'))) + write_output("WORKFLOW", json.dumps(final_workflow, indent=None, separators=(',', ':'))) + + +def print_devcontainer_info(args): + devcontainer_version = matrix_yaml['devcontainer_version'] + + matrix_jobs = [] + for workflow in matrix_yaml['workflows']: + matrix_jobs.extend(matrix_yaml['workflows'][workflow]) + matrix_jobs = preprocess_matrix_jobs(matrix_jobs) + + # Remove all but the following keys from the matrix jobs: + keep_keys = ['ctk', 'host_compiler', 'os'] + combinations = [{key: job[key] for key in keep_keys} for job in matrix_jobs] + + # Remove duplicates and filter out windows jobs: + unique_combinations = [] + for combo in combinations: + if not is_windows(combo) and combo not in unique_combinations: + unique_combinations.append(combo) + + for combo in unique_combinations: + combo['compiler_name'] = combo['host_compiler']['name'] + combo['compiler_version'] = combo['host_compiler']['version'] + combo['compiler_exe'] = combo['host_compiler']['exe'] + del 
combo['host_compiler'] + + combo['cuda'] = combo['ctk'] + del combo['ctk'] + + devcontainer_json = {'devcontainer_version': devcontainer_version, 'combinations': unique_combinations} + + # Pretty print the devcontainer json to stdout: + print(json.dumps(devcontainer_json, indent=2)) + + +def main(): + global matrix_yaml + + parser = argparse.ArgumentParser(description='Compute matrix for workflow') + parser.add_argument('matrix_file', help='Path to the matrix YAML file') + parser_mode = parser.add_mutually_exclusive_group(required=True) + parser_mode.add_argument('--workflow', help='Print GHA workflow [pull_request, nightly, weekly, etc]') + parser_mode.add_argument('--devcontainer-info', action='store_true', + help='Print devcontainer info instead of GHA workflows.') + parser.add_argument('--dirty-projects', nargs='*', help='Filter jobs to only these projects') + args = parser.parse_args() + + # Check if the matrix file exists + if not os.path.isfile(args.matrix_file): + print(f"Error: Matrix file '{args.matrix_file}' does not exist.") + sys.exit(1) + + with open(args.matrix_file, 'r') as f: + matrix_yaml = yaml.safe_load(f) + + # Check if the workflow is valid + if args.workflow and 'workflows' not in matrix_yaml: + print(f"Error: Workflow 'workflows.{args.workflow}' does not exist in the matrix YAML.") + sys.exit(1) + + # Print usage if no arguments are provided + if not args.matrix_file and not args.workflow: + parser.print_usage() + sys.exit(1) + + # print("Arguments:", file=sys.stderr) + # print(args, file=sys.stderr) + # print("::group::Matrix YAML", file=sys.stderr) + # print("Matrix YAML:", file=sys.stderr) + # print(matrix_yaml, file=sys.stderr) + # print("::end-group::", file=sys.stderr) + + if args.workflow: + print_gha_workflow(args) + elif args.devcontainer_info: + print_devcontainer_info(args) + else: + print("Error: Either --workflow WORKFLOW or --devcontainers must be specified.", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/ci/infra_cccl.sh b/ci/infra_cccl.sh new file mode 100755 index 00000000000..475799ace26 --- /dev/null +++ b/ci/infra_cccl.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +source "$(dirname "$0")/build_common.sh" + +print_environment_details + +PRESET="cccl-infra" + +CMAKE_OPTIONS="" + +GPU_REQUIRED="false" + +if [ -n "${GITHUB_SHA:-}" ]; then + CMAKE_OPTIONS="$CMAKE_OPTIONS -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA}" +fi + +configure_preset "CCCL Infra" "$PRESET" "$CMAKE_OPTIONS" +test_preset "CCCL Infra" "$PRESET" "$GPU_REQUIRED" + +print_time_summary diff --git a/ci/inspect_changes.sh b/ci/inspect_changes.sh index 59500a70554..dce8e2033fd 100755 --- a/ci/inspect_changes.sh +++ b/ci/inspect_changes.sh @@ -21,6 +21,7 @@ base_sha=$(git merge-base $head_sha $base_sha) # Define a list of subproject directories: subprojects=( + cccl libcudacxx cub thrust @@ -28,6 +29,7 @@ subprojects=( # ...and their dependencies: declare -A dependencies=( + [cccl]="" [libcudacxx]="cccl" [cub]="cccl libcudacxx thrust" [thrust]="cccl libcudacxx cub" @@ -90,19 +92,6 @@ add_dependencies() { return 0 } -# write_subproject_status -# Write the output _DIRTY={true|false} -write_subproject_status() { - local subproject="$1" - local dirty_flag=${subproject^^}_DIRTY - - if [[ ${!dirty_flag} -ne 0 ]]; then - write_output "${dirty_flag}" "true" - else - write_output "${dirty_flag}" "false" - fi -} - main() { # Print the list of subprojects and all of their dependencies: echo "Subprojects: ${subprojects[*]}" @@ -118,21 +107,30 @@ main() { echo # Print the list of 
files that have changed: - echo "Dirty files:" + echo "::group::Dirty files" dirty_files | sed 's/^/ - /' - echo "" + echo "::endgroup::" + echo echo "Modifications in project?" + # Assign the return value of `inspect_cccl` to the variable `CCCL_DIRTY`: inspect_cccl CCCL_DIRTY=$? - echo "$(if [[ ${CCCL_DIRTY} -eq 0 ]]; then echo " "; else echo "X"; fi) - CCCL Infrastructure" + checkmark=$(if [[ ${CCCL_DIRTY} -eq 0 ]]; then echo " "; else echo "X"; fi) + echo "${checkmark} - CCCL Infrastructure" # Check for changes in each subprojects directory: for subproject in "${subprojects[@]}"; do + if [[ ${subproject} == "cccl" ]]; then + # Special case handled above. + continue + fi + inspect_subdir $subproject declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + checkmark=$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) + echo "${checkmark} - ${subproject}" done echo @@ -140,13 +138,20 @@ main() { for subproject in "${subprojects[@]}"; do add_dependencies ${subproject} declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + checkmark=$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) + echo "${checkmark} - ${subproject}" done echo + declare -a dirty_subprojects=() for subproject in "${subprojects[@]}"; do - write_subproject_status ${subproject} + var_name="${subproject^^}_DIRTY" + if [[ ${!var_name} -ne 0 ]]; then + dirty_subprojects+=("$subproject") + fi done + + write_output "DIRTY_PROJECTS" "${dirty_subprojects[*]}" } main "$@" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 589de44bd3c..88aececded1 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,12 +1,6 @@ - -cuda_prev_min: &cuda_prev_min '11.1' -cuda_prev_max: &cuda_prev_max '11.8' -cuda_curr: &cuda_curr '12.4' - -# The GPUs to test on -gpus: - - 'a100' - - 'v100' +ctk_prev_min: &ctk_prev_min '11.1' +ctk_prev_max: &ctk_prev_max '11.8' +ctk_curr: &ctk_curr '12.4' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '24.06' @@ -42,54 +36,156 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # oneAPI configs oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } -# Each environment below will generate a unique build/test job -# See the "compute-matrix" job in the workflow for how this is parsed and used -# cuda: The CUDA Toolkit version -# os: The operating system used -# cpu: The CPU architecture -# compiler: The compiler to use -# name: The compiler name -# version: The compiler version -# exe: The unverionsed compiler binary name -# std: The C++ standards to build for -# This field is unique as it will generate an independent build/test job for each value - -# Configurations that will run for every PR -pull_request: - nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc6, std: [11, 14], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 
'amd64', compiler: *msvc2017, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [11, 14, 17], jobs: ['build']} - nvrtc: - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', std: [11, 14, 17, 20]} - clang-cuda: - - {lib: ['thrust', 'cub', 'libcudacxx'], cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest, std: [17, 20]} - cccl-infra: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} +# GHA Workflow job matrices: +workflows: + pull_request: + # default_projects: nvcc + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc6, std: [11, 14] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*gcc7, *gcc8, *gcc9, *llvm9], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *msvc2017, std: 14 } + - {job_types: ['build'], ctk: 
*ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '60;70;80;90'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc7, *gcc8, *gcc9], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc10, *gcc11], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm9, *llvm10], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm11, *llvm12, *llvm13], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm14, *llvm15], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cmake_cuda_arch: '90a'} + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] } + # default_projects: clang-cuda + - {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]} + # nvrtc: + - {job_types: ['nvrtc'], project: 'libcudacxx', ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]} + # verify-codegen: + - { job_types: ['verify_codegen'], project: 'libcudacxx'} + # cccl-infra: + - {job_types: ['infra'], project: 'cccl', ctk: *ctk_prev_min, host_compiler: [*gcc-oldest, *llvm-oldest]} + - {job_types: ['infra'], project: 'cccl', ctk: *ctk_curr, host_compiler: [*gcc-newest, *llvm-newest]} + nightly: + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70', host_compiler: *gcc6, std: [11] } + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *llvm9, std: [17] } + - {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75', host_compiler: *gcc11, std: [17] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc7, std: [14] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89', host_compiler: *llvm9, std: [11] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *llvm16, std: [17] } + # nvrtc: + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20], project: ['libcudacxx']} + +# +# Resources for compute_matrix.py: +# + +# Error if tags are missing: +required_tags: ['job_types'] + +# Tags that will be added if not specified: +defaulted_tags: ['ctk', 'cpu', 'gpu', 
'host_compiler', 'device_compiler', 'project', 'os'] + +# Tags that may be omitted: +optional_tags: ['std', 'cmake_cuda_arch', 'cmake_options'] + +# job_types that have an implied prerequisite 'build' job: +build_required_job_types: + - 'test' + +# job_types that require a GPU +gpu_required_job_types: + - 'test' + - 'nvrtc' + - 'infra' # cccl infra's example project test launches a kernel + +formatted_job_types: # Default: Capitalize first letter. + 'nvrtc': 'NVRTC' + 'verify_codegen': 'VerifyCodegen' + +formatted_project_names: + 'libcudacxx': 'libcu++' + 'cub': 'CUB' + 'thrust': 'Thrust' + 'cccl': 'CCCL' + +formatted_host_compiler_names: + 'llvm': 'clang' + 'oneapi': 'intel' + 'cl': 'MSVC' + +# `default_`: Used when the tag is omitted. +default_ctk: *ctk_curr +default_device_compiler: 'nvcc' +default_host_compiler: *gcc12 +default_cpu: 'amd64' +default_gpu: 'v100' +default_project: + - 'libcudacxx' + - 'cub' + - 'thrust' +# Special handling: lookup map +default_os_lookup: + 'ctk11.1-gcc6': 'ubuntu18.04' + 'ctk11.1-gcc7': 'ubuntu18.04' + 'ctk11.1-gcc8': 'ubuntu18.04' + 'ctk11.1-gcc9': 'ubuntu18.04' + 'ctk11.1-llvm9': 'ubuntu18.04' + 'ctk11.1-cl14.16': 'windows2022' + 'ctk11.8-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc7': 'ubuntu20.04' + 'ctk12.4-gcc8': 'ubuntu20.04' + 'ctk12.4-gcc9': 'ubuntu20.04' + 'ctk12.4-gcc10': 'ubuntu20.04' + 'ctk12.4-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc12': 'ubuntu22.04' + 'ctk12.4-llvm9': 'ubuntu20.04' + 'ctk12.4-llvm10': 'ubuntu20.04' + 'ctk12.4-llvm11': 'ubuntu20.04' + 'ctk12.4-llvm12': 'ubuntu20.04' + 'ctk12.4-llvm13': 'ubuntu20.04' + 'ctk12.4-llvm14': 'ubuntu20.04' + 'ctk12.4-llvm15': 'ubuntu22.04' + 'ctk12.4-llvm16': 'ubuntu22.04' + 'ctk12.4-cl14.29': 'windows2022' + 'ctk12.4-cl14.39': 'windows2022' + 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' + +gpus: + - 'v100' # ?? runners + - 't4' # 8 runners + - 'rtx2080' # 8 runners + - 'rtxa6000' # 12 runners + - 'l4' # 48 runners + - 'rtx4090' # 10 runners + - 'h100' # 16 runners + +gpus_sm: + 'v100': '70' + 't4': '75' + 'rtx2080': '75' + 'rtxa6000': '86' + 'l4': '89' + 'rtx4090': '89' + 'h100': '90' + +gpus_mem_gb: + 'v100': '32' + 't4': '16' + 'rtx2080': '8' + 'rtxa6000': '48' + 'l4': '24' + 'rtx4090': '24' + 'h100': '80' + +testing_pool_gpus: + - 't4' + - 'rtx2080' + - 'rtxa6000' + - 'l4' + - 'rtx4090' + - 'h100' diff --git a/ci/verify_codegen.sh b/ci/verify_codegen_libcudacxx.sh similarity index 100% rename from ci/verify_codegen.sh rename to ci/verify_codegen_libcudacxx.sh
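To make the matrix-expansion step above concrete, here is a minimal, standalone sketch that mirrors the `explode_tags()` recursion from `ci/compute-matrix.py`: every list-valued tag except `job_types` is expanded into one job per value. The sample entry is hypothetical and only illustrates the shape of a `workflows.pull_request` item; it is not taken verbatim from `ci/matrix.yaml`.

```python
#!/usr/bin/env python3
# Minimal sketch of the explode_tags() recursion used by ci/compute-matrix.py:
# each list-valued tag (other than 'job_types') is expanded into one job per value.
import copy
import json


def explode_tags(matrix_job):
    # Find the first list-valued tag (other than 'job_types') to expand.
    explode_tag = next((tag for tag, value in matrix_job.items()
                        if tag != 'job_types' and isinstance(value, list)), None)
    if not explode_tag:
        return [matrix_job]
    result = []
    for value in matrix_job[explode_tag]:
        new_job = copy.deepcopy(matrix_job)
        new_job[explode_tag] = value
        result.extend(explode_tags(new_job))  # recurse: other list tags may remain
    return result


if __name__ == '__main__':
    # Hypothetical entry in the style of matrix.yaml's `workflows.pull_request` list.
    entry = {
        'job_types': ['build'],
        'ctk': '12.4',
        'host_compiler': [{'name': 'gcc', 'version': '7', 'exe': 'g++'},
                          {'name': 'gcc', 'version': '8', 'exe': 'g++'}],
        'std': [11, 14, 17],
    }
    jobs = explode_tags(entry)
    print(f"{len(jobs)} jobs")          # 2 compilers x 3 standards = 6 jobs
    print(json.dumps(jobs[0], indent=2))
```

`job_types` is deliberately left as a list: `generate_dispatch_group_jobs()` later pairs a `build` job with its `test` jobs into a single `two_stage` producer/consumer entry instead of emitting them as independent jobs.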