diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index f868cc14f17..6572e37b1ec 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -74,6 +74,7 @@ while [[ $# -gt 0 ]]; do done MATRIX_FILE="../ci/matrix.yaml" +COMPUTE_MATRIX="../ci/compute-matrix.py" # Enable verbose mode if requested if [ "$VERBOSE" = true ]; then @@ -82,16 +83,17 @@ if [ "$VERBOSE" = true ]; then fi # Read matrix.yaml and convert it to json -matrix_json=$(yq -o json ${MATRIX_FILE}) +matrix_json=$(python3 ${COMPUTE_MATRIX} ${MATRIX_FILE} --devcontainer-info) -# Exclude Windows environments -readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))') +if [ "$VERBOSE" = true ]; then + echo "$matrix_json" +fi # Get the devcontainer image version and define image tag root readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version') # Get unique combinations of cuda version, compiler name/version, and Ubuntu version -readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]') +readonly combinations=$(echo "$matrix_json" | jq -c '.combinations[]') # Update the base devcontainer with the default values # The root devcontainer.json file is used as the default container as well as a template for all diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml deleted file mode 100644 index b8155e7aa65..00000000000 --- a/.github/actions/compute-matrix/action.yml +++ /dev/null @@ -1,25 +0,0 @@ - -name: Compute Matrix -description: "Compute the matrix for a given matrix type from the specified matrix file" - -inputs: - matrix_query: - description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" - required: true - matrix_file: - description: 'The file containing the matrix' - required: true -outputs: - matrix: - description: 'The requested matrix' - value: ${{ steps.compute-matrix.outputs.MATRIX }} - -runs: - using: "composite" - steps: - - name: Compute matrix - id: compute-matrix - run: | - MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) - echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT - shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh deleted file mode 100755 index 1629836d216..00000000000 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -write_output() { - local key="$1" - local value="$2" - echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" -} - -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. 
as $o | {lib: $o.lib[]} + del($o.lib))' -} - -# Filter out the libraries that are dirty -filter_libs() { - all_libs=("libcudacxx" "thrust" "cub") - dirty_libs=() - for lib in "${all_libs[@]}"; do - dirty_var_name="${lib^^}_DIRTY" - # If the variable named in dirty_var_name is not set, set it to false: - : "${!dirty_var_name:=false}" - # Declare a nameref to the variable named in dirty_var_name - declare -n lib_dirty="$dirty_var_name" - # echo "${lib^^}_DIRTY: ${lib_dirty}" >> /dev/stderr - if [ "${lib_dirty}" = "true" ]; then - dirty_libs+=("$lib") - fi - done - # echo "Dirty libraries: ${dirty_libs[@]}" >> /dev/stderr - - # Construct a regex to filter out the dirty libraries - dirty_lib_regex=$(IFS="|"; echo "${dirty_libs[*]}") - dirty_lib_regex="^(${dirty_lib_regex})\$" - jq_filter="map(select(.lib | test(\"$dirty_lib_regex\")))" - jq -cr "$jq_filter" -} - -extract_matrix() { - local file="$1" - local type="$2" - local matrix=$(yq -o=json "$file" | jq -cr ".$type") - write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" - local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" - write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" - write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" - - write_output "NVRTC_MATRIX" "$(echo "$matrix" | jq '.nvrtc' | explode_std_versions)" - - local clang_cuda_matrix="$(echo "$matrix" | jq -cr '.["clang-cuda"]' | explode_std_versions | explode_libs | filter_libs)" - write_output "CLANG_CUDA_MATRIX" "$clang_cuda_matrix" - write_output "CCCL_INFRA_MATRIX" "$(echo "$matrix" | jq -cr '.["cccl-infra"]' )" -} - -main() { - if [ "$1" == "-v" ]; then - set -x - shift - fi - - if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then - echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" - echo " -v : Enable verbose output" - echo " MATRIX_FILE : The path to the matrix file." - echo " MATRIX_TYPE : The desired matrix. 
Supported values: 'pull_request'" - exit 1 - fi - - echo "Input matrix file:" >&2 - cat "$1" >&2 - echo "Matrix Type: $2" >&2 - - extract_matrix "$1" "$2" -} - -main "$@" diff --git a/.github/workflows/ci-dispatch-group.yml b/.github/workflows/ci-dispatch-group.yml new file mode 100644 index 00000000000..1d5b2e465b1 --- /dev/null +++ b/.github/workflows/ci-dispatch-group.yml @@ -0,0 +1,45 @@ +name: "CI/Dispatch/Group" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + name: {type: string, required: true} + jobs: {type: string, required: true} + +permissions: + contents: read + +jobs: + standlone-jobs: + name: '[Standalone]' + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['standalone']}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + two-stage-jobs: + name: '[TwoStage]' + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['two_stage']}} + uses: ./.github/workflows/ci-dispatch-two-stage.yml + with: + producers: ${{ toJSON(matrix.producers) }} + consumers: ${{ toJSON(matrix.consumers) }} diff --git a/.github/workflows/ci-dispatch-job.yml b/.github/workflows/ci-dispatch-job.yml new file mode 100644 index 00000000000..aec1f06dcfa --- /dev/null +++ b/.github/workflows/ci-dispatch-job.yml @@ -0,0 +1,145 @@ +name: "CI/Dispatch/Job" + +# Important note about depending on this workflow: The `result` will be a failure, even if successful. +# +# This reusable workflow dispatches to a number of internal jobs. Only one job will run, +# and some may be in error states due to empty matrices (which are used instead of `if` to keep +# skipped dispatch jobs out of the GHA UI). The `continue-on-error` flag should prevent these +# errors from failing the workflow, but this does not work. +# +# Thus, the `result` of this workflow will always be a failure, even if the job itself is successful. +# To depend on this job, you should use the `success` output instead: +# +# ``` +# dependent_job: +# needs: dispatch-job +# if: ${{ !cancelled() && needs.dispatch-job.outputs.success }} +# ``` + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + outputs: + success: + value: ${{ contains(toJSON(jobs.*.outputs.success), 'true') }} + inputs: + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + env: {type: string, required: false} + dummy_matrix: {type: string, required: false, default: '[{"valid": true}]'} + +permissions: + contents: read + +jobs: + linux: + name: ${{inputs.name}} + continue-on-error: ${{ ! 
startsWith(inputs.runner, 'linux') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'linux') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + container: + options: -u root + image: ${{inputs.image}} + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + path: cccl + persist-credentials: false + - name: Move files to coder user home directory + run: | + cp -R cccl /home/coder/cccl + chown -R coder:coder /home/coder/ + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./cccl/.github/actions/configure_cccl_sccache + - name: Run command + shell: su coder {0} + env: + COMMAND: ${{inputs.command}} + run: | + set -eo pipefail + cd ~/cccl + echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" + echo -e "\e[1;34m${COMMAND}\e[0m" + eval "${COMMAND}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + - name: Mark job as successful + id: done + run: echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} + + windows: + name: ${{inputs.name}} + continue-on-error: ${{ ! 
startsWith(inputs.runner, 'windows') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'windows') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.image }} + run: docker pull ${{ inputs.image }} + - name: Run Command + run: >- + docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/cccl.git; + cd cccl; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.command}}" + - name: Mark job as successful + id: done + run: echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} diff --git a/.github/workflows/ci-dispatch-two-stage.yml b/.github/workflows/ci-dispatch-two-stage.yml new file mode 100644 index 00000000000..9f4e2cb5b22 --- /dev/null +++ b/.github/workflows/ci-dispatch-two-stage.yml @@ -0,0 +1,54 @@ +name: "CI/Dispatch/TwoStage" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + producers: {type: string, required: true} + consumers: {type: string, required: true} + +permissions: + contents: read + +jobs: + producers: + name: '[Producer]' + # It is impossible to accumulate output variables across a matrix, and we cannot rely on the results of the dispatch-job workflow to determine success. + # See the note in ci-dispatch-job.yml for more information. + # + # Since we cannot accumulate results from multiple producers, only support a single producer for now. + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.producers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + consumers: + name: '[Consumer]' + needs: producers + # dispatch-job's result is always false, check the outputs instead. See ci-dispatch-job.yml for more information. 
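+    # That `success` output is aggregated in ci-dispatch-job.yml as
+    # ${{ contains(toJSON(jobs.*.outputs.success), 'true') }}, i.e. it is 'true' only if
+    # whichever platform job actually ran (linux or windows) reached its final
+    # "Mark job as successful" step.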
+ if: ${{ !cancelled() && fromJson(needs.producers.outputs.success) }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.consumers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index 7b5ed4ef272..3f8227152ed 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -4,6 +4,7 @@ on: workflow_call: inputs: project_name: {type: string, required: true} + job_type: {type: string, required: true} per_cuda_compiler_matrix: {type: string, required: true} devcontainer_version: {type: string, required: true} is_windows: {type: boolean, required: true} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000000..36bfe89969d --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the main workflow that runs on every PR and push to main +name: nightly + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + # FIXME: This should be a cron job that runs nightly + push: # Testing only + branches: + - "pull-request/[0-9]+" + # schedule: + # - cron: '0 7 * * *' # 7AM UTC, 12AM PST, 3AM EST + +# Only runs one instance of this workflow at a time. Cancels any in-progress runs when a new one starts. 
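+# The group key below combines the workflow name, the triggering event, and the ref, so a new
+# run only cancels an in-progress run of the same workflow on the same branch and event.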
+concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: read + +jobs: + compute-matrix: + name: Compute matrix + runs-on: ubuntu-latest + outputs: + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} + steps: + - name: Get Base Branch from PR + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Checkout repo + uses: actions/checkout@v3 + - name: Identify dirty subprojects + id: inspect-changes + run: | + ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} + env: + BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + - name: Compute matrix outputs + id: compute-matrix + run: | + ci/compute-matrix.py ci/matrix.yaml --workflow ${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} + + dispatch-groups: + name: ${{ matrix.name }} + needs: + - compute-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} + + # This job is the final job that runs after all other jobs and is used for branch protection status checks. + # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks + # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 + ci: + runs-on: ubuntu-latest + name: CI + if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success + needs: + - dispatch-groups + steps: + - name: Check status of all precursor jobs + if: >- + ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} + run: exit 1 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 3dcee0cf6c6..b1f293a6119 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,7 +14,7 @@ # limitations under the License. 
# This is the main workflow that runs on every PR and push to main -name: pr +name: pull_request defaults: run: @@ -35,13 +35,13 @@ permissions: pull-requests: read jobs: - inspect-changes: - name: "Inspect Changes" + compute-matrix: + name: Compute matrix runs-on: ubuntu-latest outputs: - LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} - CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} - THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} + DEVCONTAINER_VERSION: ${{steps.compute-matrix.outputs.DEVCONTAINER_VERSION}} + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} steps: - name: Get Base Branch from PR id: get-pr-info @@ -49,154 +49,31 @@ jobs: - name: Checkout repo uses: actions/checkout@v3 - name: Identify dirty subprojects - id: set-outputs + id: inspect-changes run: | ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} env: BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} - - compute-matrix: - name: Compute matrix - runs-on: ubuntu-latest - needs: - - inspect-changes - outputs: - DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} - PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} - PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} - NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} - CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} - CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - name: Compute matrix outputs - id: set-outputs + id: compute-matrix run: | - .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request - env: - THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} - CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} - LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} - - nvrtc: - name: libcudacxx NVRTC CUDA${{matrix.cuda}} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/run-as-coder.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} - with: - name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} - - thrust: - name: Thrust CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "thrust" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + ci/compute-matrix.py ci/matrix.yaml --workflow 
${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} - cub: - name: CUB CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "cub" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - libcudacxx: - name: libcudacxx CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read + dispatch-groups: + name: ${{ matrix.name }} needs: - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "libcudacxx" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - clang-cuda: - name: ${{matrix.lib}} Clang CUDA - permissions: - id-token: write - contents: read - needs: compute-matrix - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-cpu16 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" - - cccl-infra: - name: CCCL Infrastructure permissions: id-token: write contents: read - needs: compute-matrix - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} strategy: fail-fast: false matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml with: - name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - cmake -S . 
--preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} - ctest --preset=cccl-infra + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} verify-devcontainers: name: Verify Dev Containers @@ -205,20 +82,6 @@ jobs: contents: read uses: ./.github/workflows/verify-devcontainers.yml - verify-codegen: - name: Verify Codegen in libcudacxx - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run libcudacxx codegen verification - id: verify-codegen - run: | - sudo apt-get update - sudo apt-get install ninja-build - export CXX="g++" - ./ci/verify_codegen.sh - # This job is the final job that runs after all other jobs and is used for branch protection status checks. # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 @@ -227,14 +90,8 @@ jobs: name: CI if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success needs: - - clang-cuda - - cub - - libcudacxx - - nvrtc - - thrust - - cccl-infra + - dispatch-groups - verify-devcontainers - - verify-codegen steps: - name: Check status of all precursor jobs if: >- diff --git a/ci/build_common.sh b/ci/build_common.sh index 239d463eadc..5d36919c4a1 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -216,10 +216,13 @@ function test_preset() { local BUILD_NAME=$1 local PRESET=$2 - local GROUP_NAME="🚀 Test ${BUILD_NAME}" + local GPU_REQUIRED=${3:-"true"} - fail_if_no_gpu + if [ $GPU_REQUIRED -eq "true" ]; then + fail_if_no_gpu + fi + local GROUP_NAME="🚀 Test ${BUILD_NAME}" ctest_log_dir="${BUILD_DIR}/log/ctest" ctest_log="${ctest_log_dir}/${PRESET}" diff --git a/ci/compute-matrix.py b/ci/compute-matrix.py new file mode 100755 index 00000000000..eaa47b2b2de --- /dev/null +++ b/ci/compute-matrix.py @@ -0,0 +1,592 @@ +#!/usr/bin/env python3 + +""" +Concepts: +- matrix_job: an entry of a workflow matrix, converted from matrix.yaml["workflow"][id] into a JSON object. + Example: + { + "job_types": [ + "test" + ], + "ctk": "11.1", + "gpu": "t4", + "cmake_cuda_arch": "75-real", + "host_compiler": { + "name": "llvm", + "version": "9", + "exe": "clang++" + }, + "std": [ + 17 + ], + "project": [ + "libcudacxx", + "cub", + "thrust" + ], + "os": "ubuntu18.04" + } + +Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where +the matrix job is turned into one or more dispatch groups consisting of potentially many jobs. + +- dispatch_group_json: A json object used in conjunction with the ci-dispatch-groups.yml GHA workflow. + Example: + { + "": { + "standalone": [ {}, ... ] + "two_stage": [ {}, ] + } + } + +- two_stage_json: A json object that represents bulk-synchronous producer/consumer jobs, used with ci-dispatch-two-stage.yml. + Example: + { + "producers": [ {}, ... ], + "consumers": [ {}, ... ] + } + +- job_json: A json object that represents a single job in a workflow. Used with ci-dispatch-job.yml. + Example: + { + name: "...", + runner: "...", + image: "...", + command: "..." 
}, + } +""" + +import argparse +import copy +import json +import os +import re +import sys +import yaml + +matrix_yaml = None + + +def write_output(key, value): + print(f"::group::GHA Output: {key}") + print(f"{key}={value}") + print("::endgroup::") + + # Check if the GITHUB_OUTPUT environment variable is set, and write to that file if it is. + output_file = os.environ.get('GITHUB_OUTPUT') + if output_file: + with open(output_file, 'a') as f: + print(f"{key}={value}", file=f) + + +def lookup_os(ctk, host_compiler_name, host_compiler_version): + key = f'ctk{ctk}-{host_compiler_name}{host_compiler_version}' + return matrix_yaml['default_os_lookup'][key] + + +def get_formatted_projected_name(project_name): + if project_name in matrix_yaml['formatted_project_names']: + return matrix_yaml['formatted_project_names'][project_name] + return project_name + + +def get_formatted_host_compiler_name(host_compiler): + config_name = host_compiler['name'] + if config_name in matrix_yaml['formatted_host_compiler_names']: + return matrix_yaml['formatted_host_compiler_names'][config_name] + return config_name + + +def get_formatted_job_type(job_type): + if job_type in matrix_yaml['formatted_job_types']: + return matrix_yaml['formatted_job_types'][job_type] + # Return with first letter capitalized: + return job_type.capitalize() + + +def is_windows(matrix_job): + return matrix_job['os'].startswith('windows') + + +def validate_required_tags(matrix_job): + for tag in matrix_yaml['required_tags']: + if tag not in matrix_job: + raise Exception(f"Missing required tag '{tag}' in matrix job {matrix_job}") + + required_tags = set(matrix_yaml['required_tags']) + defaulted_tags = set(matrix_yaml['defaulted_tags']) + optional_tags = set(matrix_yaml['optional_tags']) + all_tags = required_tags | defaulted_tags | optional_tags + + for tag in matrix_job: + if tag not in all_tags: + raise Exception(f"Unknown tag '{tag}' in matrix job {matrix_job}") + + +def set_default_tags(matrix_job): + generic_defaults = set(matrix_yaml['defaulted_tags']) + generic_defaults -= set(['os']) # handled specially. 
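+    # Any remaining defaulted tag the job omits falls back to the matching
+    # `default_<tag>` entry in matrix.yaml (default_ctk, default_cpu, default_gpu,
+    # default_project, ...); 'os' is derived later in set_derived_tags() via default_os_lookup.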
+ + for tag in generic_defaults: + if tag not in matrix_job: + matrix_job[tag] = matrix_yaml['default_'+tag] + + +def set_derived_tags(matrix_job): + if 'os' not in matrix_job: + matrix_job['os'] = lookup_os(matrix_job['ctk'], + matrix_job['host_compiler']['name'], + matrix_job['host_compiler']['version']) + + # Expand nvcc device compiler shortcut: + if matrix_job['device_compiler'] == 'nvcc': + matrix_job['device_compiler'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'} + + +def generate_dispatch_group_name(matrix_job): + project_name = get_formatted_projected_name(matrix_job['project']) + ctk = matrix_job['ctk'] + device_compiler = matrix_job['device_compiler'] + host_compiler_name = get_formatted_host_compiler_name(matrix_job['host_compiler']) + + compiler_info = "" + if device_compiler['name'] == 'nvcc': + compiler_info = f"nvcc {host_compiler_name}" + elif device_compiler['name'] == 'llvm': + compiler_info = f"clang-cuda" + else: + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler_name}" + + return f"{project_name} {compiler_info} CTK{ctk}" + + +def generate_dispatch_job_name(matrix_job, job_type): + std_str = ("C++" + str(matrix_job['std']) + " ") if 'std' in matrix_job else '' + cpu_str = matrix_job['cpu'] + gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_job_types'] else "" + cuda_compile_arch = (" sm{" + matrix_job['cmake_cuda_arch'] + "}") if 'cmake_cuda_arch' in matrix_job else "" + cmake_options = (' ' + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" + + host_compiler_name = get_formatted_host_compiler_name(matrix_job['host_compiler']) + host_compiler_info = f"{host_compiler_name}{matrix_job['host_compiler']['version']}" + + config_tag = f"{std_str}{host_compiler_info}" + + formatted_job_type = get_formatted_job_type(job_type) + + extra_info = f":{cuda_compile_arch}{cmake_options}" if cuda_compile_arch or cmake_options else "" + + return f"[{config_tag}] {formatted_job_type}({cpu_str}{gpu_str}){extra_info}" + + +def generate_dispatch_job_runner(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu = matrix_job['cpu'] + + if not job_type in matrix_yaml['gpu_required_job_types']: + return f"{runner_os}-{cpu}-cpu16" + + gpu = matrix_job['gpu'] + suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + + return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + + +def generate_dispatch_job_image(matrix_job, job_type): + devcontainer_version = matrix_yaml['devcontainer_version'] + ctk = matrix_job['ctk'] + image_os = matrix_job['os'] + host_compiler = matrix_job['host_compiler']['name'] + \ + matrix_job['host_compiler']['version'] + + if is_windows(matrix_job): + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + + +def generate_dispatch_job_command(matrix_job, job_type): + script_path = "ci/windows" if is_windows(matrix_job) else "ci" + script_ext = ".ps1" if is_windows(matrix_job) else ".sh" + script_job_type = job_type + script_project = matrix_job['project'] + script_name = f"{script_path}/{script_job_type}_{script_project}{script_ext}" + + std_str = str(matrix_job['std']) if 'std' in matrix_job else '' + + host_compiler_exe = matrix_job['host_compiler']['exe'] + device_compiler_name = matrix_job['device_compiler']['name'] + device_compiler_exe = 
matrix_job['device_compiler']['exe'] + + cuda_compile_arch = matrix_job['cmake_cuda_arch'] if 'cmake_cuda_arch' in matrix_job else '' + cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' + + command = f"\"{script_name}\"" + if std_str: + command += f" -std \"{std_str}\"" + if cuda_compile_arch: + command += f" -arch \"{cuda_compile_arch}\"" + if device_compiler_name != 'nvcc': + command += f" -cuda \"{device_compiler_exe}\"" + if cmake_options: + cmake_args = " ".join([f"{key}={value}" for key, value in cmake_options.items()]) + command += f" -cmake-options \"{cmake_args}\"" + + return command + + +def generate_dispatch_job_json(matrix_job, job_type): + return { + 'name': generate_dispatch_job_name(matrix_job, job_type), + 'runner': generate_dispatch_job_runner(matrix_job, job_type), + 'image': generate_dispatch_job_image(matrix_job, job_type), + 'command': generate_dispatch_job_command(matrix_job, job_type) + } + + +# Create a single build producer, and a separate consumer for each test_job_type: +def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): + build_json = generate_dispatch_job_json(matrix_job, build_job_type) + + test_json = [] + for test_job_type in test_job_types: + test_json.append(generate_dispatch_job_json(matrix_job, test_job_type)) + + return { + "producers": [build_json], + "consumers": test_json + } + + +def generate_dispatch_group_jobs(matrix_job): + dispatch_group_jobs = { + "standalone": [], + "two_stage": [] + } + + job_types = set(copy.deepcopy(matrix_job['job_types'])) + + # job_types that appear in build_required_job_types: + build_required = set(matrix_yaml['build_required_job_types']) & job_types + has_build_and_test = len(build_required) > 0 + job_types -= build_required + + has_standalone_build = 'build' in job_types and not has_build_and_test + job_types -= {'build'} + + if has_standalone_build: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, "build")) + elif has_build_and_test: + dispatch_group_jobs['two_stage'].append( + generate_dispatch_build_and_test_json(matrix_job, "build", build_required)) + + # Remaining jobs are assumed to be standalone (e.g. 
nvrtc): + for job_type in job_types: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) + + return dispatch_group_jobs + + +def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): + for group_name, group_json in new_dispatch_groups.items(): + if group_name not in accum_dispatch_groups: + accum_dispatch_groups[group_name] = group_json + else: + # iterate standalone and two_stage: + for key, value in group_json.items(): + accum_dispatch_groups[group_name][key] += value + + +def matrix_job_to_dispatch_group(matrix_job): + return {generate_dispatch_group_name(matrix_job): generate_dispatch_group_jobs(matrix_job)} + + +def explode_tags(matrix_job): + explode_tag = None + for tag in matrix_job: + if tag != "job_types" and isinstance(matrix_job[tag], list): + explode_tag = tag + break + + if not explode_tag: + return [matrix_job] + + result = [] + for value in matrix_job[explode_tag]: + new_job = copy.deepcopy(matrix_job) + new_job[explode_tag] = value + result.extend(explode_tags(new_job)) + + return result + + +def preprocess_matrix_jobs(matrix_jobs): + result = [] + for matrix_job in matrix_jobs: + validate_required_tags(matrix_job) + set_default_tags(matrix_job) + for job in explode_tags(matrix_job): + set_derived_tags(job) + result.append(job) + return result + + +def filter_projects(matrix_jobs, projects): + return [job for job in matrix_jobs if job['project'] in projects] + + +def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig): + workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig) + + # Merge consumers for any two_stage arrays that have the same producer(s). Print a warning. + for group_name, group_json in workflow_dispatch_groups.items(): + if not 'two_stage' in group_json: + continue + two_stage_json = group_json['two_stage'] + merged_producers = [] + merged_consumers = [] + for two_stage in two_stage_json: + producers = two_stage['producers'] + consumers = two_stage['consumers'] + if producers in merged_producers: + producer_index = merged_producers.index(producers) + matching_consumers = merged_consumers[producer_index] + + producer_names = ", ".join([producer['name'] for producer in producers]) + print(f"::notice file=ci/matrix.yaml::Merging consumers for duplicate producer '{producer_names}' in '{group_name}'", + file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Original consumers: {consumer_names}", file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in consumers]) + print(f"::notice file=ci/matrix.yaml::Duplicate consumers: {consumer_names}", file=sys.stderr) + # Merge if unique: + for consumer in consumers: + if consumer not in matching_consumers: + matching_consumers.append(consumer) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Merged consumers: {consumer_names}", file=sys.stderr) + else: + merged_producers.append(producers) + merged_consumers.append(consumers) + # Update with the merged lists: + two_stage_json = [] + for producers, consumers in zip(merged_producers, merged_consumers): + two_stage_json.append({'producers': producers, 'consumers': consumers}) + group_json['two_stage'] = two_stage_json + + # Check for any duplicate jobs in standalone arrays. Warn and remove duplicates. 
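+    # Duplicates are kept only once (first occurrence wins), and any standalone job that
+    # also appears as a producer or consumer of a two_stage entry is dropped so that it
+    # is not dispatched twice.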
+ for group_name, group_json in workflow_dispatch_groups.items(): + standalone_jobs = group_json['standalone'] if 'standalone' in group_json else [] + unique_standalone_jobs = [] + for job_json in standalone_jobs: + if job_json in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'", + file=sys.stderr) + else: + unique_standalone_jobs.append(job_json) + + # If any producer/consumer jobs exist in standalone arrays, warn and remove the standalones. + two_stage_jobs = group_json['two_stage'] if 'two_stage' in group_json else [] + for two_stage_job in two_stage_jobs: + for producer in two_stage_job['producers']: + if producer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{producer['name']}' " + + f"as it appears as a producer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(producer) + for consumer in two_stage_job['consumers']: + if consumer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{consumer['name']}' " + + f"as it appears as a consumer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(consumer) + standalone_jobs = list(unique_standalone_jobs) + + # If any producer or consumer job appears more than once, warn and leave as-is. + all_two_stage_jobs = [] + duplicate_jobs = {} + for two_stage_job in two_stage_jobs: + for job in two_stage_job['producers'] + two_stage_job['consumers']: + if job in all_two_stage_jobs: + duplicate_jobs[job['name']] = duplicate_jobs.get(job['name'], 1) + 1 + else: + all_two_stage_jobs.append(job) + for job_name, count in duplicate_jobs.items(): + print(f"::warning file=ci/matrix.yaml::" + + f"Job '{job_name}' appears {count} times in '{group_name}'.", + f"Cannot remove duplicate while resolving dependencies. This job WILL execute {count} times.", + file=sys.stderr) + + # Remove all named values that contain an empty list of jobs: + for group_name, group_json in workflow_dispatch_groups.items(): + if not group_json['standalone'] and not group_json['two_stage']: + del workflow_dispatch_groups[group_name] + elif not group_json['standalone']: + del group_json['standalone'] + elif not group_json['two_stage']: + del group_json['two_stage'] + + # Natural sort impl (handles embedded numbers in strings, case insensitive) + def natural_sort_key(key): + return [(int(text) if text.isdigit() else text.lower()) for text in re.split('(\d+)', key)] + + # Sort the dispatch groups by name: + workflow_dispatch_groups = dict(sorted(workflow_dispatch_groups.items(), key=lambda x: natural_sort_key(x[0]))) + + # Sort the jobs within each dispatch group: + for group_name, group_json in workflow_dispatch_groups.items(): + if 'standalone' in group_json: + group_json['standalone'] = sorted(group_json['standalone'], key=lambda x: natural_sort_key(x['name'])) + if 'two_stage' in group_json: + group_json['two_stage'] = sorted( + group_json['two_stage'], key=lambda x: natural_sort_key(x['producers'][0]['name'])) + + # Check to see if any .two_stage.producers arrays have more than 1 job, which is not supported. See ci-dispatch-two-stage.yml for details. 
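+    # ci-dispatch-two-stage.yml cannot aggregate `success` outputs across a matrix of
+    # producers, so a two_stage entry with more than one producer is rejected here with
+    # an error rather than dispatched.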
+ for group_name, group_json in workflow_dispatch_groups.items(): + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + num_producers = len(two_stage_json['producers']) + if num_producers > 1: + producer_names = "" + for job in two_stage_json['producers']: + producer_names += f" - {job['name']}\n" + error_message = f"ci-dispatch-two-stage.yml currently only supports a single producer. " + error_message += f"Found {num_producers} producers in '{group_name}':\n{producer_names}" + print(f"::error file=ci/matrix.yaml::{error_message}", file=sys.stderr) + raise Exception(error_message) + + return workflow_dispatch_groups + + +def pretty_print_workflow(final_workflow, outfile): + print(f"::group::Job list", file=outfile) + + def print_job_array(total_jobs, key, group_json): + job_array = group_json[key] if key in group_json else [] + key += ":" + for job_json in job_array: + total_jobs += 1 + print(f"{total_jobs:4} {key:13} {job_json['name']}", file=outfile) + return total_jobs + + total_jobs = 0 + for group_name, group_json in final_workflow.items(): + print(f"{'':4} {group_name}:", file=outfile) + total_jobs = print_job_array(total_jobs, 'standalone', group_json) + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + total_jobs = print_job_array(total_jobs, 'producers', two_stage_json) + total_jobs = print_job_array(total_jobs, 'consumers', two_stage_json) + print(f"::endgroup::", file=outfile) + print(f"Total jobs: {total_jobs}", file=outfile) + + print("::group::Final Workflow JSON", file=outfile) + print(json.dumps(final_workflow, indent=2), file=outfile) + print("::endgroup::", file=outfile) + + +def print_gha_workflow(args): + matrix_jobs = preprocess_matrix_jobs(matrix_yaml['workflows'][args.workflow]) + + # print("::group::Matrix Jobs", file=sys.stderr) + # print("Matrix Jobs:", file=sys.stderr) + # for matrix_job in matrix_jobs: + # print(json.dumps(matrix_job, indent=None, separators=(',', ':')), file=sys.stderr) + # print("::end-group::", file=sys.stderr) + + if args.dirty_projects: + matrix_jobs = filter_projects(matrix_jobs, args.dirty_projects) + + workflow_dispatch_groups = {} + for matrix_job in matrix_jobs: + merge_dispatch_groups(workflow_dispatch_groups, matrix_job_to_dispatch_group(matrix_job)) + + final_workflow = finalize_workflow_dispatch_groups(workflow_dispatch_groups) + + pretty_print_workflow(final_workflow, sys.stderr) + + write_output("WORKFLOW_KEYS", json.dumps(list(final_workflow.keys()), indent=None, separators=(',', ':'))) + write_output("WORKFLOW", json.dumps(final_workflow, indent=None, separators=(',', ':'))) + + +def print_devcontainer_info(args): + devcontainer_version = matrix_yaml['devcontainer_version'] + + matrix_jobs = [] + for workflow in matrix_yaml['workflows']: + matrix_jobs.extend(matrix_yaml['workflows'][workflow]) + matrix_jobs = preprocess_matrix_jobs(matrix_jobs) + + # Remove all but the following keys from the matrix jobs: + keep_keys = ['ctk', 'host_compiler', 'os'] + combinations = [{key: job[key] for key in keep_keys} for job in matrix_jobs] + + # Remove duplicates and filter out windows jobs: + unique_combinations = [] + for combo in combinations: + if not is_windows(combo) and combo not in unique_combinations: + unique_combinations.append(combo) + + for combo in unique_combinations: + combo['compiler_name'] = combo['host_compiler']['name'] + combo['compiler_version'] = combo['host_compiler']['version'] + combo['compiler_exe'] = combo['host_compiler']['exe'] + del 
combo['host_compiler'] + + combo['cuda'] = combo['ctk'] + del combo['ctk'] + + devcontainer_json = {'devcontainer_version': devcontainer_version, 'combinations': unique_combinations} + + # Pretty print the devcontainer json to stdout: + print(json.dumps(devcontainer_json, indent=2)) + + +def main(): + global matrix_yaml + + parser = argparse.ArgumentParser(description='Compute matrix for workflow') + parser.add_argument('matrix_file', help='Path to the matrix YAML file') + parser_mode = parser.add_mutually_exclusive_group(required=True) + parser_mode.add_argument('--workflow', help='Print GHA workflow [pull_request, nightly, weekly, etc]') + parser_mode.add_argument('--devcontainer-info', action='store_true', + help='Print devcontainer info instead of GHA workflows.') + parser.add_argument('--dirty-projects', nargs='*', help='Filter jobs to only these projects') + args = parser.parse_args() + + # Check if the matrix file exists + if not os.path.isfile(args.matrix_file): + print(f"Error: Matrix file '{args.matrix_file}' does not exist.") + sys.exit(1) + + with open(args.matrix_file, 'r') as f: + matrix_yaml = yaml.safe_load(f) + + # Check if the workflow is valid + if args.workflow and 'workflows' not in matrix_yaml: + print(f"Error: Workflow 'workflows.{args.workflow}' does not exist in the matrix YAML.") + sys.exit(1) + + # Print usage if no arguments are provided + if not args.matrix_file and not args.workflow: + parser.print_usage() + sys.exit(1) + + # print("Arguments:", file=sys.stderr) + # print(args, file=sys.stderr) + # print("::group::Matrix YAML", file=sys.stderr) + # print("Matrix YAML:", file=sys.stderr) + # print(matrix_yaml, file=sys.stderr) + # print("::end-group::", file=sys.stderr) + + if args.workflow: + print_gha_workflow(args) + elif args.devcontainer_info: + print_devcontainer_info(args) + else: + print("Error: Either --workflow WORKFLOW or --devcontainers must be specified.", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/ci/infra_cccl.sh b/ci/infra_cccl.sh new file mode 100755 index 00000000000..475799ace26 --- /dev/null +++ b/ci/infra_cccl.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +source "$(dirname "$0")/build_common.sh" + +print_environment_details + +PRESET="cccl-infra" + +CMAKE_OPTIONS="" + +GPU_REQUIRED="false" + +if [ -n "${GITHUB_SHA:-}" ]; then + CMAKE_OPTIONS="$CMAKE_OPTIONS -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA}" +fi + +configure_preset "CCCL Infra" "$PRESET" "$CMAKE_OPTIONS" +test_preset "CCCL Infra" "$PRESET" "$GPU_REQUIRED" + +print_time_summary diff --git a/ci/inspect_changes.sh b/ci/inspect_changes.sh index 59500a70554..dce8e2033fd 100755 --- a/ci/inspect_changes.sh +++ b/ci/inspect_changes.sh @@ -21,6 +21,7 @@ base_sha=$(git merge-base $head_sha $base_sha) # Define a list of subproject directories: subprojects=( + cccl libcudacxx cub thrust @@ -28,6 +29,7 @@ subprojects=( # ...and their dependencies: declare -A dependencies=( + [cccl]="" [libcudacxx]="cccl" [cub]="cccl libcudacxx thrust" [thrust]="cccl libcudacxx cub" @@ -90,19 +92,6 @@ add_dependencies() { return 0 } -# write_subproject_status -# Write the output _DIRTY={true|false} -write_subproject_status() { - local subproject="$1" - local dirty_flag=${subproject^^}_DIRTY - - if [[ ${!dirty_flag} -ne 0 ]]; then - write_output "${dirty_flag}" "true" - else - write_output "${dirty_flag}" "false" - fi -} - main() { # Print the list of subprojects and all of their dependencies: echo "Subprojects: ${subprojects[*]}" @@ -118,21 +107,30 @@ main() { echo # Print the list of 
files that have changed: - echo "Dirty files:" + echo "::group::Dirty files" dirty_files | sed 's/^/ - /' - echo "" + echo "::endgroup::" + echo echo "Modifications in project?" + # Assign the return value of `inspect_cccl` to the variable `CCCL_DIRTY`: inspect_cccl CCCL_DIRTY=$? - echo "$(if [[ ${CCCL_DIRTY} -eq 0 ]]; then echo " "; else echo "X"; fi) - CCCL Infrastructure" + checkmark=$(if [[ ${CCCL_DIRTY} -eq 0 ]]; then echo " "; else echo "X"; fi) + echo "${checkmark} - CCCL Infrastructure" # Check for changes in each subprojects directory: for subproject in "${subprojects[@]}"; do + if [[ ${subproject} == "cccl" ]]; then + # Special case handled above. + continue + fi + inspect_subdir $subproject declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + checkmark=$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) + echo "${checkmark} - ${subproject}" done echo @@ -140,13 +138,20 @@ main() { for subproject in "${subprojects[@]}"; do add_dependencies ${subproject} declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + checkmark=$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) + echo "${checkmark} - ${subproject}" done echo + declare -a dirty_subprojects=() for subproject in "${subprojects[@]}"; do - write_subproject_status ${subproject} + var_name="${subproject^^}_DIRTY" + if [[ ${!var_name} -ne 0 ]]; then + dirty_subprojects+=("$subproject") + fi done + + write_output "DIRTY_PROJECTS" "${dirty_subprojects[*]}" } main "$@" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 589de44bd3c..88aececded1 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,12 +1,6 @@ - -cuda_prev_min: &cuda_prev_min '11.1' -cuda_prev_max: &cuda_prev_max '11.8' -cuda_curr: &cuda_curr '12.4' - -# The GPUs to test on -gpus: - - 'a100' - - 'v100' +ctk_prev_min: &ctk_prev_min '11.1' +ctk_prev_max: &ctk_prev_max '11.8' +ctk_curr: &ctk_curr '12.4' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '24.06' @@ -42,54 +36,156 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # oneAPI configs oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } -# Each environment below will generate a unique build/test job -# See the "compute-matrix" job in the workflow for how this is parsed and used -# cuda: The CUDA Toolkit version -# os: The operating system used -# cpu: The CPU architecture -# compiler: The compiler to use -# name: The compiler name -# version: The compiler version -# exe: The unverionsed compiler binary name -# std: The C++ standards to build for -# This field is unique as it will generate an independent build/test job for each value - -# Configurations that will run for every PR -pull_request: - nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc6, std: [11, 14], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 
'amd64', compiler: *msvc2017, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [11, 14, 17], jobs: ['build']} - nvrtc: - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', std: [11, 14, 17, 20]} - clang-cuda: - - {lib: ['thrust', 'cub', 'libcudacxx'], cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest, std: [17, 20]} - cccl-infra: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} +# GHA Workflow job matrices: +workflows: + pull_request: + # default_projects: nvcc + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc6, std: [11, 14] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*gcc7, *gcc8, *gcc9, *llvm9], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *msvc2017, std: 14 } + - {job_types: ['build'], ctk: 
*ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '60;70;80;90'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc7, *gcc8, *gcc9], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc10, *gcc11], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm9, *llvm10], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm11, *llvm12, *llvm13], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm14, *llvm15], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cmake_cuda_arch: '90a'} + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] } + # default_projects: clang-cuda + - {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]} + # nvrtc: + - {job_types: ['nvrtc'], project: 'libcudacxx', ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]} + # verify-codegen: + - { job_types: ['verify_codegen'], project: 'libcudacxx'} + # cccl-infra: + - {job_types: ['infra'], project: 'cccl', ctk: *ctk_prev_min, host_compiler: [*gcc-oldest, *llvm-oldest]} + - {job_types: ['infra'], project: 'cccl', ctk: *ctk_curr, host_compiler: [*gcc-newest, *llvm-newest]} + nightly: + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70', host_compiler: *gcc6, std: [11] } + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *llvm9, std: [17] } + - {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75', host_compiler: *gcc11, std: [17] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc7, std: [14] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89', host_compiler: *llvm9, std: [11] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *llvm16, std: [17] } + # nvrtc: + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20], project: ['libcudacxx']} + +# +# Resources for compute_matrix.py: +# + +# Error if tags are missing: +required_tags: ['job_types'] + +# Tags that will be added if not specified: +defaulted_tags: ['ctk', 'cpu', 'gpu', 
'host_compiler', 'device_compiler', 'project', 'os'] + +# Tags that may be omitted: +optional_tags: ['std', 'cmake_cuda_arch', 'cmake_options'] + +# job_types that have an implied prerequisite 'build' job: +build_required_job_types: + - 'test' + +# job_types that require a GPU +gpu_required_job_types: + - 'test' + - 'nvrtc' + - 'infra' # cccl infra's example project test launches a kernel + +formatted_job_types: # Default: Capitalize first letter. + 'nvrtc': 'NVRTC' + 'verify_codegen': 'VerifyCodegen' + +formatted_project_names: + 'libcudacxx': 'libcu++' + 'cub': 'CUB' + 'thrust': 'Thrust' + 'cccl': 'CCCL' + +formatted_host_compiler_names: + 'llvm': 'clang' + 'oneapi': 'intel' + 'cl': 'MSVC' + +# `default_`: Used when the tag is omitted. +default_ctk: *ctk_curr +default_device_compiler: 'nvcc' +default_host_compiler: *gcc12 +default_cpu: 'amd64' +default_gpu: 'v100' +default_project: + - 'libcudacxx' + - 'cub' + - 'thrust' +# Special handling: lookup map +default_os_lookup: + 'ctk11.1-gcc6': 'ubuntu18.04' + 'ctk11.1-gcc7': 'ubuntu18.04' + 'ctk11.1-gcc8': 'ubuntu18.04' + 'ctk11.1-gcc9': 'ubuntu18.04' + 'ctk11.1-llvm9': 'ubuntu18.04' + 'ctk11.1-cl14.16': 'windows2022' + 'ctk11.8-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc7': 'ubuntu20.04' + 'ctk12.4-gcc8': 'ubuntu20.04' + 'ctk12.4-gcc9': 'ubuntu20.04' + 'ctk12.4-gcc10': 'ubuntu20.04' + 'ctk12.4-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc12': 'ubuntu22.04' + 'ctk12.4-llvm9': 'ubuntu20.04' + 'ctk12.4-llvm10': 'ubuntu20.04' + 'ctk12.4-llvm11': 'ubuntu20.04' + 'ctk12.4-llvm12': 'ubuntu20.04' + 'ctk12.4-llvm13': 'ubuntu20.04' + 'ctk12.4-llvm14': 'ubuntu20.04' + 'ctk12.4-llvm15': 'ubuntu22.04' + 'ctk12.4-llvm16': 'ubuntu22.04' + 'ctk12.4-cl14.29': 'windows2022' + 'ctk12.4-cl14.39': 'windows2022' + 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' + +gpus: + - 'v100' # ?? runners + - 't4' # 8 runners + - 'rtx2080' # 8 runners + - 'rtxa6000' # 12 runners + - 'l4' # 48 runners + - 'rtx4090' # 10 runners + - 'h100' # 16 runners + +gpus_sm: + 'v100': '70' + 't4': '75' + 'rtx2080': '75' + 'rtxa6000': '86' + 'l4': '89' + 'rtx4090': '89' + 'h100': '90' + +gpus_mem_gb: + 'v100': '32' + 't4': '16' + 'rtx2080': '8' + 'rtxa6000': '48' + 'l4': '24' + 'rtx4090': '24' + 'h100': '80' + +testing_pool_gpus: + - 't4' + - 'rtx2080' + - 'rtxa6000' + - 'l4' + - 'rtx4090' + - 'h100' diff --git a/ci/verify_codegen.sh b/ci/verify_codegen_libcudacxx.sh similarity index 100% rename from ci/verify_codegen.sh rename to ci/verify_codegen_libcudacxx.sh
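To make the matrix-expansion step above concrete, here is a minimal, standalone sketch that mirrors the `explode_tags()` recursion from `ci/compute-matrix.py`: every list-valued tag except `job_types` is expanded into one job per value. The sample entry is hypothetical and only illustrates the shape of a `workflows.pull_request` item; it is not taken verbatim from `ci/matrix.yaml`.

```python
#!/usr/bin/env python3
# Minimal sketch of the explode_tags() recursion used by ci/compute-matrix.py:
# each list-valued tag (other than 'job_types') is expanded into one job per value.
import copy
import json


def explode_tags(matrix_job):
    # Find the first list-valued tag (other than 'job_types') to expand.
    explode_tag = next((tag for tag, value in matrix_job.items()
                        if tag != 'job_types' and isinstance(value, list)), None)
    if not explode_tag:
        return [matrix_job]
    result = []
    for value in matrix_job[explode_tag]:
        new_job = copy.deepcopy(matrix_job)
        new_job[explode_tag] = value
        result.extend(explode_tags(new_job))  # recurse: other list tags may remain
    return result


if __name__ == '__main__':
    # Hypothetical entry in the style of matrix.yaml's `workflows.pull_request` list.
    entry = {
        'job_types': ['build'],
        'ctk': '12.4',
        'host_compiler': [{'name': 'gcc', 'version': '7', 'exe': 'g++'},
                          {'name': 'gcc', 'version': '8', 'exe': 'g++'}],
        'std': [11, 14, 17],
    }
    jobs = explode_tags(entry)
    print(f"{len(jobs)} jobs")          # 2 compilers x 3 standards = 6 jobs
    print(json.dumps(jobs[0], indent=2))
```

`job_types` is deliberately left as a list: `generate_dispatch_group_jobs()` later pairs a `build` job with its `test` jobs into a single `two_stage` producer/consumer entry instead of emitting them as independent jobs.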