From 2babc988c128a7ef5fb7cada650bb03c99b0ccd3 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 23 Apr 2024 00:28:22 +0000 Subject: [PATCH 1/4] Don't fail on bad-alloc for large memory test. --- cub/test/catch2_test_device_select_if.cu | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cub/test/catch2_test_device_select_if.cu b/cub/test/catch2_test_device_select_if.cu index 38a071e004..e38f9957d6 100644 --- a/cub/test/catch2_test_device_select_if.cu +++ b/cub/test/catch2_test_device_select_if.cu @@ -394,6 +394,7 @@ CUB_TEST("DeviceSelect::If works with a different output type", "[device][select } CUB_TEST("DeviceSelect::If works for very large number of items", "[device][select_if]", offset_types) +try { using type = std::int64_t; using offset_t = typename c2h::get<0, TestType>; @@ -434,8 +435,13 @@ CUB_TEST("DeviceSelect::If works for very large number of items", "[device][sele bool all_results_correct = thrust::equal(out.cbegin(), out.cend(), expected_out_it); REQUIRE(all_results_correct == true); } +catch (std::bad_alloc&) +{ + // Exceeding memory is not a failure. +} CUB_TEST("DeviceSelect::If works for very large number of output items", "[device][select_if]", offset_types) +try { using type = std::uint8_t; using offset_t = typename c2h::get<0, TestType>; @@ -472,3 +478,7 @@ CUB_TEST("DeviceSelect::If works for very large number of output items", "[devic REQUIRE(num_selected_out[0] == num_items); REQUIRE(in == out); } +catch (std::bad_alloc&) +{ + // Exceeding memory is not a failure. +} From e40e51e9973661f6a0174e7b92aa0b299d321b18 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 20 Apr 2024 03:25:45 +0000 Subject: [PATCH 2/4] Rebuild CI infra, add nightly workflow. --- .devcontainer/make_devcontainers.sh | 10 +- .github/actions/compute-matrix/action.yml | 25 - .../actions/compute-matrix/compute-matrix.sh | 82 -- .github/actions/workflow-build/action.yml | 95 +++ .../actions/workflow-build/build-workflow.py | 794 ++++++++++++++++++ .github/actions/workflow-results/action.yml | 96 +++ .../actions/workflow-results/final-summary.py | 50 ++ .../prepare-execution-summary.py | 247 ++++++ .../workflow-results/verify-job-success.py | 30 + .github/workflows/build-and-test-linux.yml | 47 -- .github/workflows/build-and-test-windows.yml | 48 -- .github/workflows/ci-workflow-nightly.yml | 107 +++ .../workflows/ci-workflow-pull-request.yml | 134 +++ .github/workflows/dispatch-build-and-test.yml | 51 -- .github/workflows/pr.yml | 242 ------ .github/workflows/run-as-coder.yml | 68 -- .github/workflows/workflow-dispatch-job.yml | 166 ++++ .../workflows/workflow-dispatch-two-stage.yml | 64 ++ .github/workflows/workflow-dispatch.yml | 53 ++ .gitignore | 2 +- CMakePresets.json | 2 +- ci-overview.md | 2 + ci/build_common.sh | 7 +- ci/infra_cccl.sh | 20 + ci/inspect_changes.sh | 95 ++- ci/matrix.yaml | 301 +++++-- ...odegen.sh => verify_codegen_libcudacxx.sh} | 0 27 files changed, 2181 insertions(+), 657 deletions(-) delete mode 100644 .github/actions/compute-matrix/action.yml delete mode 100755 .github/actions/compute-matrix/compute-matrix.sh create mode 100644 .github/actions/workflow-build/action.yml create mode 100755 .github/actions/workflow-build/build-workflow.py create mode 100644 .github/actions/workflow-results/action.yml create mode 100755 .github/actions/workflow-results/final-summary.py create mode 100755 .github/actions/workflow-results/prepare-execution-summary.py create mode 100755 .github/actions/workflow-results/verify-job-success.py delete mode 100644 
.github/workflows/build-and-test-linux.yml delete mode 100644 .github/workflows/build-and-test-windows.yml create mode 100644 .github/workflows/ci-workflow-nightly.yml create mode 100644 .github/workflows/ci-workflow-pull-request.yml delete mode 100644 .github/workflows/dispatch-build-and-test.yml delete mode 100644 .github/workflows/pr.yml delete mode 100644 .github/workflows/run-as-coder.yml create mode 100644 .github/workflows/workflow-dispatch-job.yml create mode 100644 .github/workflows/workflow-dispatch-two-stage.yml create mode 100644 .github/workflows/workflow-dispatch.yml create mode 100755 ci/infra_cccl.sh rename ci/{verify_codegen.sh => verify_codegen_libcudacxx.sh} (100%) diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index f868cc14f1..083dc443d2 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -74,6 +74,7 @@ while [[ $# -gt 0 ]]; do done MATRIX_FILE="../ci/matrix.yaml" +COMPUTE_MATRIX="../.github/actions/workflow-build/build-workflow.py" # Enable verbose mode if requested if [ "$VERBOSE" = true ]; then @@ -82,16 +83,17 @@ if [ "$VERBOSE" = true ]; then fi # Read matrix.yaml and convert it to json -matrix_json=$(yq -o json ${MATRIX_FILE}) +matrix_json=$(python3 ${COMPUTE_MATRIX} ${MATRIX_FILE} --devcontainer-info) -# Exclude Windows environments -readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))') +if [ "$VERBOSE" = true ]; then + echo "$matrix_json" +fi # Get the devcontainer image version and define image tag root readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version') # Get unique combinations of cuda version, compiler name/version, and Ubuntu version -readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]') +readonly combinations=$(echo "$matrix_json" | jq -c '.combinations[]') # Update the base devcontainer with the default values # The root devcontainer.json file is used as the default container as well as a template for all diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml deleted file mode 100644 index b8155e7aa6..0000000000 --- a/.github/actions/compute-matrix/action.yml +++ /dev/null @@ -1,25 +0,0 @@ - -name: Compute Matrix -description: "Compute the matrix for a given matrix type from the specified matrix file" - -inputs: - matrix_query: - description: "The jq query used to specify the desired matrix. 
e.g., .pull_request.nvcc" - required: true - matrix_file: - description: 'The file containing the matrix' - required: true -outputs: - matrix: - description: 'The requested matrix' - value: ${{ steps.compute-matrix.outputs.MATRIX }} - -runs: - using: "composite" - steps: - - name: Compute matrix - id: compute-matrix - run: | - MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) - echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT - shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh deleted file mode 100755 index 1629836d21..0000000000 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -write_output() { - local key="$1" - local value="$2" - echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" -} - -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. as $o | {lib: $o.lib[]} + del($o.lib))' -} - -# Filter out the libraries that are dirty -filter_libs() { - all_libs=("libcudacxx" "thrust" "cub") - dirty_libs=() - for lib in "${all_libs[@]}"; do - dirty_var_name="${lib^^}_DIRTY" - # If the variable named in dirty_var_name is not set, set it to false: - : "${!dirty_var_name:=false}" - # Declare a nameref to the variable named in dirty_var_name - declare -n lib_dirty="$dirty_var_name" - # echo "${lib^^}_DIRTY: ${lib_dirty}" >> /dev/stderr - if [ "${lib_dirty}" = "true" ]; then - dirty_libs+=("$lib") - fi - done - # echo "Dirty libraries: ${dirty_libs[@]}" >> /dev/stderr - - # Construct a regex to filter out the dirty libraries - dirty_lib_regex=$(IFS="|"; echo "${dirty_libs[*]}") - dirty_lib_regex="^(${dirty_lib_regex})\$" - jq_filter="map(select(.lib | test(\"$dirty_lib_regex\")))" - jq -cr "$jq_filter" -} - -extract_matrix() { - local file="$1" - local type="$2" - local matrix=$(yq -o=json "$file" | jq -cr ".$type") - write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" - local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" - write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" - write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" - - write_output "NVRTC_MATRIX" "$(echo "$matrix" | jq '.nvrtc' | explode_std_versions)" - - local clang_cuda_matrix="$(echo "$matrix" | jq -cr '.["clang-cuda"]' | explode_std_versions | explode_libs | filter_libs)" - write_output "CLANG_CUDA_MATRIX" "$clang_cuda_matrix" - write_output "CCCL_INFRA_MATRIX" "$(echo "$matrix" | jq -cr '.["cccl-infra"]' )" -} - -main() { - if [ "$1" == "-v" ]; then - set -x - shift - fi - - if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then - echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" - echo " -v : Enable verbose output" - echo " MATRIX_FILE : The path to the matrix file." - echo " MATRIX_TYPE : The desired matrix. 
Supported values: 'pull_request'" - exit 1 - fi - - echo "Input matrix file:" >&2 - cat "$1" >&2 - echo "Matrix Type: $2" >&2 - - extract_matrix "$1" "$2" -} - -main "$@" diff --git a/.github/actions/workflow-build/action.yml b/.github/actions/workflow-build/action.yml new file mode 100644 index 0000000000..36819b6229 --- /dev/null +++ b/.github/actions/workflow-build/action.yml @@ -0,0 +1,95 @@ +name: "CCCL Build Workflow" +description: "Parses a matrix definition and exports a set of dispatchable build/test/etc jobs." + +inputs: + workflows: + description: "Space separated list of workflows in matrix file to run" + required: true + skip_tests: + description: "Skip running tests" + default: "false" + required: false + inspect_changes_script: + description: "If defined, run this script to determine which projects/deps need to be tested." + default: "" + required: false + inspect_changes_base_sha: + description: "If defined, use this base ref for inspect-changes script." + default: "" + required: false + matrix_file: + description: "Path to the matrix file in the consumer repository." + default: "ci/matrix.yaml" + required: false + matrix_parser: + description: "Path to the matrix parser script (default if blank: build-workflow.py from action dir)" + default: "" + required: false + +outputs: + workflow: + description: "The dispatchable workflow matrix" + value: ${{ steps.build-workflow.outputs.workflow }} + workflow_keys: + description: "The keys of the parsed workflow" + value: ${{ steps.build-workflow.outputs.workflow_keys }} + +runs: + using: "composite" + steps: + + - name: Inspect changes + if: ${{ inputs.inspect_changes_script != '' && inputs.inspect_changes_base_sha != '' }} + id: inspect-changes + shell: bash --noprofile --norc -euo pipefail {0} + env: + base_ref: ${{ inputs.inspect_changes_base_sha }} + run: | + echo "Running inspect-changes script..." + ${{ inputs.inspect_changes_script }} ${base_ref} ${GITHUB_SHA} + echo "Exporting summary..." + mkdir workflow + cp ${GITHUB_STEP_SUMMARY} workflow/changes.md + + - name: Parse matrix file into a workflow + id: build-workflow + shell: bash --noprofile --norc -euo pipefail {0} + env: + skip_tests: ${{ inputs.skip_tests == 'true' && '--skip-tests' || ''}} + dirty_projects_flag: ${{ steps.inspect-changes.outputs.dirty_projects != '' && '--dirty-projects' || ''}} + dirty_projects: ${{ steps.inspect-changes.outputs.dirty_projects }} + matrix_parser: ${{ inputs.matrix_parser && inputs.matrix_parser || '${GITHUB_ACTION_PATH}/build-workflow.py' }} + run: | + echo "Parsing matrix file into a workflow..." + + ${{ env.matrix_parser }} ${{ inputs.matrix_file }} \ + --workflows ${{ inputs.workflows }} \ + ${{ env.skip_tests }} \ + ${{ env.dirty_projects_flag }} ${{ env.dirty_projects }} + + echo "::group::Workflow" + cat workflow/workflow.json + echo "::endgroup::" + + echo "::group::Runners" + cat workflow/runner_summary.json | jq -r '"# \(.heading)\n\n\(.body)"' | tee -a "${GITHUB_STEP_SUMMARY}" + echo "::endgroup::" + + echo "::group::Job List" + cat workflow/job_list.txt + echo "::endgroup::" + + echo "Setting outputs..." 
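          # WORKFLOW holds the dispatch-group JSON consumed by workflow-dispatch.yml;
          # WORKFLOW_KEYS lists the group names used to fan out the per-group job matrix.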
+ echo "::group::GHA Output: WORKFLOW" + printf "WORKFLOW=%s\n" "$(cat workflow/workflow.json | jq -c '.')" | tee -a "${GITHUB_OUTPUT}" + echo "::endgroup::" + + echo "::group::GHA Output: WORKFLOW_KEYS" + printf "WORKFLOW_KEYS=%s\n" "$(cat workflow/workflow_keys.json | jq -c '.')" | tee -a "${GITHUB_OUTPUT}" + echo "::endgroup::" + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: workflow + path: workflow/ diff --git a/.github/actions/workflow-build/build-workflow.py b/.github/actions/workflow-build/build-workflow.py new file mode 100755 index 0000000000..34ee6dfafe --- /dev/null +++ b/.github/actions/workflow-build/build-workflow.py @@ -0,0 +1,794 @@ +#!/usr/bin/env python3 + +""" +Concepts: +- matrix_job: an entry of a workflow matrix, converted from matrix.yaml["workflow"][id] into a JSON object. + Example: + { + "jobs": [ + "test" + ], + "ctk": "11.1", + "gpu": "t4", + "sm": "75-real", + "cxx": { + "name": "llvm", + "version": "9", + "exe": "clang++" + }, + "std": [ + 17 + ], + "project": [ + "libcudacxx", + "cub", + "thrust" + ], + "os": "ubuntu18.04" + } + +Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where +the matrix job is turned into one or more dispatch groups consisting of potentially many jobs. + +- dispatch_group_json: A json object used in conjunction with the ci-dispatch-groups.yml GHA workflow. + Example: + { + "": { + "standalone": [ {}, ... ] + "two_stage": [ {}, ] + } + } + +- two_stage_json: A json object that represents bulk-synchronous producer/consumer jobs, used with ci-dispatch-two-stage.yml. + Example: + { + "id": "", # Used as a compact unique name for the GHA dispatch workflows. + "producers": [ {}, ... ], + "consumers": [ {}, ... ] + } + +- job_json: A json object that represents a single job in a workflow. Used with ci-dispatch-job.yml. + Example: + { + "id": "", # Used as a compact unique name for the GHA dispatch workflows. + "name": "...", + "runner": "...", + "image": "...", + "command": "..." }, + } +""" + +import argparse +import base64 +import copy +import json +import os +import re +import struct +import sys +import yaml + +matrix_yaml = None + + +def generate_guids(): + """ + Simple compact global unique ID generator. + Produces up to 65535 unique IDs between 1-3 characters in length. + Throws an exception once exhausted. 
+ """ + i = 0 + while True: + # Generates a base64 hash of an incrementing 16-bit integer: + hash = base64.b64encode(struct.pack(">H", i)).decode('ascii') + # Strips off up-to 2 leading 'A' characters and a single trailing '=' characters, if they exist: + guid = re.sub(r'^A{0,2}', '', hash).removesuffix("=") + yield guid + i += 1 + if i >= 65535: + raise Exception("GUID generator exhausted.") + + +guid_generator = generate_guids() + + +def write_json_file(filename, json_object): + with open(filename, 'w') as f: + json.dump(json_object, f, indent=2) + + +def write_text_file(filename, text): + with open(filename, 'w') as f: + print(text, file=f) + + +def error_message_with_matrix_job(matrix_job, message): + return f"{matrix_job['origin']['workflow_location']}: {message}\n Input: {matrix_job['origin']['original_matrix_job']}" + + +def get_all_matrix_job_tags_sorted(): + required_tags = set(matrix_yaml['required_tags']) + defaulted_tags = set(matrix_yaml['defaulted_tags']) + optional_tags = set(matrix_yaml['optional_tags']) + all_tags = required_tags | defaulted_tags | optional_tags + + # Sorted using a highly subjective opinion on importance: + # Always first, information dense: + sorted_important_tags = ['project', 'jobs', 'cudacxx', 'cxx', 'ctk', 'gpu', 'std', 'sm', 'cpu'] + + # Always last, derived: + sorted_noise_tags = ['os', 'origin'] + + # In between? + sorted_tags = set(sorted_important_tags + sorted_noise_tags) + sorted_meh_tags = sorted(list(all_tags - sorted_tags)) + + return sorted_important_tags + sorted_meh_tags + sorted_noise_tags + + +def lookup_os(ctk, host_compiler): + key = f'ctk{ctk}-{host_compiler["name"]}{host_compiler["version"]}' + if not key in matrix_yaml['default_os_lookup']: + raise Exception(f"Missing matrix.yaml `default_os_lookup` entry for key `{key}`") + return matrix_yaml['default_os_lookup'][key] + + +def lookup_supported_stds(device_compiler=None, host_compiler=None): + stds = set(matrix_yaml['all_stds']) + if device_compiler: + key = f"{device_compiler['name']}{device_compiler['version']}" + if not key in matrix_yaml['lookup_cudacxx_supported_stds']: + raise Exception(f"Missing matrix.yaml 'lookup_cudacxx_supported_stds' entry for key '{key}'") + stds = stds & set(matrix_yaml['lookup_cudacxx_supported_stds'][key]) + if host_compiler: + key = f"{host_compiler['name']}{host_compiler['version']}" + if not key in matrix_yaml['lookup_cxx_supported_stds']: + raise Exception(f"Missing matrix.yaml 'lookup_cxx_supported_stds' entry for key '{key}'") + stds = stds & set(matrix_yaml['lookup_cxx_supported_stds'][key]) + return sorted(list(stds)) + + +def get_formatted_project_name(project_name): + if project_name in matrix_yaml['formatted_project_names']: + return matrix_yaml['formatted_project_names'][project_name] + return project_name + + +def get_formatted_host_compiler_name(host_compiler): + config_name = host_compiler['name'] + if config_name in matrix_yaml['formatted_cxx_names']: + return matrix_yaml['formatted_cxx_names'][config_name] + return config_name + + +def get_formatted_job_type(job_type): + if job_type in matrix_yaml['formatted_jobs']: + return matrix_yaml['formatted_jobs'][job_type] + # Return with first letter capitalized: + return job_type.capitalize() + + +def is_windows(matrix_job): + return matrix_job['os'].startswith('windows') + + +def generate_dispatch_group_name(matrix_job): + project_name = get_formatted_project_name(matrix_job['project']) + ctk = matrix_job['ctk'] + device_compiler = matrix_job['cudacxx'] + host_compiler_name = 
get_formatted_host_compiler_name(matrix_job['cxx']) + + compiler_info = "" + if device_compiler['name'] == 'nvcc': + compiler_info = f"nvcc {host_compiler_name}" + elif device_compiler['name'] == 'llvm': + compiler_info = f"clang-cuda" + else: + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler_name}" + + return f"{project_name} {compiler_info} CTK{ctk}" + + +def generate_dispatch_job_name(matrix_job, job_type): + std_str = ("C++" + str(matrix_job['std']) + " ") if 'std' in matrix_job else '' + cpu_str = matrix_job['cpu'] + gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_jobs'] else "" + cuda_compile_arch = (" sm{" + matrix_job['sm'] + "}") if 'sm' in matrix_job else "" + cmake_options = (' ' + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" + + host_compiler_name = get_formatted_host_compiler_name(matrix_job['cxx']) + host_compiler_info = f"{host_compiler_name}{matrix_job['cxx']['version']}" + + config_tag = f"{std_str}{host_compiler_info}" + + formatted_job_type = get_formatted_job_type(job_type) + + extra_info = f":{cuda_compile_arch}{cmake_options}" if cuda_compile_arch or cmake_options else "" + + return f"[{config_tag}] {formatted_job_type}({cpu_str}{gpu_str}){extra_info}" + + +def generate_dispatch_job_runner(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu = matrix_job['cpu'] + + if not job_type in matrix_yaml['gpu_required_jobs']: + return f"{runner_os}-{cpu}-cpu16" + + gpu = matrix_job['gpu'] + suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + + return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + + +def generate_dispatch_job_image(matrix_job, job_type): + devcontainer_version = matrix_yaml['devcontainer_version'] + ctk = matrix_job['ctk'] + image_os = matrix_job['os'] + host_compiler = matrix_job['cxx']['name'] + matrix_job['cxx']['version'] + + if is_windows(matrix_job): + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + + +def generate_dispatch_job_command(matrix_job, job_type): + script_path = "ci/windows" if is_windows(matrix_job) else "ci" + script_ext = ".ps1" if is_windows(matrix_job) else ".sh" + script_job_type = job_type + script_project = matrix_job['project'] + script_name = f"{script_path}/{script_job_type}_{script_project}{script_ext}" + + std_str = str(matrix_job['std']) if 'std' in matrix_job else '' + + host_compiler_exe = matrix_job['cxx']['exe'] + device_compiler_name = matrix_job['cudacxx']['name'] + device_compiler_exe = matrix_job['cudacxx']['exe'] + + cuda_compile_arch = matrix_job['sm'] if 'sm' in matrix_job else '' + cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' + + command = f"\"{script_name}\"" + if std_str: + command += f" -std \"{std_str}\"" + if cuda_compile_arch: + command += f" -arch \"{cuda_compile_arch}\"" + if device_compiler_name != 'nvcc': + command += f" -cuda \"{device_compiler_exe}\"" + if cmake_options: + cmake_args = " ".join([f"{key}={value}" for key, value in cmake_options.items()]) + command += f" -cmake-options \"{cmake_args}\"" + + return command + + +def generate_dispatch_job_origin(matrix_job, job_type): + origin = matrix_job['origin'].copy() + + matrix_job = matrix_job.copy() + del matrix_job['origin'] + + matrix_job['jobs'] = job_type + + if 'cxx' in matrix_job: + host_compiler = 
matrix_job['cxx'] + formatted_name = get_formatted_host_compiler_name(host_compiler) + matrix_job['cxx_name'] = formatted_name + matrix_job['cxx_full'] = formatted_name + host_compiler['version'] + del matrix_job['cxx'] + + if 'cudacxx' in matrix_job: + device_compiler = matrix_job['cudacxx'] + formatted_name = 'clang-cuda' if device_compiler['name'] == 'llvm' else device_compiler['name'] + matrix_job['cudacxx_name'] = formatted_name + matrix_job['cudacxx_full'] = formatted_name + device_compiler['version'] + del matrix_job['cudacxx'] + + origin['matrix_job'] = matrix_job + + return origin + + +def generate_dispatch_job_json(matrix_job, job_type): + return { + 'name': generate_dispatch_job_name(matrix_job, job_type), + 'runner': generate_dispatch_job_runner(matrix_job, job_type), + 'image': generate_dispatch_job_image(matrix_job, job_type), + 'command': generate_dispatch_job_command(matrix_job, job_type), + 'origin': generate_dispatch_job_origin(matrix_job, job_type) + } + + +# Create a single build producer, and a separate consumer for each test_job_type: +def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): + build_json = generate_dispatch_job_json(matrix_job, build_job_type) + + test_json = [] + for test_job_type in test_job_types: + test_json.append(generate_dispatch_job_json(matrix_job, test_job_type)) + + return { + "producers": [build_json], + "consumers": test_json + } + + +def generate_dispatch_group_jobs(matrix_job): + dispatch_group_jobs = { + "standalone": [], + "two_stage": [] + } + + job_types = set(matrix_job['jobs']) + + build_required = set(matrix_yaml['build_required_jobs']) & job_types + has_build_and_test = len(build_required) > 0 + job_types -= build_required + + has_standalone_build = 'build' in job_types and not has_build_and_test + job_types -= {'build'} + + if has_standalone_build: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, "build")) + elif has_build_and_test: + dispatch_group_jobs['two_stage'].append( + generate_dispatch_build_and_test_json(matrix_job, "build", build_required)) + + # Remaining jobs are assumed to be standalone (e.g. nvrtc): + for job_type in job_types: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) + + return dispatch_group_jobs + + +def matrix_job_to_dispatch_group(matrix_job, group_prefix=""): + return {group_prefix + generate_dispatch_group_name(matrix_job): + generate_dispatch_group_jobs(matrix_job)} + + +def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): + for group_name, group_json in new_dispatch_groups.items(): + if group_name not in accum_dispatch_groups: + accum_dispatch_groups[group_name] = group_json + else: + # iterate standalone and two_stage: + for key, value in group_json.items(): + accum_dispatch_groups[group_name][key] += value + + +def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig): + workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig) + + # Merge consumers for any two_stage arrays that have the same producer(s). Print a warning. 
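+    # For example (illustrative job names), two entries sharing one producer
+    # collapse into a single entry with the union of their consumers:
+    #   {producers: [build], consumers: [test_a]}
+    #   {producers: [build], consumers: [test_b]}
+    #     -> {producers: [build], consumers: [test_a, test_b]}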
+ for group_name, group_json in workflow_dispatch_groups.items(): + if not 'two_stage' in group_json: + continue + two_stage_json = group_json['two_stage'] + merged_producers = [] + merged_consumers = [] + for two_stage in two_stage_json: + producers = two_stage['producers'] + consumers = two_stage['consumers'] + if producers in merged_producers: + producer_index = merged_producers.index(producers) + matching_consumers = merged_consumers[producer_index] + + producer_names = ", ".join([producer['name'] for producer in producers]) + print(f"::notice file=ci/matrix.yaml::Merging consumers for duplicate producer '{producer_names}' in '{group_name}'", + file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Original consumers: {consumer_names}", file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in consumers]) + print(f"::notice file=ci/matrix.yaml::Duplicate consumers: {consumer_names}", file=sys.stderr) + # Merge if unique: + for consumer in consumers: + if consumer not in matching_consumers: + matching_consumers.append(consumer) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::notice file=ci/matrix.yaml::Merged consumers: {consumer_names}", file=sys.stderr) + else: + merged_producers.append(producers) + merged_consumers.append(consumers) + # Update with the merged lists: + two_stage_json = [] + for producers, consumers in zip(merged_producers, merged_consumers): + two_stage_json.append({'producers': producers, 'consumers': consumers}) + group_json['two_stage'] = two_stage_json + + # Check for any duplicate jobs in standalone arrays. Warn and remove duplicates. + for group_name, group_json in workflow_dispatch_groups.items(): + standalone_jobs = group_json['standalone'] if 'standalone' in group_json else [] + unique_standalone_jobs = [] + for job_json in standalone_jobs: + if job_json in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'", + file=sys.stderr) + else: + unique_standalone_jobs.append(job_json) + + # If any producer/consumer jobs exist in standalone arrays, warn and remove the standalones. + two_stage_jobs = group_json['two_stage'] if 'two_stage' in group_json else [] + for two_stage_job in two_stage_jobs: + for producer in two_stage_job['producers']: + if producer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{producer['name']}' " + + f"as it appears as a producer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(producer) + for consumer in two_stage_job['consumers']: + if consumer in unique_standalone_jobs: + print(f"::notice file=ci/matrix.yaml::Removing standalone job '{consumer['name']}' " + + f"as it appears as a consumer in '{group_name}'", + file=sys.stderr) + unique_standalone_jobs.remove(consumer) + standalone_jobs = list(unique_standalone_jobs) + + # If any producer or consumer job appears more than once, warn and leave as-is. 
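+    # (A duplicate cannot be removed here without dropping a dependency edge,
+    # so it is only reported; each copy will be dispatched and executed.)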
+ all_two_stage_jobs = [] + duplicate_jobs = {} + for two_stage_job in two_stage_jobs: + for job in two_stage_job['producers'] + two_stage_job['consumers']: + if job in all_two_stage_jobs: + duplicate_jobs[job['name']] = duplicate_jobs.get(job['name'], 1) + 1 + else: + all_two_stage_jobs.append(job) + for job_name, count in duplicate_jobs.items(): + print(f"::warning file=ci/matrix.yaml::" + + f"Job '{job_name}' appears {count} times in '{group_name}'.", + f"Cannot remove duplicate while resolving dependencies. This job WILL execute {count} times.", + file=sys.stderr) + + # Remove all named values that contain an empty list of jobs: + for group_name, group_json in workflow_dispatch_groups.items(): + if not group_json['standalone'] and not group_json['two_stage']: + del workflow_dispatch_groups[group_name] + elif not group_json['standalone']: + del group_json['standalone'] + elif not group_json['two_stage']: + del group_json['two_stage'] + + # Natural sort impl (handles embedded numbers in strings, case insensitive) + def natural_sort_key(key): + return [(int(text) if text.isdigit() else text.lower()) for text in re.split('(\d+)', key)] + + # Sort the dispatch groups by name: + workflow_dispatch_groups = dict(sorted(workflow_dispatch_groups.items(), key=lambda x: natural_sort_key(x[0]))) + + # Sort the jobs within each dispatch group: + for group_name, group_json in workflow_dispatch_groups.items(): + if 'standalone' in group_json: + group_json['standalone'] = sorted(group_json['standalone'], key=lambda x: natural_sort_key(x['name'])) + if 'two_stage' in group_json: + group_json['two_stage'] = sorted( + group_json['two_stage'], key=lambda x: natural_sort_key(x['producers'][0]['name'])) + + # Check to see if any .two_stage.producers arrays have more than 1 job, which is not supported. + # See ci-dispatch-two-stage.yml for details. + for group_name, group_json in workflow_dispatch_groups.items(): + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + num_producers = len(two_stage_json['producers']) + if num_producers > 1: + producer_names = "" + for job in two_stage_json['producers']: + producer_names += f" - {job['name']}\n" + error_message = f"ci-dispatch-two-stage.yml currently only supports a single producer. " + error_message += f"Found {num_producers} producers in '{group_name}':\n{producer_names}" + print(f"::error file=ci/matrix.yaml::{error_message}", file=sys.stderr) + raise Exception(error_message) + + # Assign unique IDs in appropriate locations. + # These are used to give "hidden" dispatch jobs a short, unique name, + # otherwise GHA generates a long, cluttered name. 
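+    # The IDs come from generate_guids() above, so they stay 1-3 characters long.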
+    for group_name, group_json in workflow_dispatch_groups.items():
+        if 'standalone' in group_json:
+            for job_json in group_json['standalone']:
+                job_json['id'] = next(guid_generator)
+        if 'two_stage' in group_json:
+            for two_stage_json in group_json['two_stage']:
+                two_stage_json['id'] = next(guid_generator)
+                for job_json in two_stage_json['producers'] + two_stage_json['consumers']:
+                    job_json['id'] = next(guid_generator)
+
+    return workflow_dispatch_groups
+
+
+def find_workflow_line_number(workflow_name):
+    regex = re.compile(f"^( )*{workflow_name}:", re.IGNORECASE)
+    line_number = 0
+    with open(matrix_yaml['filename'], 'r') as f:
+        for line in f:
+            line_number += 1
+            if regex.match(line):
+                return line_number
+    raise Exception(
+        f"Workflow '{workflow_name}' not found in {matrix_yaml['filename']} (could not match regex: {regex})")
+
+
+def get_matrix_job_origin(matrix_job, workflow_name, workflow_location):
+    filename = matrix_yaml['filename']
+    original_matrix_job = json.dumps(matrix_job, indent=None, separators=(', ', ': '))
+    original_matrix_job = original_matrix_job.replace('"', '')
+    return {
+        'filename': filename,
+        'workflow_name': workflow_name,
+        'workflow_location': workflow_location,
+        'original_matrix_job': original_matrix_job
+    }
+
+
+def remove_skip_test_jobs(matrix_jobs):
+    '''Remove jobs defined in `matrix_file.skip_test_jobs`.'''
+    new_matrix_jobs = []
+    for matrix_job in matrix_jobs:
+        jobs = matrix_job['jobs']
+        new_jobs = set()
+        for job in jobs:
+            if job in matrix_yaml['skip_test_jobs']:
+                # If a skipped test job is a build_required_job, replace it with the 'build' job.
+                if job in matrix_yaml['build_required_jobs']:
+                    # Replace with the prerequisite build job:
+                    new_jobs.add('build')
+                # If a skipped test job is not a build_required_job, ignore it.
+                else:
+                    pass # Ignore the job
+            else:
+                new_jobs.add(job)
+        # If no jobs remain, skip this matrix job.
+        if new_jobs:
+            new_matrix_job = copy.deepcopy(matrix_job)
+            new_matrix_job['jobs'] = list(new_jobs)
+            new_matrix_jobs.append(new_matrix_job)
+    return new_matrix_jobs
+
+
+def validate_required_tags(matrix_job):
+    for tag in matrix_yaml['required_tags']:
+        if tag not in matrix_job:
+            raise Exception(error_message_with_matrix_job(matrix_job, f"Missing required tag '{tag}'"))
+
+    all_tags = get_all_matrix_job_tags_sorted()
+    for tag in matrix_job:
+        if tag not in all_tags:
+            raise Exception(error_message_with_matrix_job(matrix_job, f"Unknown tag '{tag}'"))
+
+    if 'gpu' in matrix_job and matrix_job['gpu'] not in matrix_yaml['gpus']:
+        raise Exception(error_message_with_matrix_job(matrix_job, f"Unknown gpu '{matrix_job['gpu']}'"))
+
+
+def set_default_tags(matrix_job):
+    generic_defaults = set(matrix_yaml['defaulted_tags'])
+    generic_defaults -= set(['os']) # handled specially.
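+    # 'os' is derived rather than defaulted: set_derived_tags() computes it
+    # from the ctk + host-compiler pair via lookup_os() when a job omits it.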
+
+    for tag in generic_defaults:
+        if tag not in matrix_job:
+            matrix_job[tag] = matrix_yaml['default_'+tag]
+
+
+def set_derived_tags(matrix_job):
+    if 'os' not in matrix_job:
+        matrix_job['os'] = lookup_os(matrix_job['ctk'], matrix_job['cxx'])
+
+    # Expand nvcc device compiler shortcut:
+    if matrix_job['cudacxx'] == 'nvcc':
+        matrix_job['cudacxx'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'}
+
+    if 'sm' in matrix_job and matrix_job['sm'] == 'gpu':
+        if not 'gpu' in matrix_job:
+            raise Exception(error_message_with_matrix_job(matrix_job, f"\"sm: 'gpu'\" requires tag 'gpu'."))
+        if not matrix_job['gpu'] in matrix_yaml['gpu_sm']:
+            raise Exception(error_message_with_matrix_job(matrix_job,
+                                                          f"Missing matrix.yaml 'gpu_sm' entry for gpu '{matrix_job['gpu']}'"))
+        matrix_job['sm'] = matrix_yaml['gpu_sm'][matrix_job['gpu']]
+
+    if 'std' in matrix_job and matrix_job['std'] == 'all':
+        host_compiler = matrix_job['cxx'] if 'cxx' in matrix_job else None
+        device_compiler = matrix_job['cudacxx'] if 'cudacxx' in matrix_job else None
+        matrix_job['std'] = lookup_supported_stds(device_compiler, host_compiler)
+
+
+def next_explode_tag(matrix_job):
+    for tag in matrix_job:
+        if not tag in matrix_yaml['non_exploded_tags'] and isinstance(matrix_job[tag], list):
+            return tag
+    return None
+
+
+def explode_tags(matrix_job, explode_tag=None):
+    if not explode_tag:
+        explode_tag = next_explode_tag(matrix_job)
+
+    if not explode_tag:
+        return [matrix_job]
+
+    result = []
+    for value in matrix_job[explode_tag]:
+        new_job = copy.deepcopy(matrix_job)
+        new_job[explode_tag] = value
+        result.extend(explode_tags(new_job))
+
+    return result
+
+
+def preprocess_matrix_jobs(matrix_jobs):
+    result = []
+    for matrix_job in matrix_jobs:
+        validate_required_tags(matrix_job)
+        set_default_tags(matrix_job)
+        for job in explode_tags(matrix_job):
+            set_derived_tags(job)
+            # The derived tags may need to be exploded again:
+            result.extend(explode_tags(job))
+    return result
+
+
+def parse_workflow_matrix_jobs(args, workflow_name):
+    if not workflow_name in matrix_yaml['workflows']:
+        raise Exception(f"Workflow '{workflow_name}' not found in matrix file '{matrix_yaml['filename']}'")
+
+    matrix_jobs = matrix_yaml['workflows'][workflow_name]
+    workflow_line_number = find_workflow_line_number(workflow_name)
+
+    # Tag with the original matrix info, location, etc. for error messages and post-processing.
+    # Do this first so the original tags / order / idx match the input object exactly.
+    for idx, matrix_job in enumerate(matrix_jobs):
+        workflow_location = f"{matrix_yaml['filename']}:{workflow_line_number} (job {idx + 1})"
+        matrix_job['origin'] = get_matrix_job_origin(matrix_job, workflow_name, workflow_location)
+
+    # Fill in default values, explode lists.
+    matrix_jobs = preprocess_matrix_jobs(matrix_jobs)
+
+    if args.skip_tests:
+        matrix_jobs = remove_skip_test_jobs(matrix_jobs)
+    if args.dirty_projects:
+        matrix_jobs = [job for job in matrix_jobs if job['project'] in args.dirty_projects]
+
+    # Sort the tags by, *ahem*, "importance":
+    sorted_tags = get_all_matrix_job_tags_sorted()
+    matrix_jobs = [{tag: matrix_job[tag] for tag in sorted_tags if tag in matrix_job} for matrix_job in matrix_jobs]
+
+    return matrix_jobs
+
+
+def parse_workflow_dispatch_groups(args, workflow_name):
+    # Add origin information to each matrix job, explode, filter, add defaults, etc.
+    # The resulting matrix_jobs list is a complete and standardized list of jobs for the dispatch_group builder.
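+    # E.g. a single matrix entry with `std: [17, 20]` reaches this point as two
+    # fully-specified jobs, one per std, after explode_tags() has run.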
+ matrix_jobs = parse_workflow_matrix_jobs(args, workflow_name) + + # If we're printing multiple workflows, add a prefix to the group name to differentiate them. + group_prefix = f"[{workflow_name}] " if len(args.workflows) > 1 else "" + + # Convert the matrix jobs into a dispatch group object: + workflow_dispatch_groups = {} + for matrix_job in matrix_jobs: + matrix_job_dispatch_group = matrix_job_to_dispatch_group(matrix_job, group_prefix) + merge_dispatch_groups(workflow_dispatch_groups, matrix_job_dispatch_group) + + return finalize_workflow_dispatch_groups(workflow_dispatch_groups) + + +def write_outputs(final_workflow): + job_list = [] + runner_counts = {} + id_to_full_job_name = {} + + total_jobs = 0 + + def process_job_array(group_name, array_name, parent_json): + nonlocal job_list + nonlocal runner_counts + nonlocal total_jobs + + job_array = parent_json[array_name] if array_name in parent_json else [] + for job_json in job_array: + total_jobs += 1 + job_list.append(f"{total_jobs:4} id: {job_json['id']:<4} {array_name:13} {job_json['name']}") + id_to_full_job_name[job_json['id']] = f"{group_name} {job_json['name']}" + runner = job_json['runner'] + runner_counts[runner] = runner_counts.get(runner, 0) + 1 + + for group_name, group_json in final_workflow.items(): + job_list.append(f"{'':4} {group_name}:") + process_job_array(group_name, 'standalone', group_json) + if 'two_stage' in group_json: + for two_stage_json in group_json['two_stage']: + process_job_array(group_name, 'producers', two_stage_json) + process_job_array(group_name, 'consumers', two_stage_json) + + # Sort by descending counts: + runner_counts = {k: v for k, v in sorted(runner_counts.items(), key=lambda item: item[1], reverse=True)} + + runner_heading = f"🏃‍ Runner counts (total jobs: {total_jobs})" + + runner_counts_table = f"| {'#':^4} | Runner\n" + runner_counts_table += "|------|------\n" + for runner, count in runner_counts.items(): + runner_counts_table += f"| {count:4} | `{runner}`\n" + + runner_json = {"heading": runner_heading, "body": runner_counts_table} + + os.makedirs("workflow", exist_ok=True) + write_json_file("workflow/workflow.json", final_workflow) + write_json_file("workflow/workflow_keys.json", list(final_workflow.keys())) + write_json_file("workflow/job_ids.json", id_to_full_job_name) + write_text_file("workflow/job_list.txt", "\n".join(job_list)) + write_json_file("workflow/runner_summary.json", runner_json) + + +def print_gha_workflow(args): + final_workflow = {} + for workflow_name in args.workflows: + workflow_dispatch_groups = parse_workflow_dispatch_groups(args, workflow_name) + merge_dispatch_groups(final_workflow, workflow_dispatch_groups) + + write_outputs(final_workflow) + + +def print_devcontainer_info(args): + devcontainer_version = matrix_yaml['devcontainer_version'] + + matrix_jobs = [] + for workflow in matrix_yaml['workflows']: + matrix_jobs.extend(parse_workflow_matrix_jobs(args, workflow)) + + # Remove all but the following keys from the matrix jobs: + keep_keys = ['ctk', 'cxx', 'os'] + combinations = [{key: job[key] for key in keep_keys} for job in matrix_jobs] + + # Remove duplicates and filter out windows jobs: + unique_combinations = [] + for combo in combinations: + if not is_windows(combo) and combo not in unique_combinations: + unique_combinations.append(combo) + + for combo in unique_combinations: + combo['compiler_name'] = combo['cxx']['name'] + combo['compiler_version'] = combo['cxx']['version'] + combo['compiler_exe'] = combo['cxx']['exe'] + del combo['cxx'] + + 
combo['cuda'] = combo['ctk']
+        del combo['ctk']
+
+    devcontainer_json = {'devcontainer_version': devcontainer_version, 'combinations': unique_combinations}
+
+    # Pretty print the devcontainer json to stdout:
+    print(json.dumps(devcontainer_json, indent=2))
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Compute matrix for workflow')
+    parser.add_argument('matrix_file', help='Path to the matrix YAML file')
+    parser_mode_group = parser.add_argument_group('Output Mode', "Must specify one of these options.")
+    parser_mode = parser_mode_group.add_mutually_exclusive_group(required=True)
+    parser_mode.add_argument('--workflows', nargs='+',
+                             help='Print GHA workflow with jobs from [pull_request, nightly, weekly, etc]')
+    parser_mode.add_argument('--devcontainer-info', action='store_true',
+                             help='Print devcontainer info instead of GHA workflows.')
+    parser.add_argument('--dirty-projects', nargs='*', help='Filter jobs to only these projects')
+    parser.add_argument('--skip-tests', action='store_true',
+                        help='Remove jobs defined in `matrix_file.skip_test_jobs`.')
+    args = parser.parse_args()
+
+    # Check if the matrix file exists
+    if not os.path.isfile(args.matrix_file):
+        print(f"Error: Matrix file '{args.matrix_file}' does not exist.")
+        sys.exit(1)
+
+    with open(args.matrix_file, 'r') as f:
+        global matrix_yaml
+        matrix_yaml = yaml.safe_load(f)
+        matrix_yaml['filename'] = args.matrix_file
+
+    if args.workflows:
+        print_gha_workflow(args)
+    elif args.devcontainer_info:
+        print_devcontainer_info(args)
+    else:
+        parser.print_usage()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/.github/actions/workflow-results/action.yml b/.github/actions/workflow-results/action.yml
new file mode 100644
index 0000000000..5c08a55fdd
--- /dev/null
+++ b/.github/actions/workflow-results/action.yml
@@ -0,0 +1,96 @@
+name: "CCCL Workflow Sentinel"
+description: "Check the results of the dispatched jobs and comment on the PR."
+
+inputs:
+  github_token:
+    description: "The GitHub token to use for commenting on the PR. No comment will be made if not provided."
+    required: false
+  pr_number:
+    description: "The PR number to comment on, if applicable. No comment will be made if not provided."
+    required: false
+
+outputs:
+  success:
+    description: "Whether all of the dispatched jobs succeeded."
+    value: ${{ steps.check-dispatch.outputs.success }}
+
+runs:
+  using: "composite"
+  steps:
+
+    - name: Download workflow artifacts
+      uses: actions/download-artifact@v3
+      with:
+        name: workflow
+        path: workflow/
+
+    - name: Download job success artifacts
+      continue-on-error: true # This may fail if no jobs succeed. The checks below will catch this.
+      uses: actions/download-artifact@v3
+      with:
+        name: dispatch-job-success
+        path: dispatch-job-success/
+
+    - name: Prepare execution summary
+      id: job-summary
+      continue-on-error: true
+      shell: bash --noprofile --norc -euo pipefail {0}
+      run: |
+        echo "Generating job summary..."
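+        # Writes execution/heading.txt and execution/projects/*_summary.json,
+        # which final-summary.py (below) assembles into the PR comment body.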
+ python3 "${GITHUB_ACTION_PATH}/prepare-execution-summary.py" workflow/workflow.json + + - name: Prepare final summary + id: final-summary + continue-on-error: true + shell: bash --noprofile --norc -euo pipefail {0} + run: | + echo "::group::Final Summary" + python3 "${GITHUB_ACTION_PATH}/final-summary.py" | tee final_summary.md + echo "::endgroup::" + + # This allows multiline strings and special characters to be passed through the GHA outputs: + url_encode_string() { + python3 -c "import sys; from urllib.parse import quote; print(quote(sys.stdin.read()))" + } + + echo "::group::GHA Output: SUMMARY" + printf "SUMMARY=%s\n" "$(cat final_summary.md | url_encode_string)" | tee -a "${GITHUB_OUTPUT}" + echo "::endgroup::" + + cp final_summary.md ${GITHUB_STEP_SUMMARY} + + - name: Comment on PR + if: ${{ !cancelled() && inputs.pr_number != '' && inputs.github_token != ''}} + continue-on-error: true + env: + PR_NUMBER: ${{ fromJSON(inputs.pr_number) }} + COMMENT_BODY: ${{ steps.final-summary.outputs.SUMMARY }} + uses: actions/github-script@v4 + with: + github-token: ${{ inputs.github_token }} + script: | + const pr_number = process.env.PR_NUMBER; + const owner = 'NVIDIA'; + const repo = 'cccl'; + // Decode URL-encoded string for proper display in comments + const commentBody = decodeURIComponent(process.env.COMMENT_BODY); + console.log('::group::Commenting on PR #' + pr_number + ' with the following message:') + console.log(commentBody); + console.log('::endgroup::'); + github.issues.createComment({ + owner: owner, + repo: repo, + issue_number: pr_number, + body: commentBody + }); + + - name: Check for job success + id: check-dispatch + shell: bash --noprofile --norc -euo pipefail {0} + run: | + if "${GITHUB_ACTION_PATH}/verify-job-success.py" workflow/job_ids.json; then + echo "success=true" >> "${GITHUB_OUTPUT}" + else + echo "success=false" >> "${GITHUB_OUTPUT}" + exit 1 + fi diff --git a/.github/actions/workflow-results/final-summary.py b/.github/actions/workflow-results/final-summary.py new file mode 100755 index 0000000000..3057724d81 --- /dev/null +++ b/.github/actions/workflow-results/final-summary.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +import json +import os +import re +import sys + + +def read_file(filepath): + with open(filepath, 'r') as f: + return f.read().rstrip("\n ") + +def print_file_if_present(filepath): + if os.path.exists(filepath): + print(read_file(filepath) + "\n\n") + + +def print_summary_file(filepath, heading_level): + summary_json = json.load(open(filepath)) + print(f"
<details><summary><h{heading_level}>{summary_json['heading']}</h{heading_level}></summary>\n")
+    print(summary_json["body"] + "\n")
+    print("</details>\n")
+
+
+def main():
+    # List of all projects detected in 'execution/projects/{project}_summary.json':
+    projects = []
+    project_file_regex="(.*)_summary.json"
+    for filename in os.listdir("execution/projects"):
+        match = re.match(project_file_regex, filename)
+        if match:
+            projects.append(match.group(1))
+
+    print(f"<details><summary><h2>{read_file('execution/heading.txt')}</h2></summary>\n")
+
+    print("<ul>")
+    for project in projects:
+        print("<li>")
+        print_summary_file(f"execution/projects/{project}_summary.json", 3)
+    print("</ul>\n")
+
+    print_summary_file("workflow/runner_summary.json", 2)
+    print_file_if_present('workflow/changes.md')
+
+    print("</details>
") + + + +if __name__ == '__main__': + main() diff --git a/.github/actions/workflow-results/prepare-execution-summary.py b/.github/actions/workflow-results/prepare-execution-summary.py new file mode 100755 index 0000000000..26b8e82363 --- /dev/null +++ b/.github/actions/workflow-results/prepare-execution-summary.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 + + +import argparse +import json +import os +import re +import sys + + +def job_succeeded(job): + # The job was successful if the artifact file 'dispatch-job-success/dispatch-job-success-' exists: + return os.path.exists(f'dispatch-job-success/{job["id"]}') + + +def natural_sort_key(key): + # Natural sort impl (handles embedded numbers in strings, case insensitive) + return [(int(text) if text.isdigit() else text.lower()) for text in re.split('(\d+)', key)] + + +# Print the prepared text summary to the file at the given path +def write_text(filepath, summary): + with open(filepath, 'w') as f: + print(summary, file=f) + + +# Print the prepared JSON object to the file at the given path +def write_json(filepath, json_object): + with open(filepath, 'w') as f: + json.dump(json_object, f, indent=4) + + +def extract_jobs(workflow): + jobs = [] + for group_name, group in workflow.items(): + if "standalone" in group: + jobs += group["standalone"] + if "two_stage" in group: + for two_stage in group["two_stage"]: + jobs += two_stage["producers"] + jobs += two_stage["consumers"] + return jobs + + +def build_summary(jobs): + summary = {'passed': 0, 'failed': 0, 'projects': {}} + projects = summary['projects'] + + for job in jobs: + success = job_succeeded(job) + + if success: + summary['passed'] += 1 + else: + summary['failed'] += 1 + + matrix_job = job["origin"]["matrix_job"] + + project = matrix_job["project"] + if not project in projects: + projects[project] = {'passed': 0, 'failed': 0, 'tags': {}} + + if success: + projects[project]['passed'] += 1 + else: + projects[project]['failed'] += 1 + + tags = projects[project]['tags'] + for tag in matrix_job.keys(): + if tag == 'project': + continue + + if not tag in tags: + tags[tag] = {'passed': 0, 'failed': 0, 'values': {}} + + value = str(matrix_job[tag]) + values = tags[tag]['values'] + + if not value in values: + values[value] = {'passed': 0, 'failed': 0} + + if success: + tags[tag]['passed'] += 1 + values[value]['passed'] += 1 + else: + tags[tag]['failed'] += 1 + values[value]['failed'] += 1 + + # Natural sort the value strings within each tag: + for project, project_summary in projects.items(): + for tag, tag_summary in project_summary['tags'].items(): + tag_summary['values'] = dict(sorted(tag_summary['values'].items(), + key=lambda item: natural_sort_key(item[0]))) + + # Sort the tags within each project so that: + # - "Likely culprits" come first. These are tags that have multiple values, but only one has failures. + # - The remaining tags with failures come next. + # - Tags with no failures come last. 
+ def rank_tag(tag_summary): + num_failing_values = sum(1 for value_summary in tag_summary['values'].values() if value_summary['failed'] > 0) + + if len(tag_summary['values']) > 1 and num_failing_values == 1: + return 0 + elif len(tag_summary['values']) > 1 and tag_summary['failed'] > 0: + return 1 + elif tag_summary['failed'] > 0: + return 2 + return 3 + for project, project_summary in projects.items(): + project_summary['tags'] = dict(sorted(project_summary['tags'].items(), + key=lambda item: (rank_tag(item[1]), item[0]))) + + return summary + + +def get_summary_heading(summary): + passed = summary['passed'] + failed = summary['failed'] + total = passed + failed + + if passed == 0: + flag = '🟥' + elif failed > 0: + flag = '🟨' + else: + flag = '🟩' + + return f'{flag} CI Results [ Failed: {failed} | Passed: {passed} | Total: {total} ]' + + +def get_project_heading(project, project_summary): + if project_summary['passed'] == 0: + flag = '🟥' + elif project_summary['failed'] > 0: + flag = '🟨' + else: + flag = '🟩' + + passed = project_summary['passed'] + failed = project_summary['failed'] + total = project_summary['failed'] + project_summary['passed'] + + return f'{flag} Project {project} [ Failed: {failed} | Passed: {passed} | Total: {total} ]' + + +def get_tag_line(tag, tag_summary): + passed = tag_summary['passed'] + failed = tag_summary['failed'] + values = tag_summary['values'] + + # Find the value with an failure rate that matches the tag's failure rate: + suspicious = None + if len(values) > 1 and failed > 0: + for value, value_summary in values.items(): + if value_summary['failed'] == failed: + suspicious = value_summary + suspicious['name'] = value + break + + # Did any jobs with this value pass? + likely_culprit = suspicious if suspicious and suspicious['passed'] == 0 else None + + note = '' + if likely_culprit: + flag = '🚨' + note = f': {likely_culprit["name"]} {flag}' + elif suspicious: + flag = '🔍' + note = f': {suspicious["name"]} {flag}' + elif passed == 0: + flag = '🟥' + elif failed > 0: + flag = '🟨' + else: + flag = '🟩' + + return f'{flag} {tag}{note}' + + +def get_value_line(value, value_summary, tag_summary): + passed = value_summary['passed'] + failed = value_summary['failed'] + total = passed + failed + + parent_size = len(tag_summary['values']) + parent_failed = tag_summary['failed'] + + is_suspicious = failed > 0 and failed == parent_failed and parent_size > 1 + is_likely_culprit = is_suspicious and passed == 0 + + if is_likely_culprit: + flag = '🔥' + elif is_suspicious: + flag = '🔍' + elif passed == 0: + flag = '🟥' + elif failed > 0: + flag = '🟨' + else: + flag = '🟩' + + percent = int(100 * failed / total) + left_aligned = f"{flag} {value} ({percent}% Fail)" + return f' {left_aligned:<30} Failed: {failed:^3} -- Passed: {passed:^3} -- Total: {total:^3}' + + +def get_project_summary_body(project, project_summary): + body = ['```'] + for tag, tag_summary in project_summary['tags'].items(): + body.append(get_tag_line(tag, tag_summary)) + for value, value_summary in tag_summary['values'].items(): + body.append(get_value_line(value, value_summary, tag_summary)) + body.append('```') + return "\n".join(body) + + +def write_project_summary(project, project_summary): + heading = get_project_heading(project, project_summary) + body = get_project_summary_body(project, project_summary) + + summary = {'heading': heading, 'body': body} + + write_json(f'execution/projects/{project}_summary.json', summary) + + +def write_workflow_summary(workflow): + summary = 
build_summary(extract_jobs(workflow)) + + os.makedirs('execution/projects', exist_ok=True) + + write_text('execution/heading.txt', get_summary_heading(summary)) + + for project, project_summary in summary['projects'].items(): + write_project_summary(project, project_summary) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('workflow', type=argparse.FileType('r')) + args = parser.parse_args() + + workflow = json.load(args.workflow) + write_workflow_summary(workflow) + + +if __name__ == '__main__': + main() diff --git a/.github/actions/workflow-results/verify-job-success.py b/.github/actions/workflow-results/verify-job-success.py new file mode 100755 index 0000000000..d2e69f0677 --- /dev/null +++ b/.github/actions/workflow-results/verify-job-success.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import sys + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("job_id_map", type=argparse.FileType('r')) + args = parser.parse_args() + + job_id_map = json.load(args.job_id_map) + + # For each job id, verify that the file 'dispatch-job-success/' exists + success = True + for job_id, job_name in job_id_map.items(): + success_file = f'dispatch-job-success/{job_id}' + print(f'Verifying job with id "{job_id}": "{job_name}"') + if not os.path.exists(success_file): + print(f'Failed: Artifact "dispatch-job-success/{job_id}" not found') + success = False + + if not success: + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/.github/workflows/build-and-test-linux.yml b/.github/workflows/build-and-test-linux.yml deleted file mode 100644 index 6c5ba40061..0000000000 --- a/.github/workflows/build-and-test-linux.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: build and test - -defaults: - run: - shell: bash -exo pipefail {0} - -on: - workflow_call: - inputs: - cpu: {type: string, required: true} - test_name: {type: string, required: false} - build_script: {type: string, required: false} - test_script: {type: string, required: false} - container_image: {type: string, required: false} - run_tests: {type: boolean, required: false, default: true} - -permissions: - contents: read - -jobs: - build: - name: Build ${{inputs.test_name}} - permissions: - id-token: write - contents: read - uses: ./.github/workflows/run-as-coder.yml - with: - name: Build ${{inputs.test_name}} - runner: linux-${{inputs.cpu}}-cpu16 - image: ${{ inputs.container_image }} - command: | - ${{ inputs.build_script }} - - test: - needs: build - permissions: - id-token: write - contents: read - if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}} - name: Test ${{inputs.test_name}} - uses: ./.github/workflows/run-as-coder.yml - with: - name: Test ${{inputs.test_name}} - runner: linux-${{inputs.cpu}}-gpu-v100-latest-1 - image: ${{inputs.container_image}} - command: | - ${{ inputs.test_script }} diff --git a/.github/workflows/build-and-test-windows.yml b/.github/workflows/build-and-test-windows.yml deleted file mode 100644 index afcb78d835..0000000000 --- a/.github/workflows/build-and-test-windows.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Build Windows - -on: - workflow_call: - inputs: - test_name: {type: string, required: false} - build_script: {type: string, required: false} - container_image: {type: string, required: false} - -jobs: - prepare: - name: Build ${{inputs.test_name}} - runs-on: windows-amd64-cpu16 - permissions: - id-token: write - contents: read - env: - SCCACHE_BUCKET: 
rapids-sccache-devs - SCCACHE_REGION: us-east-2 - SCCACHE_IDLE_TIMEOUT: 0 - SCCACHE_S3_USE_SSL: true - SCCACHE_S3_NO_CREDENTIALS: false - steps: - - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA - aws-region: us-east-2 - role-duration-seconds: 43200 # 12 hours - - name: Fetch ${{ inputs.container_image }} - shell: powershell - run: docker pull ${{ inputs.container_image }} - - name: Run the tests - shell: powershell - run: >- - docker run ${{ inputs.container_image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') - [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') - [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') - [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') - git clone https://github.com/NVIDIA/cccl.git; - cd cccl; - git fetch --all; - git checkout ${{github.ref_name}}; - ${{inputs.build_script}};" diff --git a/.github/workflows/ci-workflow-nightly.yml b/.github/workflows/ci-workflow-nightly.yml new file mode 100644 index 0000000000..ed1bb149b3 --- /dev/null +++ b/.github/workflows/ci-workflow-nightly.yml @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This is the main workflow that runs on every PR and push to main +name: nightly + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + schedule: + - cron: '0 7 * * *' # 7AM UTC, 12AM PST, 3AM EST + +concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + +jobs: + + build-workflow: + name: Build workflow from matrix + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + workflow: ${{ steps.build-workflow.outputs.workflow }} + workflow_keys: ${{ steps.build-workflow.outputs.workflow_keys }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + - name: Build workflow + id: build-workflow + uses: ./.github/actions/workflow-build + with: + workflows: nightly + + run-workflow: + name: ${{ matrix.name }} + needs: build-workflow + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.build-workflow.outputs.workflow_keys) }} + uses: ./.github/workflows/workflow-dispatch.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} + + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + if: ${{ always() || !cancelled() }} + needs: + - build-workflow + - run-workflow + permissions: + contents: read + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Check workflow success + id: check-workflow + uses: ./.github/actions/workflow-results + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check results + run: | + status="passed" + + check_result() { + name=$1 + expected=$2 + result=$3 + + echo "Checking if $name job result ('$result') is '$expected'..." + if [[ "$result" != "$expected" ]]; then + echo "$name job failed" + + status="failed" + fi + } + + # Note that run-workflow is different: + check_result "build-workflow" "success" "${{needs.build-workflow.result}}" + check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + + if [[ "$status" == "failed" ]]; then + exit 1 + fi diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml new file mode 100644 index 0000000000..3ff29cfeb2 --- /dev/null +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -0,0 +1,134 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This is the main workflow that runs on every PR and push to main +name: pull_request + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + push: + branches: + - "pull-request/[0-9]+" + +concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +jobs: + + build-workflow: + name: Build workflow from matrix + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + outputs: + workflow: ${{ steps.build-workflow.outputs.workflow }} + workflow_keys: ${{ steps.build-workflow.outputs.workflow_keys }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + - name: Lookup PR info + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Build workflow + id: build-workflow + uses: ./.github/actions/workflow-build + with: + skip_tests: ${{ toJSON(contains(github.event.head_commit.message, '[skip-tests]')) }} + inspect_changes_script: ${{ toJSON(!contains(github.event.head_commit.message, '[all-projects]') && 'ci/inspect_changes.sh' || '') }} + inspect_changes_base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + workflows: >- + ${{ !contains(github.event.head_commit.message, '[workflow:!pull_request]') && 'pull_request' || '' }} + ${{ contains(github.event.head_commit.message, '[workflow:nightly]') && 'nightly' || '' }} + ${{ contains(github.event.head_commit.message, '[workflow:test]') && 'test' || '' }} + + run-workflow: + name: ${{ matrix.name }} + needs: build-workflow + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.build-workflow.outputs.workflow_keys) }} + uses: ./.github/workflows/workflow-dispatch.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} + + verify-devcontainers: + name: Verify Dev Containers + permissions: + id-token: write + contents: read + uses: ./.github/workflows/verify-devcontainers.yml + + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + if: ${{ always() && !cancelled() }} + needs: + - build-workflow + - run-workflow + - verify-devcontainers + permissions: + contents: read + pull-requests: write # Posts a comment back to the PR. + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Get Base Branch from PR + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + + - name: Check workflow success + id: check-workflow + uses: ./.github/actions/workflow-results + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + pr_number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }} + + - name: Check results + run: | + status="passed" + + check_result() { + name=$1 + expected=$2 + result=$3 + + echo "Checking if $name job result ('$result') is '$expected'..." 
+ if [[ "$result" != "$expected" ]]; then + echo "$name job failed" + + status="failed" + fi + } + + # Note that run-workflow is different: + check_result "build-workflow" "success" "${{needs.build-workflow.result}}" + check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + check_result "verify-devcontainers" "success" "${{needs.verify-devcontainers.result}}" + + if [[ "$status" == "failed" ]]; then + exit 1 + fi diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml deleted file mode 100644 index 7b5ed4ef27..0000000000 --- a/.github/workflows/dispatch-build-and-test.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Dispatch build and test - -on: - workflow_call: - inputs: - project_name: {type: string, required: true} - per_cuda_compiler_matrix: {type: string, required: true} - devcontainer_version: {type: string, required: true} - is_windows: {type: boolean, required: true} - -permissions: - contents: read - -jobs: - # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration - # ensures that the build/test steps can overlap across different configurations. For example, - # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11. - build_and_test_linux: - name: build and test linux - permissions: - id-token: write - contents: read - if: ${{ !inputs.is_windows }} - uses: ./.github/workflows/build-and-test-linux.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} - with: - cpu: ${{ matrix.cpu }} - test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}} - build_script: './ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"' - test_script: './ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"' - container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows-2022' }} - - build_and_test_windows: - name: build and test windows - permissions: - id-token: write - contents: read - if: ${{ inputs.is_windows }} - uses: ./.github/workflows/build-and-test-windows.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} - with: - test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} - build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}}" - container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}} diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml deleted file mode 100644 index 3dcee0cf6c..0000000000 --- a/.github/workflows/pr.yml +++ /dev/null @@ -1,242 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This is the main workflow that runs on every PR and push to main -name: pr - -defaults: - run: - shell: bash -euo pipefail {0} - -on: - push: - branches: - - "pull-request/[0-9]+" - -# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts. -concurrency: - group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} - cancel-in-progress: true - -permissions: - contents: read - pull-requests: read - -jobs: - inspect-changes: - name: "Inspect Changes" - runs-on: ubuntu-latest - outputs: - LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} - CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} - THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} - steps: - - name: Get Base Branch from PR - id: get-pr-info - uses: nv-gha-runners/get-pr-info@main - - name: Checkout repo - uses: actions/checkout@v3 - - name: Identify dirty subprojects - id: set-outputs - run: | - ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} - env: - BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} - - compute-matrix: - name: Compute matrix - runs-on: ubuntu-latest - needs: - - inspect-changes - outputs: - DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} - PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} - PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} - NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} - CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} - CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Compute matrix outputs - id: set-outputs - run: | - .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request - env: - THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} - CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} - LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} - - nvrtc: - name: libcudacxx NVRTC CUDA${{matrix.cuda}} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/run-as-coder.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} - with: - name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} - - thrust: - name: Thrust CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} - uses: 
./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "thrust" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - cub: - name: CUB CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "cub" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - libcudacxx: - name: libcudacxx CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "libcudacxx" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - clang-cuda: - name: ${{matrix.lib}} Clang CUDA - permissions: - id-token: write - contents: read - needs: compute-matrix - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-cpu16 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" - - cccl-infra: - name: CCCL Infrastructure - permissions: - id-token: write - contents: read - needs: compute-matrix - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - cmake -S . 
--preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} - ctest --preset=cccl-infra - - verify-devcontainers: - name: Verify Dev Containers - permissions: - id-token: write - contents: read - uses: ./.github/workflows/verify-devcontainers.yml - - verify-codegen: - name: Verify Codegen in libcudacxx - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run libcudacxx codegen verification - id: verify-codegen - run: | - sudo apt-get update - sudo apt-get install ninja-build - export CXX="g++" - ./ci/verify_codegen.sh - - # This job is the final job that runs after all other jobs and is used for branch protection status checks. - # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks - # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 - ci: - runs-on: ubuntu-latest - name: CI - if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success - needs: - - clang-cuda - - cub - - libcudacxx - - nvrtc - - thrust - - cccl-infra - - verify-devcontainers - - verify-codegen - steps: - - name: Check status of all precursor jobs - if: >- - ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} - run: exit 1 diff --git a/.github/workflows/run-as-coder.yml b/.github/workflows/run-as-coder.yml deleted file mode 100644 index 5430e6c0ae..0000000000 --- a/.github/workflows/run-as-coder.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: Run as coder user - -defaults: - run: - shell: bash -exo pipefail {0} - - -on: - workflow_call: - inputs: - name: {type: string, required: true} - image: {type: string, required: true} - runner: {type: string, required: true} - command: {type: string, required: true} - env: { type: string, required: false, default: "" } - -permissions: - contents: read - -jobs: - run-as-coder: - name: ${{inputs.name}} - permissions: - id-token: write - contents: read - runs-on: ${{inputs.runner}} - container: - options: -u root - image: ${{inputs.image}} - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - with: - path: cccl - persist-credentials: false - - name: Move files to coder user home directory - run: | - cp -R cccl /home/coder/cccl - chown -R coder:coder /home/coder/ - - name: Add NVCC problem matcher - run: | - echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" - - name: Configure credentials and environment variables for sccache - uses: ./cccl/.github/actions/configure_cccl_sccache - - name: Run command - shell: su coder {0} - run: | - set -eo pipefail - cd ~/cccl - echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" - echo -e "\e[1;34m${{inputs.command}}\e[0m" - eval "${{inputs.command}}" || exit_code=$? - if [ ! -z "$exit_code" ]; then - echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" - echo "::error:: To replicate this failure locally, follow the steps below:" - echo "1. Clone the repository, and navigate to the correct branch and commit:" - echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" - echo "" - echo "2. 
Run the failed command inside the same Docker container used by the CI:" - echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" - echo "" - echo "For additional information, see:" - echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" - echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" - exit $exit_code - fi diff --git a/.github/workflows/workflow-dispatch-job.yml b/.github/workflows/workflow-dispatch-job.yml new file mode 100644 index 0000000000..641e054659 --- /dev/null +++ b/.github/workflows/workflow-dispatch-job.yml @@ -0,0 +1,166 @@ +name: "Workflow/Dispatch/Job" + +# Important note about depending on this workflow: The `result` will be a failure, even if successful. +# +# This reusable workflow dispatches to a number of internal jobs. Only one job will run, +# and some may be in error states due to empty matrices (which are used instead of `if` to keep +# skipped dispatch jobs out of the GHA UI). The `continue-on-error` flag should prevent these +# errors from failing the workflow, but this does not work. +# +# Thus, the `result` of this workflow will always be a failure, even if the job itself is successful. +# +# Instead, the results from each job is uploaded as an artifact. See the workflow_results action for more details. +# To depend on this job, you should use the `success` output instead: +# +# ``` +# dependent_job: +# needs: dispatch-job +# if: ${{ !cancelled() && needs.dispatch-job.outputs.success }} +# ``` + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + workflow_call: + outputs: + success: + value: ${{ contains(toJSON(jobs.*.outputs.success), 'true') }} + inputs: + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + id: {type: string, required: true} + env: {type: string, required: false} + dummy_matrix: {type: string, required: false, default: '[{"valid": true}]'} + +permissions: + contents: read + +jobs: + linux: + name: ${{inputs.name}} + continue-on-error: ${{ ! startsWith(inputs.runner, 'linux') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'linux') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + container: + options: -u root + image: ${{inputs.image}} + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + path: cccl + persist-credentials: false + - name: Move files to coder user home directory + run: | + cp -R cccl /home/coder/cccl + chown -R coder:coder /home/coder/ + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./cccl/.github/actions/configure_cccl_sccache + - name: Run command + shell: su coder {0} + env: + # Dereferencing the command from and env var instead of a GHA input avoids issues with escaping + # semicolons and other special characters (e.g. `-arch "60;70;80"`). 
+ COMMAND: ${{inputs.command}} + run: | + set -eo pipefail + cd ~/cccl + echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" + echo -e "\e[1;34m${COMMAND}\e[0m" + eval "${COMMAND}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${COMMAND}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + - name: Mark job as successful + id: done + run: | + echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} + mkdir dispatch-job-success + touch dispatch-job-success/${{inputs.id}} + - name: Upload dispatch-job-success + uses: actions/upload-artifact@v3 + with: + name: dispatch-job-success + path: dispatch-job-success/${{inputs.id}} + + windows: + name: ${{inputs.name}} + continue-on-error: ${{ ! startsWith(inputs.runner, 'windows') }} + outputs: + success: ${{ steps.done.outputs.SUCCESS }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.runner, 'windows') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.image }} + run: docker pull ${{ inputs.image }} + - name: Run Command + run: >- + docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/cccl.git; + cd cccl; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.command}}" + - name: Mark job as successful + id: done + run: | + echo "SUCCESS=true" | tee -a ${GITHUB_OUTPUT} + mkdir dispatch-job-success + touch dispatch-job-success/${{inputs.id}} + - name: Upload 
dispatch-job-success + uses: actions/upload-artifact@v3 + with: + name: dispatch-job-success + path: dispatch-job-success/${{inputs.id}} diff --git a/.github/workflows/workflow-dispatch-two-stage.yml b/.github/workflows/workflow-dispatch-two-stage.yml new file mode 100644 index 0000000000..65b6d1eba6 --- /dev/null +++ b/.github/workflows/workflow-dispatch-two-stage.yml @@ -0,0 +1,64 @@ +name: "Workflow/Dispatch/TwoStage" + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + workflow_call: + inputs: + producers: {type: string, required: true} + consumers: {type: string, required: true} + +permissions: + contents: read + +jobs: + # It is impossible to accumulate output variables across a matrix, + # and we cannot rely on the results of the dispatch-job workflow to determine success. + # See the note in ci-dispatch-job.yml for more information. + # + # Since we cannot accumulate results from multiple producers, only support a single producer for now. + # This is enforced by compute-matrix.py. + producers: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. + name: "p.${{ matrix.id }}" + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.producers)}} + uses: ./.github/workflows/workflow-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + id: ${{ matrix.id }} + + consumers: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. + name: "c.${{ matrix.id }}" + needs: producers + # dispatch-job's result is always false, check the outputs instead. See ci-dispatch-job.yml for more information. + if: ${{ !cancelled() && fromJson(needs.producers.outputs.success) }} + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.consumers)}} + uses: ./.github/workflows/workflow-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + id: ${{ matrix.id }} diff --git a/.github/workflows/workflow-dispatch.yml b/.github/workflows/workflow-dispatch.yml new file mode 100644 index 0000000000..adab0f6f36 --- /dev/null +++ b/.github/workflows/workflow-dispatch.yml @@ -0,0 +1,53 @@ +name: "Workflow/Dispatch/Group" + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +on: + workflow_call: + inputs: + name: {type: string, required: true} + jobs: {type: string, required: true} + +permissions: + contents: read + +jobs: + + standlone-jobs: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. 
+ name: "s.${{ matrix.id }}" + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['standalone']}} + uses: ./.github/workflows/workflow-dispatch-job.yml + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + id: ${{ matrix.id }} + + two-stage-jobs: + # This is an internal dispatch job and the name is not important. + # Give the job a short and unique name, otherwise github will bloat the job name with the matrix values. + # This keeps the UI from getting cluttered. + name: "t.${{ matrix.id }}" + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['two_stage']}} + uses: ./.github/workflows/workflow-dispatch-two-stage.yml + with: + producers: ${{ toJSON(matrix.producers) }} + consumers: ${{ toJSON(matrix.consumers) }} diff --git a/.gitignore b/.gitignore index ceb7d48ce5..4f09d2f5e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .idea/ -*build*/ +build*/ .cache .aws .config diff --git a/CMakePresets.json b/CMakePresets.json index dbbc80a4d9..290024f565 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -60,7 +60,7 @@ "name": "all-dev-debug", "displayName": "all-dev debug", "inherits": "all-dev", - "cacheVariables":{ + "cacheVariables": { "CCCL_ENABLE_BENCHMARKS": false, "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CUDA_FLAGS": "-G" diff --git a/ci-overview.md b/ci-overview.md index 56314fbab3..d91781ad52 100644 --- a/ci-overview.md +++ b/ci-overview.md @@ -33,6 +33,8 @@ Special commands are provided that can be included in commit messages to direct - **Example:** `git commit -m "[skip ci] Update README."` - `[skip-tests]`: Skips CI jobs that execute tests, but runs all other jobs. Useful to avoid time-consuming tests when changes are unlikely to affect them. +- `[all-projects]`: CI normally skips projects that don't have changes in themselves or their dependencies. This forces all projects to build. +- `[workflow:]`: Execute jobs from the named workflow. Example: `[workflow:nightly]` runs all jobs defined in `matrix.yaml`'s `workflows.nightly` list. Use these commands judiciously. While they offer flexibility, they should be used appropriately to maintain the codebase's integrity and quality. 
diff --git a/ci/build_common.sh b/ci/build_common.sh index 239d463ead..a06af83641 100755 --- a/ci/build_common.sh +++ b/ci/build_common.sh @@ -216,10 +216,13 @@ function test_preset() { local BUILD_NAME=$1 local PRESET=$2 - local GROUP_NAME="🚀 Test ${BUILD_NAME}" + local GPU_REQUIRED=${3:-"true"} - fail_if_no_gpu + if [ "${GPU_REQUIRED}" == "true" ]; then + fail_if_no_gpu + fi + local GROUP_NAME="🚀 Test ${BUILD_NAME}" ctest_log_dir="${BUILD_DIR}/log/ctest" ctest_log="${ctest_log_dir}/${PRESET}" diff --git a/ci/infra_cccl.sh b/ci/infra_cccl.sh new file mode 100755 index 0000000000..475799ace2 --- /dev/null +++ b/ci/infra_cccl.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +source "$(dirname "$0")/build_common.sh" + +print_environment_details + +PRESET="cccl-infra" + +CMAKE_OPTIONS="" + +GPU_REQUIRED="false" + +if [ -n "${GITHUB_SHA:-}" ]; then + CMAKE_OPTIONS="$CMAKE_OPTIONS -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA}" +fi + +configure_preset "CCCL Infra" "$PRESET" "$CMAKE_OPTIONS" +test_preset "CCCL Infra" "$PRESET" "$GPU_REQUIRED" + +print_time_summary diff --git a/ci/inspect_changes.sh b/ci/inspect_changes.sh index 59500a7055..342ce22493 100755 --- a/ci/inspect_changes.sh +++ b/ci/inspect_changes.sh @@ -21,6 +21,7 @@ base_sha=$(git merge-base $head_sha $base_sha) # Define a list of subproject directories: subprojects=( + cccl libcudacxx cub thrust @@ -28,17 +29,33 @@ subprojects=( # ...and their dependencies: declare -A dependencies=( + [cccl]="" [libcudacxx]="cccl" [cub]="cccl libcudacxx thrust" [thrust]="cccl libcudacxx cub" ) +declare -A project_names=( + [cccl]="CCCL Infrastructure" + [libcudacxx]="libcu++" + [cub]="CUB" + [thrust]="Thrust" +) + write_output() { local key="$1" local value="$2" echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" } +tee_to_step_summary() { + if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then + tee -a "${GITHUB_STEP_SUMMARY}" + else + cat + fi +} + dirty_files() { git diff --name-only "${base_sha}" "${head_sha}" } @@ -90,26 +107,14 @@ add_dependencies() { return 0 } -# write_subproject_status -# Write the output _DIRTY={true|false} -write_subproject_status() { - local subproject="$1" - local dirty_flag=${subproject^^}_DIRTY - - if [[ ${!dirty_flag} -ne 0 ]]; then - write_output "${dirty_flag}" "true" - else - write_output "${dirty_flag}" "false" - fi -} - main() { # Print the list of subprojects and all of their dependencies: echo "Subprojects: ${subprojects[*]}" echo echo "Dependencies:" for subproject in "${subprojects[@]}"; do - echo " - ${subproject} -> ${dependencies[$subproject]}" + printf " - %-27s -> %s\n" "$subproject (${project_names[$subproject]})" "${dependencies[$subproject]}" + done echo @@ -117,36 +122,74 @@ main() { echo "HEAD SHA: ${head_sha}" echo + check="+/-" + no_check=" " + get_checkmark() { + if [[ $1 -eq 0 ]]; then + echo "$no_check" + else + echo "$check" + fi + } + # Print the list of files that have changed: - echo "Dirty files:" + echo "::group::Dirty files" dirty_files | sed 's/^/ - /' - echo "" + echo "::endgroup::" + echo + + + echo "

<details><summary><h3>👃 Inspect Changes</h3></summary>
" | tee_to_step_summary + echo | tee_to_step_summary + + echo -e "### Modifications in project?\n" | tee_to_step_summary + echo "| | Project" | tee_to_step_summary + echo "|-----|---------" | tee_to_step_summary - echo "Modifications in project?" # Assign the return value of `inspect_cccl` to the variable `CCCL_DIRTY`: inspect_cccl CCCL_DIRTY=$? - echo "$(if [[ ${CCCL_DIRTY} -eq 0 ]]; then echo " "; else echo "X"; fi) - CCCL Infrastructure" + checkmark="$(get_checkmark ${CCCL_DIRTY})" + echo "| ${checkmark} | ${project_names[cccl]}" | tee_to_step_summary # Check for changes in each subprojects directory: for subproject in "${subprojects[@]}"; do + if [[ ${subproject} == "cccl" ]]; then + # Special case handled above. + continue + fi + inspect_subdir $subproject - declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + local dirty=$? + declare ${subproject^^}_DIRTY=${dirty} + checkmark="$(get_checkmark ${dirty})" + echo "| ${checkmark} | ${project_names[$subproject]}" | tee_to_step_summary done - echo + echo | tee_to_step_summary + + echo -e "### Modifications in project or dependencies?\n" | tee_to_step_summary + echo "| | Project" | tee_to_step_summary + echo "|-----|---------" | tee_to_step_summary - echo "Modifications in project or dependencies?" for subproject in "${subprojects[@]}"; do add_dependencies ${subproject} - declare ${subproject^^}_DIRTY=$? - echo "$(if [[ ${subproject^^}_DIRTY -eq 0 ]]; then echo " "; else echo "X"; fi) - ${subproject}" + local dirty=$? + declare ${subproject^^}_DIRTY=${dirty} + checkmark="$(get_checkmark ${dirty})" + echo "| ${checkmark} | ${project_names[$subproject]}" | tee_to_step_summary done - echo + echo "
" | tee_to_step_summary + + declare -a dirty_subprojects=() for subproject in "${subprojects[@]}"; do - write_subproject_status ${subproject} + var_name="${subproject^^}_DIRTY" + if [[ ${!var_name} -ne 0 ]]; then + dirty_subprojects+=("$subproject") + fi done + + write_output "DIRTY_PROJECTS" "${dirty_subprojects[*]}" } main "$@" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 589de44bd3..42d2cb88c3 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,12 +1,6 @@ - -cuda_prev_min: &cuda_prev_min '11.1' -cuda_prev_max: &cuda_prev_max '11.8' -cuda_curr: &cuda_curr '12.4' - -# The GPUs to test on -gpus: - - 'a100' - - 'v100' +ctk_11_1: &ctk_11_1 '11.1' +ctk_11_8: &ctk_11_8 '11.8' +ctk_curr: &ctk_curr '12.4' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '24.06' @@ -42,54 +36,241 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # oneAPI configs oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } -# Each environment below will generate a unique build/test job -# See the "compute-matrix" job in the workflow for how this is parsed and used -# cuda: The CUDA Toolkit version -# os: The operating system used -# cpu: The CPU architecture -# compiler: The compiler to use -# name: The compiler name -# version: The compiler version -# exe: The unverionsed compiler binary name -# std: The C++ standards to build for -# This field is unique as it will generate an independent build/test job for each value - -# Configurations that will run for every PR -pull_request: - nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc6, std: [11, 14], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 'amd64', compiler: *msvc2017, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: 
*cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [11, 14, 17], jobs: ['build']} - nvrtc: - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', std: [11, 14, 17, 20]} - clang-cuda: - - {lib: ['thrust', 'cub', 'libcudacxx'], cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest, std: [17, 20]} - cccl-infra: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} +# GHA Workflow job matrices: +workflows: + pull_request: + # default_projects: nvcc + - {jobs: ['build'], std: 'all', ctk: *ctk_11_1, cxx: [*gcc6, *gcc7, *gcc8, *gcc9, *llvm9, *msvc2017]} + - {jobs: ['build'], std: 'all', ctk: *ctk_11_8, cxx: [*gcc11], sm: '60;70;80;90'} + - {jobs: ['build'], std: 'all', cxx: [*gcc7, *gcc8, *gcc9, *gcc10, *gcc11]} + - {jobs: ['build'], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14, *llvm15]} + - {jobs: ['test'], std: 'all', cxx: [*gcc12, *llvm16]} + - {jobs: ['build'], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'} + - {jobs: ['build'], std: 'all', cxx: [*gcc12], sm: '90a'} + - {jobs: ['build'], std: 'all', cxx: [*oneapi]} + - {jobs: ['build'], std: 'all', cxx: [*msvc2019, *msvc2022]} + # default_projects: clang-cuda + - {jobs: ['build'], std: [17, 20], cudacxx: *llvm-newest, cxx: *llvm-newest} + # nvrtc: + - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'} + # verify-codegen: + - {jobs: ['verify_codegen'], project: 'libcudacxx'} + # cccl-infra: + - {jobs: ['infra'], project: 'cccl', ctk: *ctk_11_1, cxx: [*gcc-oldest, *llvm-oldest]} + - {jobs: ['infra'], project: 'cccl', ctk: *ctk_curr, cxx: [*gcc-newest, *llvm-newest]} + nightly: + # libcudacxx build fails, CUB tests fail: + - {jobs: ['build'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11], project: ['cub']} + - {jobs: ['test'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11], project: ['thrust']} + # - {jobs: ['test'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11] } + + # libcudacxx build fails, CUB tests fail: + - {jobs: ['build'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17], project: ['cub']} + - {jobs: ['test'], ctk: 
*ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17], project: ['thrust']} + # - {jobs: ['test'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17] } + + # CUB + libcudacxx tests fails: + - {jobs: ['build'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17], project: ['libcudacxx', 'cub']} + - {jobs: ['test'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17], project: ['thrust']} + # - {jobs: ['test'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17] } + + # libcudacxx tests fail: + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14], project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11], project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['libcudacxx']} + - {jobs: ['build'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17], project: ['libcudacxx']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17], project: ['cub', 'thrust']} + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14] } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all' } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11] } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20] } + # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17] } + + # nvrtc: + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', sm: 'gpu', cxx: *gcc12, std: [20], project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc12, std: [20], project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['libcudacxx']} + +# +# Resources for compute_matrix.py. These can be modified to add new jobs, etc. +# +# Jobs are executed by running scripts: +# - Linux: 'ci/_.sh` +# - Windows: `ci/windows/_.bat` + +# A matrix entry must have the following tag. +required_tags: + - 'jobs' # A list of job types to run (e.g. 'build', 'test', 'nvrtc', 'infra', 'verify_codegen', ...) for + # the specified configuration(s). + +# If a matrix entry omits these tags, a default value (defined later in `default_`) is used. +defaulted_tags: + - 'ctk' # CUDA ToolKit version. Will be exploded if a list. + - 'cpu' # CPU architecture. Will be exploded if a list. + - 'gpu' # GPU model. Will be exploded if a list. + - 'cxx' # Host compiler {name, version, exe}. Will be exploded if a list. + - 'cudacxx' # Device compiler as {name, version, exe} or 'nvcc' to use nvcc from the specified `ctk`. + # Will be exploded if a list. + - 'project' # Project name (e.g. 
libcudacxx, cub, thrust, cccl). Will be exploded if a list. + - 'os' # Operating system. Will be exploded if a list. + +# These tags will only exist if needed: +optional_tags: + - 'std' # C++ standard. Passed to script with `-std `. Will be exploded if a list. + # If set to 'all', all stds supported by the host/device compiler are used. + - 'sm' # `CMAKE_CUDA_ARCHITECTURES` Passed to script with `-arch `. + # Defaults to use the settings in the CMakePresets.json file. + # Set to 'gpu' to only target the GPU in the `gpu` tag. + # Can pass multiple architectures via "60;70-real;80-virtual" + # Will be exploded if a list (e.g. `sm: ['60;70;80;90', '90a']` creates two jobs) + - 'cmake_options' # Additional CMake options to pass to the build. Passed to script with `-cmake_options ""`. + # Will be exploded if a list. + +# `default_`: Used when the tag is omitted. +default_ctk: *ctk_curr +default_cudacxx: 'nvcc' +default_cxx: *gcc12 +default_cpu: 'amd64' +default_gpu: 'v100' +default_project: + - 'libcudacxx' + - 'cub' + - 'thrust' +# Special handling: lookup os from ctk/cxx info +# See `matrix.yml` at https://github.com/rapidsai/devcontainers +default_os_lookup: + 'ctk11.1-gcc6': 'ubuntu18.04' + 'ctk11.1-gcc7': 'ubuntu18.04' + 'ctk11.1-gcc8': 'ubuntu18.04' + 'ctk11.1-gcc9': 'ubuntu18.04' + 'ctk11.1-llvm9': 'ubuntu18.04' + 'ctk11.1-cl14.16': 'windows2022' + 'ctk11.8-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc7': 'ubuntu20.04' + 'ctk12.4-gcc8': 'ubuntu20.04' + 'ctk12.4-gcc9': 'ubuntu20.04' + 'ctk12.4-gcc10': 'ubuntu20.04' + 'ctk12.4-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc12': 'ubuntu22.04' + 'ctk12.4-llvm9': 'ubuntu20.04' + 'ctk12.4-llvm10': 'ubuntu20.04' + 'ctk12.4-llvm11': 'ubuntu20.04' + 'ctk12.4-llvm12': 'ubuntu20.04' + 'ctk12.4-llvm13': 'ubuntu20.04' + 'ctk12.4-llvm14': 'ubuntu20.04' + 'ctk12.4-llvm15': 'ubuntu22.04' + 'ctk12.4-llvm16': 'ubuntu22.04' + 'ctk12.4-cl14.29': 'windows2022' + 'ctk12.4-cl14.39': 'windows2022' + 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' + +# Lookup supported C++ standards for a given compiler when `std: 'all'`. +all_stds: [11, 14, 17, 20] +lookup_cxx_supported_stds: + 'gcc6': [11, 14 ] + 'gcc7': [11, 14, 17 ] + 'gcc8': [11, 14, 17 ] + 'gcc9': [11, 14, 17 ] + 'gcc10': [11, 14, 17, 20] + 'gcc11': [11, 14, 17, 20] + 'gcc12': [11, 14, 17, 20] + 'llvm9': [11, 14, 17 ] + 'llvm10': [11, 14, 17 ] + 'llvm11': [11, 14, 17, 20] + 'llvm12': [11, 14, 17, 20] + 'llvm13': [11, 14, 17, 20] + 'llvm14': [11, 14, 17, 20] + 'llvm15': [11, 14, 17, 20] + 'llvm16': [11, 14, 17, 20] + 'cl14.16': [ 14 ] + 'cl14.29': [ 14, 17 ] + 'cl14.39': [ 14, 17, 20] + 'oneapi2023.2.0': [11, 14, 17 ] +lookup_cudacxx_supported_stds: + 'nvcc11.1': [11, 14, 17 ] + 'nvcc11.8': [11, 14, 17 ] + 'nvcc12.4': [11, 14, 17, 20] + 'llvm16': [11, 14, 17, 20] + +# Tags that aren't exploded: +non_exploded_tags: + - 'jobs' # Keeping jobs as a list allows for dependency handling of build->test steps. + +# Jobs that have an implied prerequisite 'build' job: +build_required_jobs: + - 'test' + +# Jobs that require a GPU +gpu_required_jobs: + - 'test' + - 'nvrtc' + - 'infra' # cccl infra's example project test launches a kernel + +# When --skip-tests is given to compute-matrix.py, these jobs are ignored. +skip_test_jobs: + - 'test' + - 'nvrtc' + - 'infra' + +# Human readable name for jobs. Default behavior is to capitalize the first letter. +formatted_jobs: + 'nvrtc': 'NVRTC' + 'verify_codegen': 'VerifyCodegen' + +# Human readable name for projects. Default behavior uses the project name as-is. 
+formatted_project_names: + 'libcudacxx': 'libcu++' + 'cub': 'CUB' + 'thrust': 'Thrust' + 'cccl': 'CCCL' + +# Human readable name for compilers. Default behavior uses the "compiler.name" tag as-is. +formatted_cxx_names: + 'llvm': 'clang' + 'oneapi': 'Intel' + 'cl': 'MSVC' + +# All known GPUs +gpus: + - 'v100' # 40 runners + - 't4' # 8 runners + - 'rtx2080' # 8 runners + - 'rtxa6000' # 12 runners + - 'l4' # 48 runners + - 'rtx4090' # 10 runners + - 'h100' # 16 runners + +# SM versions of GPUs +gpu_sm: + 'v100': '70' + 't4': '75' + 'rtx2080': '75' + 'rtxa6000': '86' + 'l4': '89' + 'rtx4090': '89' + 'h100': '90' + +# Memory size of GPUs +gpu_mem_gb: + 'v100': '32' + 't4': '16' + 'rtx2080': '8' + 'rtxa6000': '48' + 'l4': '24' + 'rtx4090': '24' + 'h100': '80' + +# GPUs that require `-testing` at the end of the runner pool name. +testing_pool_gpus: + - 't4' + - 'rtx2080' + - 'rtxa6000' + - 'l4' + - 'rtx4090' + - 'h100' diff --git a/ci/verify_codegen.sh b/ci/verify_codegen_libcudacxx.sh similarity index 100% rename from ci/verify_codegen.sh rename to ci/verify_codegen_libcudacxx.sh From 09e8a8e96dbaf2981873ce58800ed0d6683028e0 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 3 May 2024 14:10:04 -0400 Subject: [PATCH 3/4] Branch protection WAR: #605 Reprise --- .github/workflows/ci-workflow-nightly.yml | 29 ++++++++----- .../workflows/ci-workflow-pull-request.yml | 41 ++++++++++++------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci-workflow-nightly.yml b/.github/workflows/ci-workflow-nightly.yml index ed1bb149b3..f6543149b4 100644 --- a/.github/workflows/ci-workflow-nightly.yml +++ b/.github/workflows/ci-workflow-nightly.yml @@ -61,10 +61,11 @@ jobs: name: ${{ matrix.name }} jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} - # Check all other job statuses. This job gates branch protection checks. - ci: - name: CI - if: ${{ always() || !cancelled() }} + # This job acts as a sentry and will fail if any leaf job in the workflow tree fails, as + # run-workflow always succeeds. Use this job when checking for successful matrix workflow job completion. + verify-workflow: + name: Verify and summarize workflow results + if: ${{ always() && !cancelled() }} needs: - build-workflow - run-workflow @@ -78,9 +79,19 @@ jobs: - name: Check workflow success id: check-workflow uses: ./.github/actions/workflow-results - with: - github_token: ${{ secrets.GITHUB_TOKEN }} + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + # !! Important: This job is used for branch protection checks. + # !! Need to use always() instead of !cancelled() because skipped jobs count as success + # !! for Github branch protection checks. Yes, really: by default, branch protections + # !! can be bypassed by cancelling CI. See NVIDIA/cccl#605. 
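+    # !! Because of always(), a cancelled or skipped verify-workflow reports a
+    # !! result other than "success", so the check below still fails the gate.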
+ if: ${{ always() }} + needs: + - verify-workflow + runs-on: ubuntu-latest + steps: - name: Check results run: | status="passed" @@ -98,10 +109,8 @@ jobs: fi } - # Note that run-workflow is different: - check_result "build-workflow" "success" "${{needs.build-workflow.result}}" - check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + check_result "verify-workflow" "success" "${{needs.verify-workflow.result}}" - if [[ "$status" == "failed" ]]; then + if [[ "$status" != "success" ]]; then exit 1 fi diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml index 3ff29cfeb2..8fc17bd5bf 100644 --- a/.github/workflows/ci-workflow-pull-request.yml +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -73,21 +73,14 @@ jobs: name: ${{ matrix.name }} jobs: ${{ toJSON(fromJSON(needs.build-workflow.outputs.workflow)[matrix.name]) }} - verify-devcontainers: - name: Verify Dev Containers - permissions: - id-token: write - contents: read - uses: ./.github/workflows/verify-devcontainers.yml - - # Check all other job statuses. This job gates branch protection checks. - ci: - name: CI + # This job acts as a sentry and will fail if any leaf job in the workflow tree fails, as + # run-workflow always succeeds. Use this job when checking for successful matrix workflow job completion. + verify-workflow: + name: Verify and summarize workflow results if: ${{ always() && !cancelled() }} needs: - build-workflow - run-workflow - - verify-devcontainers permissions: contents: read pull-requests: write # Posts a comment back to the PR. @@ -107,6 +100,26 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} pr_number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }} + verify-devcontainers: + name: Verify Dev Containers + permissions: + id-token: write + contents: read + uses: ./.github/workflows/verify-devcontainers.yml + + # Check all other job statuses. This job gates branch protection checks. + ci: + name: CI + # !! Important: This job is used for branch protection checks. + # !! Need to use always() instead of !cancelled() because skipped jobs count as success + # !! for Github branch protection checks. Yes, really: by default, branch protections + # !! can be bypassed by cancelling CI. See NVIDIA/cccl#605. + if: ${{ always() }} + needs: + - verify-workflow + - verify-devcontainers + runs-on: ubuntu-latest + steps: - name: Check results run: | status="passed" @@ -124,11 +137,9 @@ jobs: fi } - # Note that run-workflow is different: - check_result "build-workflow" "success" "${{needs.build-workflow.result}}" - check_result "run-workflow" "true" "${{steps.check-workflow.outputs.success}}" + check_result "verify-workflow" "success" "${{needs.verify-workflow.result}}" check_result "verify-devcontainers" "success" "${{needs.verify-devcontainers.result}}" - if [[ "$status" == "failed" ]]; then + if [[ "$status" != "success" ]]; then exit 1 fi From 6f04c7eced979c70465074d22dacedd440420325 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 3 May 2024 21:59:48 +0000 Subject: [PATCH 4/4] Fix condition check. [skip-tests] since this is only changing infra. 
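A note on what this one-word fix addresses: the gate added in the previous patch initialized `status="passed"`, but its final test is `[[ "$status" != "success" ]]`, so the `ci` job exited non-zero even when every dependency succeeded. A short Python transliteration (an illustration, not code from this patch) makes the failure mode visible:

```python
# Illustrative sketch: the bash check_result gate from ci-workflow-*.yml,
# transliterated to Python to show the bug being fixed.

def gate(results, initial):
    status = initial  # patch 3 set this to "passed"; this patch uses "success"
    for name, (expected, actual) in results.items():
        if actual != expected:
            print(f"{name} job failed")
            status = "failed"
    # The workflow's final check is `if [[ "$status" != "success" ]]; then exit 1`:
    return status == "success"

all_green = {"verify-workflow": ("success", "success")}
assert gate(all_green, "success")      # after the fix: a green run passes the gate
assert not gate(all_green, "passed")   # before: even a green run failed the gate
```

Keeping the initializer, the failure value, and the final comparison on the same vocabulary is the whole fix; `check_result` itself is unchanged.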
--- .github/workflows/ci-workflow-nightly.yml | 2 +- .github/workflows/ci-workflow-pull-request.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow-nightly.yml b/.github/workflows/ci-workflow-nightly.yml index f6543149b4..0fe41e42fc 100644 --- a/.github/workflows/ci-workflow-nightly.yml +++ b/.github/workflows/ci-workflow-nightly.yml @@ -94,7 +94,7 @@ jobs: steps: - name: Check results run: | - status="passed" + status="success" check_result() { name=$1 diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml index 8fc17bd5bf..ec4c898488 100644 --- a/.github/workflows/ci-workflow-pull-request.yml +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -122,7 +122,7 @@ jobs: steps: - name: Check results run: | - status="passed" + status="success" check_result() { name=$1