From 4b8f2be74f64530124e5fcc4b82d91d4b94191d2 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 14 Aug 2024 23:02:24 +0200 Subject: [PATCH] Suppress warnings and add workarounds --- .../device/dispatch/dispatch_transform.cuh | 19 ++++++++------ cub/test/catch2_test_device_transform.cu | 25 +++++++++++-------- cub/test/catch2_test_device_transform_api.cu | 16 ++++++++++-- 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/cub/cub/device/dispatch/dispatch_transform.cuh b/cub/cub/device/dispatch/dispatch_transform.cuh index 259d287598..51cab91a04 100644 --- a/cub/cub/device/dispatch/dispatch_transform.cuh +++ b/cub/cub/device/dispatch/dispatch_transform.cuh @@ -13,6 +13,11 @@ # pragma system_header #endif // no system header +_CCCL_NV_DIAG_SUPPRESS(186) // pointless comparison of unsigned integer with zero +#include +#include +_CCCL_NV_DIAG_DEFAULT(186) + #include #include @@ -37,9 +42,6 @@ #include #include -#include -#include - CUB_NAMESPACE_BEGIN namespace detail @@ -187,7 +189,7 @@ _CCCL_DEVICE void transform_kernel_impl( { // TODO(bgruber): replace by fold over comma in C++17 int dummy[] = {(arrays[j] = ins[idx], 0)..., 0}; // extra zero to handle empty packs - (void) dummy; + (void) &dummy[0]; // MSVC needs extra strong unused warning suppression } } // process items_per_thread elements @@ -279,6 +281,8 @@ _CCCL_DEVICE void transform_kernel_impl( const auto smem_ptrs = ::cuda::std::tuple{ copy_and_return_smem_dst(group, tile_size, smem, smem_offset, global_offset, pointers)...}; cooperative_groups::wait(group); + (void) smem_ptrs; // suppress unused warning for MSVC + (void) smem_offset; // suppress unused warning for MSVC #pragma unroll 1 for (int i = 0; i < num_elem_per_thread; ++i) @@ -460,8 +464,9 @@ _CCCL_DEVICE void transform_kernel_impl( template struct kernel_arg { - using PS = ptr_set>; - alignas(::cuda::std::max(alignof(It), alignof(PS))) char storage[::cuda::std::max(sizeof(It), sizeof(PS))]; + using PS = ptr_set>; + static
constexpr std::size_t alignment = ::cuda::std::max(alignof(It), alignof(PS)); // need extra variable for GCC<9 + alignas(alignment) char storage[::cuda::std::max(sizeof(It), sizeof(PS))]; template _CCCL_HOST_DEVICE T& aliased_storage() @@ -520,7 +525,7 @@ __launch_bounds__(MaxPolicy::ActivePolicy::algo_policy::BLOCK_THREADS) Offset len, int num_elem_per_thread, F f, RandomAccessIteartorOut out, kernel_arg... ins) { constexpr auto alg = ::cuda::std::integral_constant{}; - transform_kernel_impl( + transform_kernel_impl( alg, len, num_elem_per_thread, diff --git a/cub/test/catch2_test_device_transform.cu b/cub/test/catch2_test_device_transform.cu index 60cf303749..8255fb153e 100644 --- a/cub/test/catch2_test_device_transform.cu +++ b/cub/test/catch2_test_device_transform.cu @@ -90,17 +90,20 @@ using algorithms = using offset_types = c2h::type_list; -#define FILTER_UNSUPPORTED_ALGS \ - int ptx_version = 0; \ - REQUIRE(cub::PtxVersion(ptx_version) == cudaSuccess); \ - if (alg == Algorithm::memcpy_async && ptx_version < 800) \ - { \ - return; \ - } \ - if (alg == Algorithm::ublkcp && ptx_version < 900) \ - { \ - return; \ - } +#define FILTER_UNSUPPORTED_ALGS \ + int ptx_version = 0; \ + REQUIRE(cub::PtxVersion(ptx_version) == cudaSuccess); \ + _CCCL_DIAG_PUSH \ + _CCCL_DIAG_SUPPRESS_MSVC(4127) /* conditional expression is constant */ \ + if (alg == Algorithm::memcpy_async && ptx_version < 800) \ + { \ + return; \ + } \ + if (alg == Algorithm::ublkcp && ptx_version < 900) \ + { \ + return; \ + } \ + _CCCL_DIAG_POP CUB_TEST("DeviceTransform::Transform BabelStream add", "[device][device_transform]", diff --git a/cub/test/catch2_test_device_transform_api.cu b/cub/test/catch2_test_device_transform_api.cu index b3e9d48b5a..2d75454cc1 100644 --- a/cub/test/catch2_test_device_transform_api.cu +++ b/cub/test/catch2_test_device_transform_api.cu @@ -8,7 +8,8 @@ #include "catch2_test_helper.h" -CUB_TEST("DeviceTransform::Transform API example", "[device][device_transform]") +// need 
a separate function because the ext. lambda needs to be enclosed by a function with external linkage on Windows +void test_transform_api() { // example-begin transform-many constexpr auto num_items = 4; @@ -28,7 +29,13 @@ CUB_TEST("DeviceTransform::Transform API example", "[device][device_transform]") CHECK(result == expected); } -CUB_TEST("DeviceTransform::TransformStableArgumentAddresses API example", "[device][device_transform]") +CUB_TEST("DeviceTransform::Transform API example", "[device][device_transform]") +{ + test_transform_api(); +} + +// need a separate function because the ext. lambda needs to be enclosed by a function with external linkage on Windows +void test_transform_stable_api() { // example-begin transform-many-stable constexpr auto num_items = 4; @@ -49,3 +56,8 @@ CUB_TEST("DeviceTransform::TransformStableArgumentAddresses API example", "[devi // example-end transform-many-stable CHECK(result == expected); } + +CUB_TEST("DeviceTransform::TransformStableArgumentAddresses API example", "[device][device_transform]") +{ + test_transform_stable_api(); +}