Skip to content

Commit

Permalink
Suppress warnings and add workarounds
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Aug 16, 2024
1 parent 12a6506 commit 4b8f2be
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 20 deletions.
19 changes: 12 additions & 7 deletions cub/cub/device/dispatch/dispatch_transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
# pragma system_header
#endif // no system header

_CCCL_NV_DIAG_SUPPRESS(186) // pointless comparison of unsigned integer with zero
#include <cooperative_groups.h>
#include <cooperative_groups/memcpy_async.h>
_CCCL_NV_DIAG_DEFAULT(186)

#include <cuda.h>

#include <cub/detail/uninitialized_copy.cuh>
Expand All @@ -37,9 +42,6 @@
#include <cuda/std/type_traits>
#include <cuda/std/utility>

#include <cooperative_groups.h>
#include <cooperative_groups/memcpy_async.h>

CUB_NAMESPACE_BEGIN

namespace detail
Expand Down Expand Up @@ -187,7 +189,7 @@ _CCCL_DEVICE void transform_kernel_impl(
{
// TODO(bgruber): replace by fold over comma in C++17
int dummy[] = {(arrays[j] = ins[idx], 0)..., 0}; // extra zero to handle empty packs
(void) dummy;
(void) &dummy[0]; // MSVC needs an extra-strong unused-variable warning suppression
}
}
// process items_per_thread elements
Expand Down Expand Up @@ -279,6 +281,8 @@ _CCCL_DEVICE void transform_kernel_impl(
const auto smem_ptrs = ::cuda::std::tuple<InTs*...>{
copy_and_return_smem_dst(group, tile_size, smem, smem_offset, global_offset, pointers)...};
cooperative_groups::wait(group);
(void) smem_ptrs; // suppress unused warning for MSVC
(void) smem_offset; // suppress unused warning for MSVC

#pragma unroll 1
for (int i = 0; i < num_elem_per_thread; ++i)
Expand Down Expand Up @@ -460,8 +464,9 @@ _CCCL_DEVICE void transform_kernel_impl(
template <typename It>
struct kernel_arg
{
using PS = ptr_set<const value_t<It>>;
alignas(::cuda::std::max(alignof(It), alignof(PS))) char storage[::cuda::std::max(sizeof(It), sizeof(PS))];
using PS = ptr_set<const value_t<It>>;
static constexpr std::size_t alignment = ::cuda::std::max(alignof(It), alignof(PS)); // need extra variable for GCC<9
alignas(alignment) char storage[::cuda::std::max(sizeof(It), sizeof(PS))];

template <typename T>
_CCCL_HOST_DEVICE T& aliased_storage()
Expand Down Expand Up @@ -520,7 +525,7 @@ __launch_bounds__(MaxPolicy::ActivePolicy::algo_policy::BLOCK_THREADS)
Offset len, int num_elem_per_thread, F f, RandomAccessIteartorOut out, kernel_arg<RandomAccessIteartorsIn>... ins)
{
constexpr auto alg = ::cuda::std::integral_constant<Algorithm, MaxPolicy::ActivePolicy::algorithm>{};
transform_kernel_impl<MaxPolicy::ActivePolicy::algo_policy>(
transform_kernel_impl<typename MaxPolicy::ActivePolicy::algo_policy>(
alg,
len,
num_elem_per_thread,
Expand Down
25 changes: 14 additions & 11 deletions cub/test/catch2_test_device_transform.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,20 @@ using algorithms =

using offset_types = c2h::type_list<std::int32_t, std::int64_t>;

// Test helper: declares a local `ptx_version`, requires that cub::PtxVersion() reports cudaSuccess,
// and returns early from the enclosing function when `alg` is Algorithm::memcpy_async with a PTX
// version below 800, or Algorithm::ublkcp with a PTX version below 900.
// Expects `alg` and `Algorithm` to be visible at the expansion site. Because the expansion contains
// a local declaration and `return;`, expand it at most once per scope, inside a void function body.
#define FILTER_UNSUPPORTED_ALGS \
int ptx_version = 0; \
REQUIRE(cub::PtxVersion(ptx_version) == cudaSuccess); \
if (alg == Algorithm::memcpy_async && ptx_version < 800) \
{ \
return; \
} \
if (alg == Algorithm::ublkcp && ptx_version < 900) \
{ \
return; \
}
// Test helper: declares a local `ptx_version`, requires that cub::PtxVersion() reports cudaSuccess,
// and returns early from the enclosing function when `alg` is Algorithm::memcpy_async with a PTX
// version below 800, or Algorithm::ublkcp with a PTX version below 900.
// The two checks are bracketed by _CCCL_DIAG_PUSH/_CCCL_DIAG_POP with MSVC warning 4127
// ("conditional expression is constant") suppressed in between; presumably `alg` is a compile-time
// constant at the expansion sites, which is what triggers that warning -- confirm against callers.
// Expects `alg` and `Algorithm` to be visible at the expansion site. Because the expansion contains
// a local declaration and `return;`, expand it at most once per scope, inside a void function body.
#define FILTER_UNSUPPORTED_ALGS \
int ptx_version = 0; \
REQUIRE(cub::PtxVersion(ptx_version) == cudaSuccess); \
_CCCL_DIAG_PUSH \
_CCCL_DIAG_SUPPRESS_MSVC(4127) /* conditional expression is constant */ \
if (alg == Algorithm::memcpy_async && ptx_version < 800) \
{ \
return; \
} \
if (alg == Algorithm::ublkcp && ptx_version < 900) \
{ \
return; \
} \
_CCCL_DIAG_POP

CUB_TEST("DeviceTransform::Transform BabelStream add",
"[device][device_transform]",
Expand Down
16 changes: 14 additions & 2 deletions cub/test/catch2_test_device_transform_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "catch2_test_helper.h"

CUB_TEST("DeviceTransform::Transform API example", "[device][device_transform]")
// A separate function is needed because, on Windows, the extended lambda must be enclosed by a function with external linkage.
void test_transform_api()
{
// example-begin transform-many
constexpr auto num_items = 4;
Expand All @@ -28,7 +29,13 @@ CUB_TEST("DeviceTransform::Transform API example", "[device][device_transform]")
CHECK(result == expected);
}

CUB_TEST("DeviceTransform::TransformStableArgumentAddresses API example", "[device][device_transform]")
// Test-case entry point for the DeviceTransform::Transform API example.
// The test body is not written inline here; this case only forwards to test_transform_api().
CUB_TEST("DeviceTransform::Transform API example", "[device][device_transform]")
{
test_transform_api();
}

// A separate function is needed because, on Windows, the extended lambda must be enclosed by a function with external linkage.
void test_transform_stable_api()
{
// example-begin transform-many-stable
constexpr auto num_items = 4;
Expand All @@ -49,3 +56,8 @@ CUB_TEST("DeviceTransform::TransformStableArgumentAddresses API example", "[devi
// example-end transform-many-stable
CHECK(result == expected);
}

// Test-case entry point for the DeviceTransform::TransformStableArgumentAddresses API example.
// The test body is not written inline here; this case only forwards to test_transform_stable_api().
CUB_TEST("DeviceTransform::TransformStableArgumentAddresses API example", "[device][device_transform]")
{
test_transform_stable_api();
}

0 comments on commit 4b8f2be

Please sign in to comment.