From 68bcf2528d9972dd24c6ddb8fa89336ad905c21b Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 2 Aug 2024 12:51:26 +0200 Subject: [PATCH] Fix multiple definition of InclusiveScanKernel (#2169) ``` Linking CXX executable bin/cub.cpp14.catch2_test.lid_0 FAILED: bin/cub.cpp14.catch2_test.lid_0 ... /usr/bin/ld: cub/test/CMakeFiles/cub.cpp14.test.warp_scan_api.dir/catch2_test_warp_scan_api.cu.o: in function `InclusiveScanKernel(int*)': /usr/local/cuda-12.7/targets/x86_64-linux/include/nvtx3/nvtxDetail/nvtxInitDefs.h:473: multiple definition of `InclusiveScanKernel(int*)'; cub/test/CMakeFiles/cub.cpp14.test.block_scan_api.dir/catch2_test_block_scan_api.cu.o:/usr/local/cuda-12.7/targets/x86_64-linux/include/nvtx3/nvtxDetail/nvtxInitDefs.h:468: first defined here collect2: error: ld returned 1 exit status ``` --- cub/test/catch2_test_block_scan_api.cu | 8 ++++---- cub/test/catch2_test_warp_scan_api.cu | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cub/test/catch2_test_block_scan_api.cu b/cub/test/catch2_test_block_scan_api.cu index 28a1f736655..9b1dd4fb4e6 100644 --- a/cub/test/catch2_test_block_scan_api.cu +++ b/cub/test/catch2_test_block_scan_api.cu @@ -39,7 +39,7 @@ constexpr int num_items_per_thread = 2; constexpr int block_num_threads = 64; // example-begin inclusive-scan-array-init-value -__global__ void InclusiveScanKernel(int* output) +__global__ void InclusiveBlockScanKernel(int* output) { // Specialize BlockScan for a 1D block of 64 threads of type int using block_scan_t = cub::BlockScan; @@ -69,7 +69,7 @@ CUB_TEST("Block array-based inclusive scan works with initial value", "[scan][bl { thrust::device_vector d_out(block_num_threads * num_items_per_thread); - InclusiveScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data())); + InclusiveBlockScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data())); REQUIRE(cudaSuccess == cudaPeekAtLastError()); REQUIRE(cudaSuccess == cudaDeviceSynchronize()); @@ -89,7 +89,7 @@ CUB_TEST("Block array-based inclusive scan works with initial value", "[scan][bl } // example-begin inclusive-scan-array-aggregate-init-value -__global__ void InclusiveScanKernelAggregate(int* output, int* d_block_aggregate) +__global__ void InclusiveBlockScanKernelAggregate(int* output, int* d_block_aggregate) { // Specialize BlockScan for a 1D block of 64 threads of type int using block_scan_t = cub::BlockScan; @@ -124,7 +124,7 @@ CUB_TEST("Block array-based inclusive scan with block aggregate works with initi thrust::device_vector d_out(block_num_threads * num_items_per_thread); c2h::device_vector d_block_aggregate(1); - InclusiveScanKernelAggregate<<<1, block_num_threads>>>( + InclusiveBlockScanKernelAggregate<<<1, block_num_threads>>>( thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_block_aggregate.data())); REQUIRE(cudaSuccess == cudaPeekAtLastError()); REQUIRE(cudaSuccess == cudaDeviceSynchronize()); diff --git a/cub/test/catch2_test_warp_scan_api.cu b/cub/test/catch2_test_warp_scan_api.cu index 39c578da4c7..b0c68916af2 100644 --- a/cub/test/catch2_test_warp_scan_api.cu +++ b/cub/test/catch2_test_warp_scan_api.cu @@ -56,7 +56,7 @@ struct sum_op }; // example-begin inclusive-warp-scan-init-value -__global__ void InclusiveScanKernel(int* output) +__global__ void InclusiveWarpScanKernel(int* output) { // Specialize WarpScan for type int using warp_scan_t = cub::WarpScan; @@ -89,7 +89,7 @@ CUB_TEST("Warp array-based inclusive scan works with initial value", "[scan][war { thrust::device_vector d_out(num_warps * 32); - InclusiveScanKernel<<<1, num_warps * 32>>>(thrust::raw_pointer_cast(d_out.data())); + InclusiveWarpScanKernel<<<1, num_warps * 32>>>(thrust::raw_pointer_cast(d_out.data())); REQUIRE(cudaSuccess == cudaPeekAtLastError()); REQUIRE(cudaSuccess == cudaDeviceSynchronize()); @@ -109,7 +109,7 @@ CUB_TEST("Warp array-based inclusive scan works with initial value", "[scan][war } // example-begin inclusive-warp-scan-init-value-aggregate -__global__ void InclusiveScanKernelAggr(int* output, int* d_warp_aggregate) +__global__ void InclusiveWarpScanKernelAggr(int* output, int* d_warp_aggregate) { // Specialize WarpScan for type int using warp_scan_t = cub::WarpScan; @@ -144,7 +144,7 @@ CUB_TEST("Warp array-based inclusive scan aggregate works with initial value", " thrust::device_vector d_out(num_warps * 32); c2h::device_vector d_warp_aggregate(num_warps); - InclusiveScanKernelAggr<<<1, num_warps * 32>>>( + InclusiveWarpScanKernelAggr<<<1, num_warps * 32>>>( thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_warp_aggregate.data())); REQUIRE(cudaSuccess == cudaPeekAtLastError()); REQUIRE(cudaSuccess == cudaDeviceSynchronize());