Skip to content

Commit

Permalink
Fix multiple definition of InclusiveScanKernel (NVIDIA#2169)
Browse files (browse the repository at this point in the history)
```
Linking CXX executable bin/cub.cpp14.catch2_test.lid_0
FAILED: bin/cub.cpp14.catch2_test.lid_0
...
/usr/bin/ld: cub/test/CMakeFiles/cub.cpp14.test.warp_scan_api.dir/catch2_test_warp_scan_api.cu.o: in function `InclusiveScanKernel(int*)':
/usr/local/cuda-12.7/targets/x86_64-linux/include/nvtx3/nvtxDetail/nvtxInitDefs.h:473: multiple definition of `InclusiveScanKernel(int*)'; cub/test/CMakeFiles/cub.cpp14.test.block_scan_api.dir/catch2_test_block_scan_api.cu.o:/usr/local/cuda-12.7/targets/x86_64-linux/include/nvtx3/nvtxDetail/nvtxInitDefs.h:468: first defined here
collect2: error: ld returned 1 exit status

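```
Cause: `catch2_test_block_scan_api.cu` and `catch2_test_warp_scan_api.cu` each defined a global-scope kernel named `InclusiveScanKernel(int*)`, and both object files are linked into the same test executable, so the linker saw two definitions of the same symbol. The fix renames the kernels in each file to names that include `Block` or `Warp` (the aggregate variants are renamed the same way for consistency).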
  • Loading branch information
bernhardmgruber authored and pciolkosz committed Aug 4, 2024
1 parent d3464a5 commit 68bcf25
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
8 changes: 4 additions & 4 deletions cub/test/catch2_test_block_scan_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ constexpr int num_items_per_thread = 2;
constexpr int block_num_threads = 64;

// example-begin inclusive-scan-array-init-value
__global__ void InclusiveScanKernel(int* output)
__global__ void InclusiveBlockScanKernel(int* output)
{
// Specialize BlockScan for a 1D block of 64 threads of type int
using block_scan_t = cub::BlockScan<int, 64>;
Expand Down Expand Up @@ -69,7 +69,7 @@ CUB_TEST("Block array-based inclusive scan works with initial value", "[scan][bl
{
thrust::device_vector<int> d_out(block_num_threads * num_items_per_thread);

InclusiveScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data()));
InclusiveBlockScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data()));
REQUIRE(cudaSuccess == cudaPeekAtLastError());
REQUIRE(cudaSuccess == cudaDeviceSynchronize());

Expand All @@ -89,7 +89,7 @@ CUB_TEST("Block array-based inclusive scan works with initial value", "[scan][bl
}

// example-begin inclusive-scan-array-aggregate-init-value
__global__ void InclusiveScanKernelAggregate(int* output, int* d_block_aggregate)
__global__ void InclusiveBlockScanKernelAggregate(int* output, int* d_block_aggregate)
{
// Specialize BlockScan for a 1D block of 64 threads of type int
using block_scan_t = cub::BlockScan<int, 64>;
Expand Down Expand Up @@ -124,7 +124,7 @@ CUB_TEST("Block array-based inclusive scan with block aggregate works with initi
thrust::device_vector<int> d_out(block_num_threads * num_items_per_thread);

c2h::device_vector<int> d_block_aggregate(1);
InclusiveScanKernelAggregate<<<1, block_num_threads>>>(
InclusiveBlockScanKernelAggregate<<<1, block_num_threads>>>(
thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_block_aggregate.data()));
REQUIRE(cudaSuccess == cudaPeekAtLastError());
REQUIRE(cudaSuccess == cudaDeviceSynchronize());
Expand Down
8 changes: 4 additions & 4 deletions cub/test/catch2_test_warp_scan_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct sum_op
};

// example-begin inclusive-warp-scan-init-value
__global__ void InclusiveScanKernel(int* output)
__global__ void InclusiveWarpScanKernel(int* output)
{
// Specialize WarpScan for type int
using warp_scan_t = cub::WarpScan<int>;
Expand Down Expand Up @@ -89,7 +89,7 @@ CUB_TEST("Warp array-based inclusive scan works with initial value", "[scan][war
{
thrust::device_vector<int> d_out(num_warps * 32);

InclusiveScanKernel<<<1, num_warps * 32>>>(thrust::raw_pointer_cast(d_out.data()));
InclusiveWarpScanKernel<<<1, num_warps * 32>>>(thrust::raw_pointer_cast(d_out.data()));
REQUIRE(cudaSuccess == cudaPeekAtLastError());
REQUIRE(cudaSuccess == cudaDeviceSynchronize());

Expand All @@ -109,7 +109,7 @@ CUB_TEST("Warp array-based inclusive scan works with initial value", "[scan][war
}

// example-begin inclusive-warp-scan-init-value-aggregate
__global__ void InclusiveScanKernelAggr(int* output, int* d_warp_aggregate)
__global__ void InclusiveWarpScanKernelAggr(int* output, int* d_warp_aggregate)
{
// Specialize WarpScan for type int
using warp_scan_t = cub::WarpScan<int>;
Expand Down Expand Up @@ -144,7 +144,7 @@ CUB_TEST("Warp array-based inclusive scan aggregate works with initial value", "
thrust::device_vector<int> d_out(num_warps * 32);
c2h::device_vector<int> d_warp_aggregate(num_warps);

InclusiveScanKernelAggr<<<1, num_warps * 32>>>(
InclusiveWarpScanKernelAggr<<<1, num_warps * 32>>>(
thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_warp_aggregate.data()));
REQUIRE(cudaSuccess == cudaPeekAtLastError());
REQUIRE(cudaSuccess == cudaDeviceSynchronize());
Expand Down

0 comments on commit 68bcf25

Please sign in to comment.