NVIDIA · bernhardmgruber · Aug 2, 2024 · Aug 1, 2024
@@ -39,7 +39,7 @@ constexpr int num_items_per_thread = 2;
 constexpr int block_num_threads    = 64;
 
 // example-begin inclusive-scan-array-init-value
-__global__ void InclusiveScanKernel(int* output)
+__global__ void InclusiveBlockScanKernel(int* output)
 {
   // Specialize BlockScan for a 1D block of 64 threads of type int
   using block_scan_t   = cub::BlockScan<int, 64>;
@@ -69,7 +69,7 @@ CUB_TEST("Block array-based inclusive scan works with initial value", "[scan][bl
 {
   thrust::device_vector<int> d_out(block_num_threads * num_items_per_thread);
 
-  InclusiveScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data()));
+  InclusiveBlockScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data()));
   REQUIRE(cudaSuccess == cudaPeekAtLastError());
   REQUIRE(cudaSuccess == cudaDeviceSynchronize());
 
@@ -89,7 +89,7 @@ CUB_TEST("Block array-based inclusive scan works with initial value", "[scan][bl
 }
 
 // example-begin inclusive-scan-array-aggregate-init-value
-__global__ void InclusiveScanKernelAggregate(int* output, int* d_block_aggregate)
+__global__ void InclusiveBlockScanKernelAggregate(int* output, int* d_block_aggregate)
 {
   // Specialize BlockScan for a 1D block of 64 threads of type int
   using block_scan_t   = cub::BlockScan<int, 64>;
@@ -124,7 +124,7 @@ CUB_TEST("Block array-based inclusive scan with block aggregate works with initi
   thrust::device_vector<int> d_out(block_num_threads * num_items_per_thread);
 
   c2h::device_vector<int> d_block_aggregate(1);
-  InclusiveScanKernelAggregate<<<1, block_num_threads>>>(
+  InclusiveBlockScanKernelAggregate<<<1, block_num_threads>>>(
     thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_block_aggregate.data()));
   REQUIRE(cudaSuccess == cudaPeekAtLastError());
   REQUIRE(cudaSuccess == cudaDeviceSynchronize());

@@ -56,7 +56,7 @@ struct sum_op
 };
 
 // example-begin inclusive-warp-scan-init-value
-__global__ void InclusiveScanKernel(int* output)
+__global__ void InclusiveWarpScanKernel(int* output)
 {
   // Specialize WarpScan for type int
   using warp_scan_t = cub::WarpScan<int>;
@@ -89,7 +89,7 @@ CUB_TEST("Warp array-based inclusive scan works with initial value", "[scan][war
 {
   thrust::device_vector<int> d_out(num_warps * 32);
 
-  InclusiveScanKernel<<<1, num_warps * 32>>>(thrust::raw_pointer_cast(d_out.data()));
+  InclusiveWarpScanKernel<<<1, num_warps * 32>>>(thrust::raw_pointer_cast(d_out.data()));
   REQUIRE(cudaSuccess == cudaPeekAtLastError());
   REQUIRE(cudaSuccess == cudaDeviceSynchronize());
 
@@ -109,7 +109,7 @@ CUB_TEST("Warp array-based inclusive scan works with initial value", "[scan][war
 }
 
 // example-begin inclusive-warp-scan-init-value-aggregate
-__global__ void InclusiveScanKernelAggr(int* output, int* d_warp_aggregate)
+__global__ void InclusiveWarpScanKernelAggr(int* output, int* d_warp_aggregate)
 {
   // Specialize WarpScan for type int
   using warp_scan_t = cub::WarpScan<int>;
@@ -144,7 +144,7 @@ CUB_TEST("Warp array-based inclusive scan aggregate works with initial value", "
   thrust::device_vector<int> d_out(num_warps * 32);
   c2h::device_vector<int> d_warp_aggregate(num_warps);
 
-  InclusiveScanKernelAggr<<<1, num_warps * 32>>>(
+  InclusiveWarpScanKernelAggr<<<1, num_warps * 32>>>(
     thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_warp_aggregate.data()));
   REQUIRE(cudaSuccess == cudaPeekAtLastError());
   REQUIRE(cudaSuccess == cudaDeviceSynchronize());