Skip to content

Commit

Permalink
Merge pull request #430 from senior-zero/fix-main/github/cub_debug
Browse files: browse the repository at this point in the history
Fix CubDebug
  • Loading branch information
gevtushenko committed Sep 12, 2023
2 parents cf6c417 + 2b08066 commit a11c1c1
Show file tree
Hide file tree
Showing 21 changed files with 843 additions and 414 deletions.
8 changes: 7 additions & 1 deletion cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,13 @@ struct ScanTileState<T, false>
allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized<T>); // bytes needed for inclusives

// Compute allocation pointers into the single storage blob
if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break;
error = CubDebug(
AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes));

if (cudaSuccess != error)
{
break;
}

// Alias the offsets
d_tile_status = reinterpret_cast<StatusWord*>(allocations[0]);
Expand Down
28 changes: 16 additions & 12 deletions cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ struct DispatchAdjacentDifference : public SelectedPolicy
void *allocations[1] = {nullptr};
std::size_t allocation_sizes[1] = {MayAlias * first_tile_previous_size};

if (CubDebug(error = AliasTemporaries(d_temp_storage,
temp_storage_bytes,
allocations,
allocation_sizes)))
error = CubDebug(
AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes));

if (cudaSuccess != error)
{
break;
}
Expand Down Expand Up @@ -278,15 +278,16 @@ struct DispatchAdjacentDifference : public SelectedPolicy
num_tiles,
tile_size);

error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

if (CubDebug(error))
if (cudaSuccess != error)
{
break;
}

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError()))
error = CubDebug(cudaPeekAtLastError());
if (cudaSuccess != error)
{
break;
}
Expand Down Expand Up @@ -319,15 +320,16 @@ struct DispatchAdjacentDifference : public SelectedPolicy
difference_op,
num_items);

error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

if (CubDebug(error))
if (cudaSuccess != error)
{
break;
}

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError()))
error = CubDebug(cudaPeekAtLastError());
if (cudaSuccess != error)
{
break;
}
Expand All @@ -352,7 +354,8 @@ struct DispatchAdjacentDifference : public SelectedPolicy
{
// Get PTX version
int ptx_version = 0;
if (CubDebug(error = PtxVersion(ptx_version)))
error = CubDebug(PtxVersion(ptx_version));
if (cudaSuccess != error)
{
break;
}
Expand All @@ -367,7 +370,8 @@ struct DispatchAdjacentDifference : public SelectedPolicy
stream);

// Dispatch to chained policy
if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch)))
error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
if (cudaSuccess != error)
{
break;
}
Expand Down
63 changes: 36 additions & 27 deletions cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -470,13 +470,14 @@ struct DispatchBatchMemcpy : SelectedPolicy

std::size_t buffer_offset_scan_storage = 0;
std::size_t blev_block_scan_storage = 0;
CubDebug(error = BLevBufferOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),
error = CubDebug(BLevBufferOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),
buffer_offset_scan_storage));
if (error)
{
return error;
}
CubDebug(error = BLevBlockOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),

error = CubDebug(BLevBlockOffsetTileState::AllocationSize(static_cast<int32_t>(num_tiles),
blev_block_scan_storage));
if (error)
{
Expand Down Expand Up @@ -504,8 +505,8 @@ struct DispatchBatchMemcpy : SelectedPolicy
}

// Alias memory buffers into the storage blob
if (CubDebug(
error = temporary_storage_layout.map_to_buffer(d_temp_storage, temp_storage_bytes)))
error = CubDebug(temporary_storage_layout.map_to_buffer(d_temp_storage, temp_storage_bytes));
if (cudaSuccess != error)
{
return error;
}
Expand Down Expand Up @@ -551,25 +552,26 @@ struct DispatchBatchMemcpy : SelectedPolicy

// Get device ordinal
int device_ordinal;
if (CubDebug(error = cudaGetDevice(&device_ordinal)))
error = CubDebug(cudaGetDevice(&device_ordinal));
if (cudaSuccess != error)
{
return error;
}

// Get SM count
int sm_count;
if (CubDebug(error = cudaDeviceGetAttribute(&sm_count,
cudaDevAttrMultiProcessorCount,
device_ordinal)))
error =
CubDebug(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal));
if (cudaSuccess != error)
{
return error;
}

// Get SM occupancy for the batch memcpy block-level buffers kernel
int batch_memcpy_blev_occupancy;
if (CubDebug(error = MaxSmOccupancy(batch_memcpy_blev_occupancy,
multi_block_memcpy_kernel,
BLEV_BLOCK_THREADS)))
error = CubDebug(
MaxSmOccupancy(batch_memcpy_blev_occupancy, multi_block_memcpy_kernel, BLEV_BLOCK_THREADS));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -579,18 +581,20 @@ struct DispatchBatchMemcpy : SelectedPolicy

// Construct the tile status for the buffer prefix sum
BLevBufferOffsetTileState buffer_scan_tile_state;
if (CubDebug(error = buffer_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_buffer_scan_alloc.get(),
buffer_offset_scan_storage)))
error = CubDebug(buffer_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_buffer_scan_alloc.get(),
buffer_offset_scan_storage));
if (cudaSuccess != error)
{
return error;
}

// Construct the tile status for thread blocks-to-buffer-assignment prefix sum
BLevBlockOffsetTileState block_scan_tile_state;
if (CubDebug(error = block_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_block_scan_alloc.get(),
blev_block_scan_storage)))
error = CubDebug(block_scan_tile_state.Init(static_cast<int32_t>(num_tiles),
blev_block_scan_alloc.get(),
blev_block_scan_storage));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -612,16 +616,17 @@ struct DispatchBatchMemcpy : SelectedPolicy
.doit(init_scan_states_kernel, buffer_scan_tile_state, block_scan_tile_state, num_tiles);

// Check for failure to launch
if (CubDebug(error))
error = CubDebug(error);
if (cudaSuccess != error)
{
return error;
}

// Sync the stream if specified to flush runtime errors
error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

// Check for runtime errors flushed by the stream sync
if (CubDebug(error))
if (cudaSuccess != error)
{
return error;
}
Expand Down Expand Up @@ -654,14 +659,15 @@ struct DispatchBatchMemcpy : SelectedPolicy
block_scan_tile_state);

// Check for failure to launch
if (CubDebug(error))
error = CubDebug(error);
if (cudaSuccess != error)
{
return error;
}

// Sync the stream if specified to flush runtime errors
error = detail::DebugSyncStream(stream);
if (CubDebug(error))
error = CubDebug(detail::DebugSyncStream(stream));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -687,13 +693,14 @@ struct DispatchBatchMemcpy : SelectedPolicy
batch_memcpy_grid_size - 1);

// Check for failure to launch
if (CubDebug(error))
error = CubDebug(error);
if (cudaSuccess != error)
{
return error;
}

// Sync the stream if specified to flush runtime errors
error = detail::DebugSyncStream(stream);
error = CubDebug(detail::DebugSyncStream(stream));

return error;
}
Expand All @@ -718,7 +725,8 @@ struct DispatchBatchMemcpy : SelectedPolicy

// Get PTX version
int ptx_version = 0;
if (CubDebug(error = PtxVersion(ptx_version)))
error = CubDebug(PtxVersion(ptx_version));
if (cudaSuccess != error)
{
return error;
}
Expand All @@ -733,7 +741,8 @@ struct DispatchBatchMemcpy : SelectedPolicy
stream);

// Dispatch to chained policy
if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch)))
error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
if (cudaSuccess != error)
{
return error;
}
Expand Down
Loading

0 comments on commit a11c1c1

Please sign in to comment.