Skip to content

Commit

Permalink
Optimized solution
Browse files (browse the repository at this point in the history)
  • Loading branch information
gonidelis committed Sep 17, 2024
1 parent 6f9fba8 commit 612a9c2
Showing 1 changed file with 25 additions and 7 deletions.
32 changes: 25 additions & 7 deletions cub/cub/device/device_find_if.cuh
Original file line number | Diff line number | Diff line change
Expand Up @@ -60,6 +60,12 @@ __global__ void find_if(IterBegin begin, IterEnd end, Pred pred, int* result, st
// int elements_per_thread = 32;
auto tile_size = blockDim.x * elements_per_thread;
__shared__ int sresult;
__shared__ int block_result;

if (threadIdx.x == 0)
{
block_result = num_items;
}

for (int tile_offset = blockIdx.x * tile_size; tile_offset < num_items; tile_offset += tile_size * gridDim.x)
{
Expand All @@ -71,22 +77,34 @@ __global__ void find_if(IterBegin begin, IterEnd end, Pred pred, int* result, st
}
__syncthreads();

// early exit
if (sresult < tile_offset)
{
return;
}

bool found = false;
for (int i = 0; i < elements_per_thread; ++i)
{
auto index = tile_offset + threadIdx.x + i * blockDim.x;

if (index < num_items)
{
// early exit
if (sresult < index)
if (pred(*(begin + index)))
{
return;
found = true;
atomicMin(&block_result, index);
break;
}

if (pred(*(begin + index)))
}
}
if (syncthreads_or(found))
{
if (threadIdx.x == 0)
{
if (block_result < num_items)
{
atomicMin(result, index);
return;
atomicMin(result, block_result);
}
}
}
Expand Down

0 comments on commit 612a9c2

Please sign in to comment.