Skip to content

Commit

Permalink
Fix thrust::sort for large problem sizes (#1952)
Browse files Browse the repository at this point in the history
* Fix thrust::sort for large problem sizes

* Test thrust::sort on large problem sizes

* [pre-commit.ci] auto code formatting

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Bernhard Manfred Gruber <[email protected]>
  • Loading branch information
3 people committed Jul 11, 2024
1 parent 8863d7e commit 9eb8d44
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 30 deletions.
102 changes: 102 additions & 0 deletions thrust/testing/cuda/sort.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
#include <thrust/copy.h>
#include <thrust/equal.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/reverse_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/sort.h>

#include <cuda/std/limits>

#include <cstdint>
#include <exception>

#include <unittest/unittest.h>

template <typename T>
Expand Down Expand Up @@ -157,3 +167,95 @@ struct TestRadixSortDispatch
// TODO(bgruber): use a single test case with a concatenated key list and a cartesion product with the comparators
SimpleUnitTest<TestRadixSortDispatch, IntegralTypes> TestRadixSortDispatchIntegralInstance;
SimpleUnitTest<TestRadixSortDispatch, FloatingPointTypes> TestRadixSortDispatchFPInstance;

/**
* Copy of CUB testing utility
*/
template <typename UnsignedIntegralKeyT>
struct index_to_key_value_op
{
static constexpr std::size_t max_key_value =
static_cast<std::size_t>(::cuda::std::numeric_limits<UnsignedIntegralKeyT>::max());
static constexpr std::size_t lowest_key_value =
static_cast<std::size_t>(::cuda::std::numeric_limits<UnsignedIntegralKeyT>::lowest());
static constexpr std::size_t num_distinct_key_values = (max_key_value - lowest_key_value + std::size_t{1ULL});

__device__ __host__ UnsignedIntegralKeyT operator()(std::size_t index)
{
return static_cast<UnsignedIntegralKeyT>(index % num_distinct_key_values);
}
};

/**
* Copy of CUB testing utility
*/
template <typename UnsignedIntegralKeyT>
class index_to_expected_key_op
{
private:
static constexpr std::size_t max_key_value =
static_cast<std::size_t>(::cuda::std::numeric_limits<UnsignedIntegralKeyT>::max());
static constexpr std::size_t lowest_key_value =
static_cast<std::size_t>(::cuda::std::numeric_limits<UnsignedIntegralKeyT>::lowest());
static constexpr std::size_t num_distinct_key_values = (max_key_value - lowest_key_value + std::size_t{1ULL});

// item_count / num_distinct_key_values
std::size_t expected_count_per_item;
// num remainder items: item_count%num_distinct_key_values
std::size_t num_remainder_items;
// remainder item_count: expected_count_per_item+1
std::size_t remainder_item_count;

public:
index_to_expected_key_op(std::size_t num_total_items)
: expected_count_per_item(num_total_items / num_distinct_key_values)
, num_remainder_items(num_total_items % num_distinct_key_values)
, remainder_item_count(expected_count_per_item + std::size_t{1ULL})
{}

__device__ __host__ UnsignedIntegralKeyT operator()(std::size_t index)
{
// The first (num_remainder_items * remainder_item_count) are items that appear once more often than the items that
// follow remainder_items_offset
std::size_t remainder_items_offset = num_remainder_items * remainder_item_count;

UnsignedIntegralKeyT target_item_index =
(index <= remainder_items_offset)
?
// This is one of the remainder items
static_cast<UnsignedIntegralKeyT>(index / remainder_item_count)
:
// This is an item that appears exactly expected_count_per_item times
static_cast<UnsignedIntegralKeyT>(
num_remainder_items + ((index - remainder_items_offset) / expected_count_per_item));
return target_item_index;
}
};

void TestSortWithMagnitude(int magnitude)
{
try
{
const std::size_t num_items = 1ull << magnitude;
thrust::device_vector<std::uint8_t> vec(num_items);
auto counting_it = thrust::make_counting_iterator(std::size_t{0});
auto key_value_it = thrust::make_transform_iterator(counting_it, index_to_key_value_op<std::uint8_t>{});
auto rev_sorted_it = thrust::make_reverse_iterator(key_value_it + num_items);
thrust::copy(rev_sorted_it, rev_sorted_it + num_items, vec.begin());
thrust::sort(vec.begin(), vec.end());
auto expected_result_it = thrust::make_transform_iterator(
thrust::make_counting_iterator(std::size_t{}), index_to_expected_key_op<std::uint8_t>(num_items));
const bool ok = thrust::equal(expected_result_it, expected_result_it + num_items, vec.cbegin());
ASSERT_EQUAL(ok, true);
}
catch (std::bad_alloc&)
{}
}

void TestSortWithLargeNumberOfItems()
{
TestSortWithMagnitude(39);
TestSortWithMagnitude(32);
TestSortWithMagnitude(33);
}
DECLARE_UNITTEST(TestSortWithLargeNumberOfItems);
34 changes: 4 additions & 30 deletions thrust/thrust/system/cuda/detail/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,7 @@ struct dispatch<thrust::detail::false_type, thrust::less<KeyOrVoid>>
cudaStream_t stream)
{
return cub::DeviceRadixSort::SortKeys(
d_temp_storage,
temp_storage_bytes,
keys_buffer,
static_cast<int>(count),
0,
static_cast<int>(sizeof(Key) * 8),
stream);
d_temp_storage, temp_storage_bytes, keys_buffer, count, 0, static_cast<int>(sizeof(Key) * 8), stream);
}
}; // struct dispatch -- sort keys in ascending order;

Expand All @@ -196,13 +190,7 @@ struct dispatch<thrust::detail::false_type, thrust::greater<KeyOrVoid>>
cudaStream_t stream)
{
return cub::DeviceRadixSort::SortKeysDescending(
d_temp_storage,
temp_storage_bytes,
keys_buffer,
static_cast<int>(count),
0,
static_cast<int>(sizeof(Key) * 8),
stream);
d_temp_storage, temp_storage_bytes, keys_buffer, count, 0, static_cast<int>(sizeof(Key) * 8), stream);
}
}; // struct dispatch -- sort keys in descending order;

Expand All @@ -220,14 +208,7 @@ struct dispatch<thrust::detail::true_type, thrust::less<KeyOrVoid>>
cudaStream_t stream)
{
return cub::DeviceRadixSort::SortPairs(
d_temp_storage,
temp_storage_bytes,
keys_buffer,
items_buffer,
static_cast<int>(count),
0,
static_cast<int>(sizeof(Key) * 8),
stream);
d_temp_storage, temp_storage_bytes, keys_buffer, items_buffer, count, 0, static_cast<int>(sizeof(Key) * 8), stream);
}
}; // struct dispatch -- sort pairs in ascending order;

Expand All @@ -245,14 +226,7 @@ struct dispatch<thrust::detail::true_type, thrust::greater<KeyOrVoid>>
cudaStream_t stream)
{
return cub::DeviceRadixSort::SortPairsDescending(
d_temp_storage,
temp_storage_bytes,
keys_buffer,
items_buffer,
static_cast<int>(count),
0,
static_cast<int>(sizeof(Key) * 8),
stream);
d_temp_storage, temp_storage_bytes, keys_buffer, items_buffer, count, 0, static_cast<int>(sizeof(Key) * 8), stream);
}
}; // struct dispatch -- sort pairs in descending order;

Expand Down

0 comments on commit 9eb8d44

Please sign in to comment.