Skip to content

Commit

Permalink
Qualify call to distance in thrust::async_reduce (#1904)
Browse files Browse the repository at this point in the history
If the arguments to the unqualified call to distance contain entities from the thrust and cuda::std namespaces at the same time, the distance functions from both namespaces are available and cause an ambiguity.

Also use ::cuda::std::unique_ptr to work around a swap issue: std::unique_ptr in libstdc++ before gcc10 used swap() internally, which would lead to an ambiguous call between std::swap and cuda::std::swap.

Fixes: #1886
  • Loading branch information
bernhardmgruber committed Jun 25, 2024
1 parent 3df3bcf commit df8109c
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
41 changes: 41 additions & 0 deletions thrust/testing/async_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -836,4 +836,45 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES(test_async_copy_then_reduce, BuiltinNu

// TODO: when_all from reductions.

// See also issue: https://github.com/NVIDIA/cccl/issues/1886
struct test_async_reduce_bug1886
{
struct tuple_sum
{
__device__ thrust::tuple<int, int>
operator()(const thrust::tuple<int, int>& t1, const thrust::tuple<int, int>& t2) const
{
return thrust::make_tuple(thrust::get<0>(t1) + thrust::get<0>(t2), thrust::get<1>(t1) + thrust::get<1>(t2));
}
};

void operator()() const
{
// Initialize input data
thrust::device_vector<int> d_data1{1, 2, 3, 4, 5};
thrust::device_vector<int> d_data2{10, 20, 30, 40, 50};

using TupleType = thrust::tuple<int, int>;
using IteratorType =
thrust::zip_iterator<thrust::tuple<thrust::device_vector<int>::iterator, thrust::device_vector<int>::iterator>>;

// Create zip_begin and zip_end iterators
IteratorType zip_begin = thrust::make_zip_iterator(thrust::make_tuple(d_data1.begin(), d_data2.begin()));
IteratorType zip_end = thrust::make_zip_iterator(thrust::make_tuple(d_data2.end(), d_data2.end()));

// Initialize the starting tuple
TupleType init = thrust::make_tuple(0, 0);

// Perform async reduce using zip_begin and zip_end
auto future = thrust::async::reduce(thrust::device, zip_begin, zip_end, init, tuple_sum());

// Get the result
TupleType result = future.get();

// Print the result
std::cout << "Sum: (" << thrust::get<0>(result) << ", " << thrust::get<1>(result) << ")" << std::endl;
}
};
DECLARE_UNITTEST(test_async_reduce_bug1886);

#endif
4 changes: 2 additions & 2 deletions thrust/thrust/system/cuda/detail/async/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ namespace cuda_cub
// ADL entry point.
template <typename DerivedPolicy, typename ForwardIt, typename Sentinel, typename T, typename BinaryOp>
auto async_reduce(execution_policy<DerivedPolicy>& policy, ForwardIt first, Sentinel last, T init, BinaryOp op)
THRUST_RETURNS(thrust::system::cuda::detail::async_reduce_n(policy, first, distance(first, last), init, op))
THRUST_RETURNS(thrust::system::cuda::detail::async_reduce_n(policy, first, thrust::distance(first, last), init, op))

} // namespace cuda_cub

Expand Down Expand Up @@ -222,7 +222,7 @@ template <typename DerivedPolicy, typename ForwardIt, typename Sentinel, typenam
auto async_reduce_into(
execution_policy<DerivedPolicy>& policy, ForwardIt first, Sentinel last, OutputIt output, T init, BinaryOp op)
THRUST_RETURNS(
thrust::system::cuda::detail::async_reduce_into_n(policy, first, distance(first, last), output, init, op))
thrust::system::cuda::detail::async_reduce_into_n(policy, first, thrust::distance(first, last), output, init, op))

} // namespace cuda_cub

Expand Down
8 changes: 5 additions & 3 deletions thrust/thrust/system/cuda/detail/future.inl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
# include <thrust/type_traits/integer_sequence.h>
# include <thrust/type_traits/remove_cvref.h>

# include <cuda/std/__memory/unique_ptr.h>

# include <type_traits>

THRUST_NAMESPACE_BEGIN
Expand Down Expand Up @@ -669,7 +671,7 @@ public:
_CCCL_HOST explicit unique_eager_event(unique_eager_future<U>&& other)
// NOTE: We upcast to `unique_ptr<async_signal>` here.
: device_(other.where())
, async_signal_(std::move(other.async_signal_))
, async_signal_(other.async_signal_.release())
{}

_CCCL_HOST
Expand Down Expand Up @@ -758,11 +760,11 @@ struct unique_eager_future final

private:
int device_ = 0;
std::unique_ptr<detail::async_value<value_type>> async_signal_;
::cuda::std::unique_ptr<detail::async_value<value_type>> async_signal_;

_CCCL_HOST explicit unique_eager_future(int device_id, std::unique_ptr<detail::async_value<value_type>> async_signal)
: device_(device_id)
, async_signal_(std::move(async_signal))
, async_signal_(async_signal.release())
{}

public:
Expand Down

0 comments on commit df8109c

Please sign in to comment.