From f152614df5b67d2254839392e1d2e920697cbf79 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 24 Jul 2024 00:15:53 +0000 Subject: [PATCH 01/10] construct with a stream_ref and record the event on construction --- .../cuda/experimental/__stream/stream.cuh | 196 ++++++++++++++++++ cudax/include/cuda/experimental/stream.cuh | 16 ++ cudax/test/CMakeLists.txt | 4 + cudax/test/stream/stream_smoke.cu | 24 +++ libcudacxx/include/cuda/stream_ref | 16 +- 5 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 cudax/include/cuda/experimental/__stream/stream.cuh create mode 100644 cudax/include/cuda/experimental/stream.cuh create mode 100644 cudax/test/stream/stream_smoke.cu diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh new file mode 100644 index 0000000000..1794d0800d --- /dev/null +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -0,0 +1,196 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX__STREAM_STREAM +#define _CUDAX__STREAM_STREAM + +#include +// cuda_runtime_api needs to come first + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +namespace cuda::experimental +{ + +namespace detail +{ +// 0 is a valid stream in CUDA, so we need some other invalid stream representation +static const cudaStream_t invalid_stream = reinterpret_cast(~0ULL); +} // namespace detail + +//! @brief An owning wrapper for cudaStream_t. +struct stream : stream_ref +{ + // 0 is documented as default priority + static constexpr int default_priority = 0; + + //! @brief Constructs a stream on a specified device and with specified priority + //! + //! @throws cuda_error if stream creation fails + explicit stream(device __dev, int __priority) + { + __scoped_device dev_setter(__dev); + _CCCL_TRY_CUDA_API( + ::cudaStreamCreateWithPriority, "Failed to create a stream", &__stream, cudaStreamDefault, __priority); + } + + //! @brief Constructs a stream on a specified device and with default priority + //! + //! @throws cuda_error if stream creation fails + explicit stream(device __dev) + : stream(__dev, default_priority) + {} + + //! @brief Constructs a stream on the default device + //! + //! @throws cuda_error if stream creation fails. + stream() + : stream(device{0}) + {} + + //! @brief Construct a new `stream` object into the moved-from state. + //! + //! @post `stream()` returns an invalid stream handle + explicit constexpr stream(uninit_t) noexcept + : stream_ref(detail::invalid_stream) + {} + + //! @brief Move-construct a new `stream` object + //! + //! @param __other + //! + //! @post `__other` is in moved-from state. + stream(stream&& __other) noexcept + : stream(_CUDA_VSTD::exchange(__other.__stream, detail::invalid_stream)) + {} + + // Disallow copy construction. + stream(const stream&) = delete; + + //! Destroy the `stream` object + //! + //! @note If the stream fails to be destroyed, the error is silently ignored. + ~stream() + { + if (__stream != detail::invalid_stream) + { + [[maybe_unused]] auto status = ::cudaStreamDestroy(__stream); + } + } + + //! @brief Move-assign a `stream` object + //! + //! @param __other + //! + //! @post `__other` is in a moved-from state. + stream& operator=(stream&& __other) { + stream __tmp(_CUDA_VSTD::move(__other)); + _CUDA_VSTD::swap(__stream, __tmp.__stream); + return *this; + } + + // Disallow copy assignment. + stream& operator=(const stream&) = delete; + + // Ideally records and waits below would be in stream_ref, but we can't have it depend on cudax yet + + //! @brief Create a new event and record it into this stream + //! + //! @return A new event that was recorded into this stream + //! + //! @throws cuda_error if event creation or record failed + event record_event(event::flags __flags = event::flags::none) const { + return event(*this, __flags); + } + + //! @brief Create a new timed event and record it into this stream + //! + //! @return A new timed event that was recorded into this stream + //! + //! @throws cuda_error if event creation or record failed + timed_event record_timed_event(event::flags __flags = event::flags::none) const { + return timed_event(*this, __flags); + } + + //! @brief Make all future work submitted into this stream depend on completion of the specified event + //! + //! @param __ev Event that this stream should wait for + //! + //! @throws cuda_error if inserting the dependency fails + void wait(event_ref __ev) const { + assert(__ev.get() != nullptr); + _CCCL_TRY_CUDA_API(::cudaStreamWaitEvent, "Failed to make a stream wait for an event", get(), __ev.get()); + } + + //! @brief Make all future work submitted into this stream depend on completion of all work from the specified stream + //! + //! @param __other Stream that this stream should wait for + //! + //! @throws cuda_error if inserting the dependency fails + void wait(const stream_ref __other) const { + // TODO consider an optimization to not create an event every time and instead have one persistent event or one per stream + assert(__stream.get() != nullptr); + event __tmp(__other); + wait(__tmp); + } + + //! @brief Construct an `stream` object from a native `cudaStream_t` handle. + //! + //! @param __handle The native handle + //! + //! @return stream The constructed `stream` object + //! + //! @note The constructed `stream` object takes ownership of the native handle. + static stream from_native_handle(cudaStream_t __handle) + { + return stream(__handle); + } + + // Disallow construction from an `int`, e.g., `0`. + static stream from_native_handle(int) = delete; + + // Disallow construction from `nullptr`. + static stream from_native_handle(_CUDA_VSTD::nullptr_t) = delete; + + //! @brief Retrieve the native `cudaStream_t` handle and give up ownership. + //! + //! @return cudaStream_t The native handle being held by the `stream` object. + //! + //! @post The stream object is in a moved-from state. + cudaStream_t release() + { + return _CUDA_VSTD::exchange(__stream, detail::invalid_stream); + } + +private: + // Use `stream::from_native_handle(s)` to construct an owning `stream` + // object from a `cudaStream_t` handle. + explicit stream(cudaStream_t __handle) + : stream_ref(__handle) + {} +}; + +} // namespace cuda::experimental + +#endif diff --git a/cudax/include/cuda/experimental/stream.cuh b/cudax/include/cuda/experimental/stream.cuh new file mode 100644 index 0000000000..6c9c280ff6 --- /dev/null +++ b/cudax/include/cuda/experimental/stream.cuh @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDAX_STREAM__ +#define __CUDAX_STREAM__ + +#include + +#endif diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 2f337668dd..c3cc9163b2 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -62,4 +62,8 @@ foreach(cn_target IN LISTS cudax_TARGETS) event/event_smoke.cu ) target_compile_options(${test_target} PRIVATE $<$:--extended-lambda>) + + cudax_add_catch2_test(test_target stream_tests ${cn_target} + stream/stream_smoke.cu + ) endforeach() diff --git a/cudax/test/stream/stream_smoke.cu b/cudax/test/stream/stream_smoke.cu new file mode 100644 index 0000000000..2c2f22b45b --- /dev/null +++ b/cudax/test/stream/stream_smoke.cu @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include + +#include "../common/utility.cuh" +#include "../hierarchy/testing_common.cuh" +#include + +TEST_CASE("Stream create", "[stream]") +{ + cudax::stream str1, str2; + cudax::event ev1(str1); + + auto ev2 = str1.record_event(); + str2.wait(str1); +} \ No newline at end of file diff --git a/libcudacxx/include/cuda/stream_ref b/libcudacxx/include/cuda/stream_ref index 4cb53bbbc0..6453eabd15 100644 --- a/libcudacxx/include/cuda/stream_ref +++ b/libcudacxx/include/cuda/stream_ref @@ -62,7 +62,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA */ class stream_ref { -private: +protected: ::cudaStream_t __stream{0}; public: @@ -168,6 +168,20 @@ public: } return true; } + + /** + * \brief Queries the priority of the wrapped stream. + * + * \throws cuda::cuda_error if the query fails. + * + * \return value representing the priority of the wrapped stream. + */ + _CCCL_NODISCARD int priority() const + { + int __result = 0; + _CCCL_TRY_CUDA_API(::cudaStreamGetPriority, "Failed to get stream priority", get(), &__result); + return __result; + } }; _LIBCUDACXX_END_NAMESPACE_CUDA From 6932a120b3c5df67e520bbad4c02dc76f2905c9a Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Fri, 26 Jul 2024 14:46:09 -0700 Subject: [PATCH 02/10] Fix formatting --- .../cuda/experimental/__stream/stream.cuh | 24 ++++++++++++------- cudax/test/stream/stream_smoke.cu | 12 +++++----- libcudacxx/include/cuda/stream_ref | 6 ++--- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index 1794d0800d..2655894aba 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -27,8 +27,8 @@ #include #include #include -#include #include +#include namespace cuda::experimental { @@ -104,7 +104,8 @@ struct stream : stream_ref //! @param __other //! //! @post `__other` is in a moved-from state. - stream& operator=(stream&& __other) { + stream& operator=(stream&& __other) + { stream __tmp(_CUDA_VSTD::move(__other)); _CUDA_VSTD::swap(__stream, __tmp.__stream); return *this; @@ -120,7 +121,8 @@ struct stream : stream_ref //! @return A new event that was recorded into this stream //! //! @throws cuda_error if event creation or record failed - event record_event(event::flags __flags = event::flags::none) const { + event record_event(event::flags __flags = event::flags::none) const + { return event(*this, __flags); } @@ -129,27 +131,31 @@ struct stream : stream_ref //! @return A new timed event that was recorded into this stream //! //! @throws cuda_error if event creation or record failed - timed_event record_timed_event(event::flags __flags = event::flags::none) const { + timed_event record_timed_event(event::flags __flags = event::flags::none) const + { return timed_event(*this, __flags); } //! @brief Make all future work submitted into this stream depend on completion of the specified event - //! + //! //! @param __ev Event that this stream should wait for //! //! @throws cuda_error if inserting the dependency fails - void wait(event_ref __ev) const { + void wait(event_ref __ev) const + { assert(__ev.get() != nullptr); _CCCL_TRY_CUDA_API(::cudaStreamWaitEvent, "Failed to make a stream wait for an event", get(), __ev.get()); } //! @brief Make all future work submitted into this stream depend on completion of all work from the specified stream //! - //! @param __other Stream that this stream should wait for + //! @param __other Stream that this stream should wait for //! //! @throws cuda_error if inserting the dependency fails - void wait(const stream_ref __other) const { - // TODO consider an optimization to not create an event every time and instead have one persistent event or one per stream + void wait(const stream_ref __other) const + { + // TODO consider an optimization to not create an event every time and instead have one persistent event or one per + // stream assert(__stream.get() != nullptr); event __tmp(__other); wait(__tmp); diff --git a/cudax/test/stream/stream_smoke.cu b/cudax/test/stream/stream_smoke.cu index 2c2f22b45b..b5ab4f48f5 100644 --- a/cudax/test/stream/stream_smoke.cu +++ b/cudax/test/stream/stream_smoke.cu @@ -16,9 +16,9 @@ TEST_CASE("Stream create", "[stream]") { - cudax::stream str1, str2; - cudax::event ev1(str1); - - auto ev2 = str1.record_event(); - str2.wait(str1); -} \ No newline at end of file + cudax::stream str1, str2; + cudax::event ev1(str1); + + auto ev2 = str1.record_event(); + str2.wait(str1); +} diff --git a/libcudacxx/include/cuda/stream_ref b/libcudacxx/include/cuda/stream_ref index 6453eabd15..82c49867ae 100644 --- a/libcudacxx/include/cuda/stream_ref +++ b/libcudacxx/include/cuda/stream_ref @@ -171,11 +171,11 @@ public: /** * \brief Queries the priority of the wrapped stream. - * + * * \throws cuda::cuda_error if the query fails. - * + * * \return value representing the priority of the wrapped stream. - */ + */ _CCCL_NODISCARD int priority() const { int __result = 0; From c6ffbb74b689ba9f9d0751d4a0fb81da22b4d27e Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Sun, 28 Jul 2024 12:35:44 -0700 Subject: [PATCH 03/10] Add event testing --- .../cuda/experimental/__event/event.cuh | 5 +- .../__hierarchy/level_dimensions.cuh | 6 +- .../cuda/experimental/__stream/stream.cuh | 6 +- cudax/test/common/utility.cuh | 30 ++++++ cudax/test/event/event_smoke.cu | 14 +-- cudax/test/stream/stream_smoke.cu | 93 +++++++++++++++++-- 6 files changed, 130 insertions(+), 24 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index 0b6b7802b2..df72d7fd5a 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -24,13 +24,12 @@ # pragma system_header #endif // no system header +#include +#include #include #include #include -#include -#include - namespace cuda::experimental { class timed_event; diff --git a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh index c685a2db6a..d48ac1ee61 100644 --- a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh @@ -11,9 +11,8 @@ #ifndef _CUDAX__HIERARCHY_LEVEL_DIMENSIONS #define _CUDAX__HIERARCHY_LEVEL_DIMENSIONS -#include - #include +#include #if _CCCL_STD_VER >= 2017 namespace cuda::experimental @@ -124,7 +123,8 @@ struct level_dimensions _CCCL_HOST_DEVICE constexpr level_dimensions(Dimensions&& d) : dims(d) {} - _CCCL_HOST_DEVICE constexpr level_dimensions(){}; + _CCCL_HOST_DEVICE constexpr level_dimensions() + : dims(){}; }; /** diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index 2655894aba..bb75c638a3 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -36,6 +36,7 @@ namespace cuda::experimental namespace detail { // 0 is a valid stream in CUDA, so we need some other invalid stream representation +// Can't make it constexpr, because cudaStream_t is a pointer type static const cudaStream_t invalid_stream = reinterpret_cast(~0ULL); } // namespace detail @@ -72,7 +73,8 @@ struct stream : stream_ref //! @brief Construct a new `stream` object into the moved-from state. //! //! @post `stream()` returns an invalid stream handle - explicit constexpr stream(uninit_t) noexcept + // Can't be constexpr because invalid_stream isn't + explicit stream(uninit_t) noexcept : stream_ref(detail::invalid_stream) {} @@ -136,6 +138,8 @@ struct stream : stream_ref return timed_event(*this, __flags); } + using stream_ref::wait; + //! @brief Make all future work submitted into this stream depend on completion of the specified event //! //! @param __ev Event that this stream should wait for diff --git a/cudax/test/common/utility.cuh b/cudax/test/common/utility.cuh index d259b64d58..2d7254c069 100644 --- a/cudax/test/common/utility.cuh +++ b/cudax/test/common/utility.cuh @@ -11,12 +11,16 @@ #include // cuda_runtime_api needs to come first +#include #include #include #include #include // IWYU pragma: keep (needed for placement new) +// TODO unify the common testing header +#include "../hierarchy/testing_common.cuh" + namespace { namespace test @@ -107,6 +111,32 @@ public: } }; +struct assign_42 +{ + __device__ constexpr void operator()(int* pi) const noexcept + { + *pi = 42; + } +}; + +struct verify_42 +{ + __device__ void operator()(int* pi) const noexcept + { + CUDAX_REQUIRE(*pi == 42); + } +}; + +struct spin_until_80 +{ + __device__ void operator()(int* pi) const noexcept + { + cuda::atomic_ref atomic_pi(*pi); + while (atomic_pi.load() != 80) + ; + } +}; + /// A kernel that takes a callable object and invokes it with a set of arguments template __global__ void invokernel(Fn fn, Args... args) diff --git a/cudax/test/event/event_smoke.cu b/cudax/test/event/event_smoke.cu index bc61775dab..ae5286a4f7 100644 --- a/cudax/test/event/event_smoke.cu +++ b/cudax/test/event/event_smoke.cu @@ -66,14 +66,6 @@ TEST_CASE("can copy construct an event_ref and compare for equality", "[event]") CUDAX_REQUIRE(!ref4); } -struct assign_42 -{ - __device__ constexpr void operator()(int* pi) const noexcept - { - *pi = 42; - } -}; - TEST_CASE("can use event_ref to record and wait on an event", "[event]") { ::cudaEvent_t ev; @@ -82,7 +74,7 @@ TEST_CASE("can use event_ref to record and wait on an event", "[event]") test::managed i(0); test::stream stream; - ::test::invokernel<<<1, 1, 0, stream.get()>>>(assign_42{}, i.get()); + ::test::invokernel<<<1, 1, 0, stream.get()>>>(::test::assign_42{}, i.get()); ref.record(stream); ref.wait(); CUDAX_REQUIRE(*i == 42); @@ -102,7 +94,7 @@ TEST_CASE("can wait on an event", "[event]") { test::stream stream; ::test::managed i(0); - ::test::invokernel<<<1, 1, 0, stream.get()>>>(assign_42{}, i.get()); + ::test::invokernel<<<1, 1, 0, stream.get()>>>(::test::assign_42{}, i.get()); cudax::event ev(stream); ev.wait(); CUDAX_REQUIRE(*i == 42); @@ -114,7 +106,7 @@ TEST_CASE("can take the difference of two timed_event objects", "[event]") test::stream stream; ::test::managed i(0); cudax::timed_event start(stream); - ::test::invokernel<<<1, 1, 0, stream.get()>>>(assign_42{}, i.get()); + ::test::invokernel<<<1, 1, 0, stream.get()>>>(::test::assign_42{}, i.get()); cudax::timed_event end(stream); end.wait(); CUDAX_REQUIRE(*i == 42); diff --git a/cudax/test/stream/stream_smoke.cu b/cudax/test/stream/stream_smoke.cu index b5ab4f48f5..76c3c79bf4 100644 --- a/cudax/test/stream/stream_smoke.cu +++ b/cudax/test/stream/stream_smoke.cu @@ -8,17 +8,98 @@ // //===----------------------------------------------------------------------===// +#define LIBCUDACXX_ENABLE_EXCEPTIONS +#include #include #include "../common/utility.cuh" -#include "../hierarchy/testing_common.cuh" #include -TEST_CASE("Stream create", "[stream]") +constexpr auto one_thread_dims = cudax::make_hierarchy(cudax::block_dims<1>(), cudax::grid_dims<1>()); + +TEST_CASE("Can create a stream and launch work into it", "[stream]") +{ + cudax::stream str; + ::test::managed i(0); + cudax::launch(str, one_thread_dims, ::test::assign_42{}, i.get()); + str.wait(); + CUDAX_REQUIRE(*i == 42); +} + +TEST_CASE("From native handle", "[stream]") +{ + cudaStream_t handle; + CUDART(cudaStreamCreate(&handle)); + { + auto stream = cudax::stream::from_native_handle(handle); + + ::test::managed i(0); + cudax::launch(stream, one_thread_dims, ::test::assign_42{}, i.get()); + stream.wait(); + CUDAX_REQUIRE(*i == 42); + stream.release(); + } + CUDART(cudaStreamDestroy(handle)); +} + +TEST_CASE("Can add dependency into a stream", "[stream]") +{ + cudax::stream waiter, waitee; + CUDAX_REQUIRE(waiter != waitee); + + auto verify_dependency = [&](const auto& insert_dependency) { + ::test::managed i(0); + ::cuda::atomic_ref atomic_i(*i); + + cudax::launch(waitee, one_thread_dims, ::test::spin_until_80{}, i.get()); + cudax::launch(waitee, one_thread_dims, ::test::assign_42{}, i.get()); + insert_dependency(); + cudax::launch(waiter, one_thread_dims, ::test::verify_42{}, i.get()); + CUDAX_REQUIRE(atomic_i.load() != 42); + CUDAX_REQUIRE(!waiter.ready()); + atomic_i.store(80); + waiter.wait(); + waitee.wait(); + }; + + SECTION("Stream wait declared event") + { + verify_dependency([&]() { + cudax::event ev(waitee); + waiter.wait(ev); + }); + } + + SECTION("Stream wait returned event") + { + verify_dependency([&]() { + auto ev = waitee.record_event(); + waiter.wait(ev); + }); + } + + SECTION("Stream wait returned timed event") + { + verify_dependency([&]() { + auto ev = waitee.record_timed_event(); + waiter.wait(ev); + }); + } + + SECTION("Stream wait stream") + { + verify_dependency([&]() { + waiter.wait(waitee); + }); + } +} + +TEST_CASE("Stream priority", "[stream]") { - cudax::stream str1, str2; - cudax::event ev1(str1); + cudax::stream stream_default_prio; + CUDAX_REQUIRE(stream_default_prio.priority() == cudax::stream::default_priority); - auto ev2 = str1.record_event(); - str2.wait(str1); + auto priority = cudax::stream::default_priority - 1; + cudax::stream stream(0, priority); + CUDAX_REQUIRE(stream.priority() == priority); } From 323aa7c1afb5975cf07c122b18af0bb300ac810c Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Sun, 28 Jul 2024 12:41:45 -0700 Subject: [PATCH 04/10] Add some nodiscards --- cudax/include/cuda/experimental/__stream/stream.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index bb75c638a3..ebbd58c27b 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -123,7 +123,7 @@ struct stream : stream_ref //! @return A new event that was recorded into this stream //! //! @throws cuda_error if event creation or record failed - event record_event(event::flags __flags = event::flags::none) const + _CCCL_NODISCARD event record_event(event::flags __flags = event::flags::none) const { return event(*this, __flags); } @@ -133,7 +133,7 @@ struct stream : stream_ref //! @return A new timed event that was recorded into this stream //! //! @throws cuda_error if event creation or record failed - timed_event record_timed_event(event::flags __flags = event::flags::none) const + _CCCL_NODISCARD timed_event record_timed_event(event::flags __flags = event::flags::none) const { return timed_event(*this, __flags); } @@ -172,7 +172,7 @@ struct stream : stream_ref //! @return stream The constructed `stream` object //! //! @note The constructed `stream` object takes ownership of the native handle. - static stream from_native_handle(cudaStream_t __handle) + _CCCL_NODISCARD static stream from_native_handle(cudaStream_t __handle) { return stream(__handle); } @@ -188,7 +188,7 @@ struct stream : stream_ref //! @return cudaStream_t The native handle being held by the `stream` object. //! //! @post The stream object is in a moved-from state. - cudaStream_t release() + _CCCL_NODISCARD cudaStream_t release() { return _CUDA_VSTD::exchange(__stream, detail::invalid_stream); } From 11784c0f9ef8705f33f6fa319120603dfe44a34a Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Sun, 28 Jul 2024 14:55:57 -0700 Subject: [PATCH 05/10] Fix discarding release return value --- cudax/include/cuda/experimental/__stream/stream.cuh | 8 ++++---- cudax/test/stream/stream_smoke.cu | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index ebbd58c27b..dc14a7c2a0 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -37,7 +37,7 @@ namespace detail { // 0 is a valid stream in CUDA, so we need some other invalid stream representation // Can't make it constexpr, because cudaStream_t is a pointer type -static const cudaStream_t invalid_stream = reinterpret_cast(~0ULL); +static const ::cudaStream_t invalid_stream = reinterpret_cast(~0ULL); } // namespace detail //! @brief An owning wrapper for cudaStream_t. @@ -172,7 +172,7 @@ struct stream : stream_ref //! @return stream The constructed `stream` object //! //! @note The constructed `stream` object takes ownership of the native handle. - _CCCL_NODISCARD static stream from_native_handle(cudaStream_t __handle) + _CCCL_NODISCARD static stream from_native_handle(::cudaStream_t __handle) { return stream(__handle); } @@ -188,7 +188,7 @@ struct stream : stream_ref //! @return cudaStream_t The native handle being held by the `stream` object. //! //! @post The stream object is in a moved-from state. - _CCCL_NODISCARD cudaStream_t release() + _CCCL_NODISCARD ::cudaStream_t release() { return _CUDA_VSTD::exchange(__stream, detail::invalid_stream); } @@ -196,7 +196,7 @@ struct stream : stream_ref private: // Use `stream::from_native_handle(s)` to construct an owning `stream` // object from a `cudaStream_t` handle. - explicit stream(cudaStream_t __handle) + explicit stream(::cudaStream_t __handle) : stream_ref(__handle) {} }; diff --git a/cudax/test/stream/stream_smoke.cu b/cudax/test/stream/stream_smoke.cu index 76c3c79bf4..e6b86ccf16 100644 --- a/cudax/test/stream/stream_smoke.cu +++ b/cudax/test/stream/stream_smoke.cu @@ -37,7 +37,7 @@ TEST_CASE("From native handle", "[stream]") cudax::launch(stream, one_thread_dims, ::test::assign_42{}, i.get()); stream.wait(); CUDAX_REQUIRE(*i == 42); - stream.release(); + (void) stream.release(); } CUDART(cudaStreamDestroy(handle)); } From 8f353ed141f1db2ce52871caf1a2a6c04eff2631 Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Mon, 29 Jul 2024 09:19:43 -0700 Subject: [PATCH 06/10] Review feedback --- .../cuda/experimental/__stream/stream.cuh | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index dc14a7c2a0..29102801eb 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -11,10 +11,8 @@ #ifndef _CUDAX__STREAM_STREAM #define _CUDAX__STREAM_STREAM -#include -// cuda_runtime_api needs to come first - #include +#include #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header @@ -27,7 +25,6 @@ #include #include #include -#include #include namespace cuda::experimental @@ -48,21 +45,16 @@ struct stream : stream_ref //! @brief Constructs a stream on a specified device and with specified priority //! + //! Priority is defaulted to stream::default_priority + //! //! @throws cuda_error if stream creation fails - explicit stream(device __dev, int __priority) + explicit stream(device __dev, int __priority = default_priority) { __scoped_device dev_setter(__dev); _CCCL_TRY_CUDA_API( ::cudaStreamCreateWithPriority, "Failed to create a stream", &__stream, cudaStreamDefault, __priority); } - //! @brief Constructs a stream on a specified device and with default priority - //! - //! @throws cuda_error if stream creation fails - explicit stream(device __dev) - : stream(__dev, default_priority) - {} - //! @brief Constructs a stream on the default device //! //! @throws cuda_error if stream creation fails. @@ -87,7 +79,6 @@ struct stream : stream_ref : stream(_CUDA_VSTD::exchange(__other.__stream, detail::invalid_stream)) {} - // Disallow copy construction. stream(const stream&) = delete; //! Destroy the `stream` object @@ -106,14 +97,13 @@ struct stream : stream_ref //! @param __other //! //! @post `__other` is in a moved-from state. - stream& operator=(stream&& __other) + stream& operator=(stream&& __other) noexcept { stream __tmp(_CUDA_VSTD::move(__other)); _CUDA_VSTD::swap(__stream, __tmp.__stream); return *this; } - // Disallow copy assignment. stream& operator=(const stream&) = delete; // Ideally records and waits below would be in stream_ref, but we can't have it depend on cudax yet From 6d8631463a9678e9f00119f4c6434c210e0483eb Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Mon, 29 Jul 2024 09:20:14 -0700 Subject: [PATCH 07/10] Fix formatting --- cudax/include/cuda/experimental/__event/event.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cudax/include/cuda/experimental/__event/event.cuh b/cudax/include/cuda/experimental/__event/event.cuh index df72d7fd5a..0b6b7802b2 100644 --- a/cudax/include/cuda/experimental/__event/event.cuh +++ b/cudax/include/cuda/experimental/__event/event.cuh @@ -24,12 +24,13 @@ # pragma system_header #endif // no system header -#include -#include #include #include #include +#include +#include + namespace cuda::experimental { class timed_event; From 1d695fee80104ebdcf3fa68625410288c95544a5 Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Mon, 29 Jul 2024 09:23:53 -0700 Subject: [PATCH 08/10] Add missing endif comments --- cudax/include/cuda/experimental/__stream/stream.cuh | 7 ++++--- cudax/include/cuda/experimental/hierarchy.cuh | 2 +- cudax/include/cuda/experimental/launch.cuh | 2 +- cudax/include/cuda/experimental/stream.cuh | 2 +- cudax/include/cuda/experimental/version.cuh | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index 29102801eb..c4e71fce13 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -22,11 +22,12 @@ # pragma system_header #endif // no system header -#include -#include #include #include +#include +#include + namespace cuda::experimental { @@ -193,4 +194,4 @@ private: } // namespace cuda::experimental -#endif +#endif // _CUDAX__STREAM_STREAM diff --git a/cudax/include/cuda/experimental/hierarchy.cuh b/cudax/include/cuda/experimental/hierarchy.cuh index 15f7269388..1edc768650 100644 --- a/cudax/include/cuda/experimental/hierarchy.cuh +++ b/cudax/include/cuda/experimental/hierarchy.cuh @@ -13,4 +13,4 @@ #include -#endif +#endif // __CUDAX_HIERARCHY___ diff --git a/cudax/include/cuda/experimental/launch.cuh b/cudax/include/cuda/experimental/launch.cuh index 8fb5c31227..69048248ef 100644 --- a/cudax/include/cuda/experimental/launch.cuh +++ b/cudax/include/cuda/experimental/launch.cuh @@ -13,4 +13,4 @@ #include -#endif +#endif // __CUDAX_LAUNCH___ diff --git a/cudax/include/cuda/experimental/stream.cuh b/cudax/include/cuda/experimental/stream.cuh index 6c9c280ff6..c4a1a08c8a 100644 --- a/cudax/include/cuda/experimental/stream.cuh +++ b/cudax/include/cuda/experimental/stream.cuh @@ -13,4 +13,4 @@ #include -#endif +#endif // __CUDAX_STREAM__ diff --git a/cudax/include/cuda/experimental/version.cuh b/cudax/include/cuda/experimental/version.cuh index d1511237af..563de5cd4d 100644 --- a/cudax/include/cuda/experimental/version.cuh +++ b/cudax/include/cuda/experimental/version.cuh @@ -18,4 +18,4 @@ #define CUDAX_VERSION_MINOR CCCL_MINOR_VERSION #define CUDAX_VERSION_PATCH CCCL_PATCH_VERSION -#endif +#endif // __CUDAX_VERSION__ From 113e29fadd70c5e3500ae880386f4164220edd3a Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Mon, 29 Jul 2024 09:26:30 -0700 Subject: [PATCH 09/10] One more format fix --- .../include/cuda/experimental/__hierarchy/level_dimensions.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh index d48ac1ee61..69bfd88e24 100644 --- a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh @@ -11,9 +11,10 @@ #ifndef _CUDAX__HIERARCHY_LEVEL_DIMENSIONS #define _CUDAX__HIERARCHY_LEVEL_DIMENSIONS -#include #include +#include + #if _CCCL_STD_VER >= 2017 namespace cuda::experimental { From 0cf3c9b5deec454ff244c4d8c370070be419336d Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Mon, 29 Jul 2024 14:09:20 -0700 Subject: [PATCH 10/10] Fix extra const and nullptr stream --- cudax/include/cuda/experimental/__stream/stream.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cudax/include/cuda/experimental/__stream/stream.cuh b/cudax/include/cuda/experimental/__stream/stream.cuh index c4e71fce13..a63a5a3931 100644 --- a/cudax/include/cuda/experimental/__stream/stream.cuh +++ b/cudax/include/cuda/experimental/__stream/stream.cuh @@ -147,11 +147,11 @@ struct stream : stream_ref //! @param __other Stream that this stream should wait for //! //! @throws cuda_error if inserting the dependency fails - void wait(const stream_ref __other) const + void wait(stream_ref __other) const { // TODO consider an optimization to not create an event every time and instead have one persistent event or one per // stream - assert(__stream.get() != nullptr); + assert(__stream.get() != detail::invalid_stream); event __tmp(__other); wait(__tmp); }