From 7bd04adda86e3f00e4a36387d20df1f2823c9ece Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 19 Sep 2024 19:23:48 +0200 Subject: [PATCH 1/3] Implement `cudax::shared_resource` (#2398) * Implement `cudax::shared_resource` We currently have two basic building blocks around memory resources, `any_resource` and `resource_ref`. However, while they make owning and sharing resources much easier, we can still run into lifetime issues. If a user wants to pass a resource into a library function that might exceed the lifetime of the resource, they would need to move it into an any_resource. However, they also might want to share that resource among multiple functions, e.g. a pool allocator. We need a way to properly share a resource in those circumstances. Enter `shared_resource`. Rather than storing an `any_resource` this holds a `shared_ptr`. With that we can happily copy / move them around without touching the stored resource. Co-authored-by: Eric Niebler Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- .../__memory_resource/shared_resource.cuh | 273 ++++++++++++++++++ .../cuda/experimental/memory_resource.cuh | 1 + cudax/test/CMakeLists.txt | 1 + cudax/test/memory_resource/any_resource.cu | 177 +----------- cudax/test/memory_resource/shared_resource.cu | 168 +++++++++++ cudax/test/memory_resource/test_resource.h | 179 ++++++++++++ docs/cudax/memory_resource.rst | 4 + 7 files changed, 627 insertions(+), 176 deletions(-) create mode 100644 cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh create mode 100644 cudax/test/memory_resource/shared_resource.cu create mode 100644 cudax/test/memory_resource/test_resource.h diff --git a/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh new file mode 100644 index 0000000000..bdd774f216 --- /dev/null +++ b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh 
@@ -0,0 +1,273 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H +#define _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// If the memory resource header was included without the experimental flag, +// tell the user to define the experimental flag. +#if defined(_CUDA_MEMORY_RESOURCE) && !defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE) +# error "To use the experimental memory resource, define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE" +#endif + +// cuda::mr is unavailable on MSVC 2017 +#if defined(_CCCL_COMPILER_MSVC_2017) +# error "The shared_resource header is not supported on MSVC 2017" +#endif + +#if !defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE) +# define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include + +namespace cuda::experimental::mr +{ + +//! @rst +//! .. _cudax-memory-resource-shared-resource: +//! +//! Resource wrapper to share ownership of a resource +//! -------------------------------------------------- +//! +//! ``shared_resource`` holds a reference counted instance of a memory resource. This allows +//! the user to pass a resource around with reference semantics while avoiding lifetime issues. +//! +//! 
@note ``shared_resource`` satisfies the ``cuda::mr::async_resource`` concept iff \c _Resource satisfies it. +//! @tparam _Resource The resource type to hold. +//! @endrst +template +struct shared_resource +{ + static_assert(_CUDA_VMR::resource<_Resource>, ""); + + //! @brief Constructs a \c shared_resource referring to an object of type \c _Resource + //! that has been constructed with arguments \c __args. The \c _Resource object is + //! dynamically allocated with \c new. + //! @param __args The arguments to be passed to the \c _Resource constructor. + template + explicit shared_resource(_Args&&... __args) + : __control_block(new _Control_block{_Resource{_CUDA_VSTD::forward<_Args>(__args)...}, 1}) + {} + + //! @brief Copy-constructs a \c shared_resource object resulting in a copy that shares + //! ownership of the wrapped resource with \c __other. + //! @param __other The \c shared_resource object to copy from. + shared_resource(const shared_resource& __other) noexcept + : __control_block(__other.__control_block) + { + if (__control_block) + { + __control_block->__ref_count.fetch_add(1, _CUDA_VSTD::memory_order_relaxed); + } + } + + //! @brief Move-constructs a \c shared_resource assuming ownership of the resource stored + //! in \c __other. + //! @param __other The \c shared_resource object to move from. + //! @post \c __other is left in a valid but unspecified state. + shared_resource(shared_resource&& __other) noexcept + : __control_block(_CUDA_VSTD::exchange(__other.__control_block, nullptr)) + {} + + //! @brief Releases the reference held by this \c shared_resource object. If this is the + //! last reference to the wrapped resource, the resource is deleted. + ~shared_resource() + { + if (__control_block && __control_block->__ref_count.fetch_sub(1, _CUDA_VSTD::memory_order_acq_rel) == 1) + { + delete __control_block; + } + } + + //! @brief Copy-assigns from \c __other. Self-assignment is a no-op. Otherwise, the reference + //! 
held by this \c shared_resource object is released and a new reference is acquired to the + //! wrapped resource of \c __other, if any. + //! @param __other The \c shared_resource object to copy from. + shared_resource& operator=(const shared_resource& __other) noexcept + { + if (this != &__other) + { + shared_resource(__other).swap(*this); + } + + return *this; + } + + //! @brief Move-assigns from \c __other. Self-assignment is a no-op. Otherwise, the reference + //! held by this \c shared_resource object is released, while the reference held by \c __other + //! is transferred to this object. + //! @param __other The \c shared_resource object to move from. + //! @post \c __other is left in a valid but unspecified state. + shared_resource& operator=(shared_resource&& __other) noexcept + { + if (this != &__other) + { + shared_resource(_CUDA_VSTD::move(__other)).swap(*this); + } + + return *this; + } + + //! @brief Swaps a \c shared_resource with another one. + //! @param __other The other \c shared_resource. + void swap(shared_resource& __other) noexcept + { + _CUDA_VSTD::swap(__control_block, __other.__control_block); + } + + //! @brief Swaps two \c shared_resource objects. + //! @param __left,__right The two \c shared_resource objects to swap. + friend void swap(shared_resource& __left, shared_resource& __right) noexcept + { + __left.swap(__right); + } + + //! @brief Allocate memory of size at least \p __bytes using the stored resource. + //! @param __bytes The size in bytes of the allocation. + //! @param __alignment The requested alignment of the allocation. + //! @return Pointer to the newly allocated memory + _CCCL_NODISCARD void* allocate(size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t)) + { + return __control_block->__resource.allocate(__bytes, __alignment); + } + + //! @brief Deallocate memory pointed to by \p __ptr using the stored resource. + //! @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate` + //! 
@param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr. + //! @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr. + void deallocate(void* __ptr, size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t)) noexcept + { + __control_block->__resource.deallocate(__ptr, __bytes, __alignment); + } + + //! @brief Enqueues an allocation of memory of size at least \p __bytes using + //! the wrapped resource. The allocation is performed asynchronously on stream \c __stream. + //! @pre \c _Resource must satisfy \c async_resource. + //! @param __bytes The size in bytes of the allocation. + //! @param __alignment The requested alignment of the allocation. + //! @return Pointer to the newly allocated memory. + //! @note The caller is responsible for ensuring that the memory is not accessed until the + //! operation has completed. + _LIBCUDACXX_TEMPLATE(class _ThisResource = _Resource) + _LIBCUDACXX_REQUIRES(_CUDA_VMR::async_resource<_ThisResource>) + _CCCL_NODISCARD void* async_allocate(size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream) + { + return this->__control_block->__resource.async_allocate(__bytes, __alignment, __stream); + } + + //! @brief Enqueues the deallocation of memory pointed to by \c __ptr. The deallocation is + //! performed asynchronously on stream \c __stream. + //! @pre \c _Resource must satisfy \c async_resource. + //! @param __bytes The number of bytes that was passed to the `async_allocate` call that returned + //! \p __ptr. + //! @param __alignment The alignment that was passed to the `async_allocate` call that returned + //! \p __ptr. + //! @note The caller is responsible for ensuring that the memory is not accessed after the + //! operation has completed. 
+ _LIBCUDACXX_TEMPLATE(class _ThisResource = _Resource) + _LIBCUDACXX_REQUIRES(_CUDA_VMR::async_resource<_ThisResource>) + void async_deallocate(void* __ptr, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream) + { + this->__control_block->__resource.async_deallocate(__ptr, __bytes, __alignment, __stream); + } + + //! @brief Equality comparison between two \c shared_resource + //! @param __lhs The first \c shared_resource + //! @param __rhs The other \c shared_resource + //! @return Checks whether the objects refer to resources that compare equal. + _CCCL_NODISCARD_FRIEND bool operator==(const shared_resource& __lhs, const shared_resource& __rhs) + { + if (__lhs.__control_block == __rhs.__control_block) + { + return true; + } + + if (__lhs.__control_block == nullptr || __rhs.__control_block == nullptr) + { + return false; + } + + return __lhs.__control_block->__resource == __rhs.__control_block->__resource; + } + + //! @brief Inequality comparison between two \c shared_resource + //! @param __lhs The first \c shared_resource + //! @param __rhs The other \c shared_resource + //! @return Checks whether the objects refer to resources that compare unequal. + _CCCL_NODISCARD_FRIEND bool operator!=(const shared_resource& __lhs, const shared_resource& __rhs) + { + return !(__lhs == __rhs); + } + + //! @brief Forwards the stateless properties + _LIBCUDACXX_TEMPLATE(class _Property) + _LIBCUDACXX_REQUIRES((!property_with_value<_Property>) _LIBCUDACXX_AND(has_property<_Resource, _Property>)) + friend void get_property(const shared_resource&, _Property) noexcept {} + + //! 
@brief Forwards the stateful properties + _LIBCUDACXX_TEMPLATE(class _Property) + _LIBCUDACXX_REQUIRES(property_with_value<_Property> _LIBCUDACXX_AND(has_property<_Resource, _Property>)) + _CCCL_NODISCARD_FRIEND __property_value_t<_Property> get_property(const shared_resource& __self, _Property) noexcept + { + return get_property(__self.__control_block->__resource, _Property{}); + } + +private: + // Use a custom shared_ptr implementation because (a) we don't need to support weak_ptr so we only + // need one pointer, not two, and (b) this implementation can work on device also. + struct _Control_block + { + _Resource __resource; + _CUDA_VSTD::atomic __ref_count; + }; + + _Control_block* __control_block; +}; + +//! @rst +//! .. _cudax-memory-resource-make-shared-resource: +//! +//! Factory function for `shared_resource` objects +//! ----------------------------------------------- +//! +//! ``make_shared_resource`` constructs a :ref:`shared_resource <cudax-memory-resource-shared-resource>` object that wraps +//! a newly constructed instance of the given resource type. The resource type must satisfy the ``cuda::mr::resource`` +//! concept. +//! +//! @param __args The arguments used to construct the instance of the resource type. +//! +//! @endrst +template +auto make_shared_resource(_Args&&... 
__args) -> shared_resource<_Resource> +{ + static_assert(_CUDA_VMR::resource<_Resource>, "_Resource does not satisfy the cuda::mr::resource concept"); + return shared_resource<_Resource>{_CUDA_VSTD::forward<_Args>(__args)...}; +} + +} // namespace cuda::experimental::mr + +#endif // _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H diff --git a/cudax/include/cuda/experimental/memory_resource.cuh b/cudax/include/cuda/experimental/memory_resource.cuh index 3ebce76451..d84559142e 100644 --- a/cudax/include/cuda/experimental/memory_resource.cuh +++ b/cudax/include/cuda/experimental/memory_resource.cuh @@ -14,5 +14,6 @@ #include #include #include +#include #endif // __CUDAX_MEMORY_RESOURCE___ diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index a50ab0b1ce..3f9b29b1ab 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -95,6 +95,7 @@ foreach(cn_target IN LISTS cudax_TARGETS) memory_resource/any_resource.cu memory_resource/async_memory_pool.cu memory_resource/async_memory_resource.cu + memory_resource/shared_resource.cu ) cudax_add_catch2_test(test_target async_tests ${cn_target} diff --git a/cudax/test/memory_resource/any_resource.cu b/cudax/test/memory_resource/any_resource.cu index ef6d1ef948..cc76f1755a 100644 --- a/cudax/test/memory_resource/any_resource.cu +++ b/cudax/test/memory_resource/any_resource.cu @@ -10,185 +10,10 @@ #include -#include -#include - -#include "cuda/std/detail/libcxx/include/cstddef" +#include "test_resource.h" #include #include -using std::size_t; -using std::uintptr_t; - -struct Counts -{ - int object_count = 0; - int move_count = 0; - int copy_count = 0; - int allocate_count = 0; - int deallocate_count = 0; - int equal_to_count = 0; - int new_count = 0; - int delete_count = 0; - - friend std::ostream& operator<<(std::ostream& os, const Counts& counts) - { - return os - << "object: " << counts.object_count << ", " // - << "move: " << counts.move_count << ", " // - << "copy: " << counts.copy_count << ", " // - 
<< "allocate: " << counts.allocate_count << ", " // - << "deallocate: " << counts.deallocate_count << ", " // - << "equal_to: " << counts.equal_to_count << ", " // - << "new: " << counts.new_count << ", " // - << "delete: " << counts.delete_count; - } - - friend bool operator==(const Counts& lhs, const Counts& rhs) noexcept - { - return lhs.object_count == rhs.object_count && // - lhs.move_count == rhs.move_count && // - lhs.copy_count == rhs.copy_count && // - lhs.allocate_count == rhs.allocate_count && // - lhs.deallocate_count == rhs.deallocate_count && // - lhs.equal_to_count == rhs.equal_to_count && // - lhs.new_count == rhs.new_count && // - lhs.delete_count == rhs.delete_count; // - } - - friend bool operator!=(const Counts& lhs, const Counts& rhs) noexcept - { - return !(lhs == rhs); - } -}; - -struct test_fixture_ -{ - Counts counts; - size_t bytes_ = 0; - size_t align_ = 0; - static thread_local Counts* counts_; - - test_fixture_() noexcept - : counts() - { - counts_ = &counts; - } - - size_t bytes(size_t sz) noexcept - { - bytes_ = sz; - return bytes_; - } - - size_t align(size_t align) noexcept - { - align_ = align; - return align_; - } -}; - -thread_local Counts* test_fixture_::counts_ = nullptr; - -template -using test_fixture = test_fixture_; - -template -struct test_resource -{ - int data; - test_fixture_* fixture; - T cookie[2] = {0xDEADBEEF, 0xDEADBEEF}; - - explicit test_resource(int i, test_fixture_* fix) noexcept - : data(i) - , fixture(fix) - { - ++fixture->counts.object_count; - } - - test_resource(test_resource&& other) noexcept - : data(other.data) - , fixture(other.fixture) - { - other._assert_valid(); - ++fixture->counts.move_count; - ++fixture->counts.object_count; - other.cookie[0] = other.cookie[1] = 0x0C07FEFE; - } - - test_resource(const test_resource& other) noexcept - : data(other.data) - , fixture(other.fixture) - { - other._assert_valid(); - ++fixture->counts.copy_count; - ++fixture->counts.object_count; - } - - ~test_resource() 
- { - --fixture->counts.object_count; - } - - void* allocate(std::size_t bytes, std::size_t align) - { - _assert_valid(); - CHECK(bytes == fixture->bytes_); - CHECK(align == fixture->align_); - ++fixture->counts.allocate_count; - return fixture; - } - - void deallocate(void* ptr, std::size_t bytes, std::size_t align) noexcept - { - _assert_valid(); - CHECK(ptr == fixture); - CHECK(bytes == fixture->bytes_); - CHECK(align == fixture->align_); - ++fixture->counts.deallocate_count; - return; - } - - friend bool operator==(const test_resource& lhs, const test_resource& rhs) - { - lhs._assert_valid(); - rhs._assert_valid(); - ++lhs.fixture->counts.equal_to_count; - return lhs.data == rhs.data; - } - - friend bool operator!=(const test_resource& lhs, const test_resource& rhs) - { - FAIL("any_resource should only be calling operator=="); - return lhs.data != rhs.data; - } - - void _assert_valid() const noexcept - { - REQUIRE(cookie[0] == 0xDEADBEEF); - REQUIRE(cookie[1] == 0xDEADBEEF); - } - - static void* operator new(::cuda::std::size_t size) - { - ++test_fixture_::counts_->new_count; - return ::operator new(size); - } - - static void operator delete(void* pv) noexcept - { - ++test_fixture_::counts_->delete_count; - return ::operator delete(pv); - } -}; - -using big_resource = test_resource; -using small_resource = test_resource; - -static_assert(sizeof(big_resource) > sizeof(cuda::mr::_AnyResourceStorage)); -static_assert(sizeof(small_resource) <= sizeof(cuda::mr::_AnyResourceStorage)); - TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", big_resource, small_resource) { using TestResource = TestType; diff --git a/cudax/test/memory_resource/shared_resource.cu b/cudax/test/memory_resource/shared_resource.cu new file mode 100644 index 0000000000..74d8376f55 --- /dev/null +++ b/cudax/test/memory_resource/shared_resource.cu @@ -0,0 +1,168 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA 
Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "test_resource.h" +#include +#include + +TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource]", big_resource, small_resource) +{ + using TestResource = TestType; + + SECTION("construct and destruct") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::shared_resource mr{42, this}; + ++expected.object_count; + CHECK(this->counts == expected); + } + + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("copy and move") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::shared_resource mr{42, this}; + ++expected.object_count; + CHECK(this->counts == expected); + + auto mr2 = mr; + CHECK(this->counts == expected); + CHECK(mr == mr2); // pointers compare equal, no call to TestResource::operator== + CHECK(this->counts == expected); + + auto mr3 = std::move(mr); + CHECK(this->counts == expected); + CHECK(mr2 == mr3); // pointers compare equal, no call to TestResource::operator== + CHECK(this->counts == expected); + + cudax::mr::shared_resource mr4{TestResource{42, this}}; + ++expected.object_count; + ++expected.move_count; + CHECK(mr3 == mr4); // pointers are not equal, calls TestResource::operator== + ++expected.equal_to_count; + CHECK(this->counts == expected); + } + + expected.object_count -= 2; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("allocate and deallocate") + { + Counts expected{}; + CHECK(this->counts == expected); + { + 
cudax::mr::shared_resource mr{42, this}; + ++expected.object_count; + CHECK(this->counts == expected); + + void* ptr = mr.allocate(bytes(50), align(8)); + CHECK(ptr == this); + ++expected.allocate_count; + CHECK(this->counts == expected); + + mr.deallocate(ptr, bytes(50), align(8)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("conversion to resource_ref") + { + Counts expected{}; + { + cudax::mr::shared_resource mr{42, this}; + ++expected.object_count; + CHECK(this->counts == expected); + + cuda::mr::resource_ref<> ref = mr; + + CHECK(this->counts == expected); + auto* ptr = ref.allocate(bytes(100), align(8)); + CHECK(ptr == this); + ++expected.allocate_count; + CHECK(this->counts == expected); + ref.deallocate(ptr, bytes(0), align(0)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("basic sanity test about shared resource handling") + { + Counts expected{}; + align(alignof(int) * 4); + { + bytes(42 * sizeof(int)); + cudax::uninitialized_buffer buffer{cudax::mr::shared_resource(42, this), 42}; + ++expected.object_count; + ++expected.allocate_count; + CHECK(this->counts == expected); + + // copying the shared_resource should not copy the stored resource + { + // accounting for new storage + bytes(1337 * sizeof(int)); + cudax::uninitialized_buffer other_buffer{buffer.get_resource(), 1337}; + ++expected.allocate_count; + CHECK(this->counts == expected); + } + + // The original resource is still alive, but the second allocation was released + bytes(42 * sizeof(int)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + + { + // Moving the resource should not do anything + cudax::uninitialized_buffer third_buffer = ::cuda::std::move(buffer); + 
CHECK(this->counts == expected); + } + + // The original shared_resource has been moved from so everything is gone already + --expected.object_count; + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + + // Nothing changes here as the first shared_resources has been moved from + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); +} diff --git a/cudax/test/memory_resource/test_resource.h b/cudax/test/memory_resource/test_resource.h new file mode 100644 index 0000000000..dfc103b1ac --- /dev/null +++ b/cudax/test/memory_resource/test_resource.h @@ -0,0 +1,179 @@ +#pragma once + +#include +#include + +#include +#include + +using std::size_t; +using std::uintptr_t; + +struct Counts +{ + int object_count = 0; + int move_count = 0; + int copy_count = 0; + int allocate_count = 0; + int deallocate_count = 0; + int equal_to_count = 0; + int new_count = 0; + int delete_count = 0; + + friend std::ostream& operator<<(std::ostream& os, const Counts& counts) + { + return os + << "object: " << counts.object_count << ", " // + << "move: " << counts.move_count << ", " // + << "copy: " << counts.copy_count << ", " // + << "allocate: " << counts.allocate_count << ", " // + << "deallocate: " << counts.deallocate_count << ", " // + << "equal_to: " << counts.equal_to_count << ", " // + << "new: " << counts.new_count << ", " // + << "delete: " << counts.delete_count; + } + + friend bool operator==(const Counts& lhs, const Counts& rhs) noexcept + { + return lhs.object_count == rhs.object_count && // + lhs.move_count == rhs.move_count && // + lhs.copy_count == rhs.copy_count && // + lhs.allocate_count == rhs.allocate_count && // + lhs.deallocate_count == rhs.deallocate_count && // + lhs.equal_to_count == rhs.equal_to_count && // + lhs.new_count == rhs.new_count && // + lhs.delete_count == rhs.delete_count; // + } + + friend bool operator!=(const Counts& lhs, const Counts& rhs) noexcept + { + return !(lhs == rhs); + } +}; + +struct 
test_fixture_ +{ + Counts counts; + size_t bytes_ = 0; + size_t align_ = 0; + static thread_local Counts* counts_; + + test_fixture_() noexcept + : counts() + { + counts_ = &counts; + } + + size_t bytes(size_t sz) noexcept + { + bytes_ = sz; + return bytes_; + } + + size_t align(size_t align) noexcept + { + align_ = align; + return align_; + } +}; + +inline thread_local Counts* test_fixture_::counts_ = nullptr; + +template +using test_fixture = test_fixture_; + +template +struct test_resource +{ + int data; + test_fixture_* fixture; + T cookie[2] = {0xDEADBEEF, 0xDEADBEEF}; + + explicit test_resource(int i, test_fixture_* fix) noexcept + : data(i) + , fixture(fix) + { + ++fixture->counts.object_count; + } + + test_resource(test_resource&& other) noexcept + : data(other.data) + , fixture(other.fixture) + { + other._assert_valid(); + ++fixture->counts.move_count; + ++fixture->counts.object_count; + other.cookie[0] = other.cookie[1] = 0x0C07FEFE; + } + + test_resource(const test_resource& other) noexcept + : data(other.data) + , fixture(other.fixture) + { + other._assert_valid(); + ++fixture->counts.copy_count; + ++fixture->counts.object_count; + } + + ~test_resource() + { + --fixture->counts.object_count; + } + + void* allocate(std::size_t bytes, std::size_t align) + { + _assert_valid(); + CHECK(bytes == fixture->bytes_); + CHECK(align == fixture->align_); + ++fixture->counts.allocate_count; + return fixture; + } + + void deallocate(void* ptr, std::size_t bytes, std::size_t align) noexcept + { + _assert_valid(); + CHECK(ptr == fixture); + CHECK(bytes == fixture->bytes_); + CHECK(align == fixture->align_); + ++fixture->counts.deallocate_count; + return; + } + + friend bool operator==(const test_resource& lhs, const test_resource& rhs) + { + lhs._assert_valid(); + rhs._assert_valid(); + ++lhs.fixture->counts.equal_to_count; + return lhs.data == rhs.data; + } + + friend bool operator!=(const test_resource& lhs, const test_resource& rhs) + { + FAIL("any_resource should 
only be calling operator=="); + return lhs.data != rhs.data; + } + + void _assert_valid() const noexcept + { + REQUIRE(cookie[0] == 0xDEADBEEF); + REQUIRE(cookie[1] == 0xDEADBEEF); + } + + static void* operator new(::cuda::std::size_t size) + { + ++test_fixture_::counts_->new_count; + return ::operator new(size); + } + + static void operator delete(void* pv) noexcept + { + ++test_fixture_::counts_->delete_count; + return ::operator delete(pv); + } +}; + +using big_resource = test_resource; +using small_resource = test_resource; + +static_assert(sizeof(big_resource) > sizeof(cuda::mr::_AnyResourceStorage)); +static_assert(sizeof(small_resource) <= sizeof(cuda::mr::_AnyResourceStorage)); diff --git a/docs/cudax/memory_resource.rst b/docs/cudax/memory_resource.rst index 580fe6cd23..63736806e9 100644 --- a/docs/cudax/memory_resource.rst +++ b/docs/cudax/memory_resource.rst @@ -12,6 +12,7 @@ Memory Resources ${repo_docs_api_path}/struct*async__memory__pool__properties* ${repo_docs_api_path}/class*async__memory__pool* ${repo_docs_api_path}/class*async__memory__resource* + ${repo_docs_api_path}/*shared__resource* The ```` header provides: - :ref:`any_resource ` and @@ -22,6 +23,9 @@ The ```` header provides: *stream-ordered* memory allocation tailored to the needs of CUDA C++ developers. This design builds off of the success of the `RAPIDS Memory Manager (RMM) `__ project and evolves the design based on lessons learned. + - :ref:`shared_resource <cudax-memory-resource-shared-resource>`, a reference counted wrapper around a memory resource. + In contrast to :ref:`any_resource ` it provides shared ownership + semantics. ```` is not intended to replace RMM, but instead moves the definition of the memory allocation interface to a more centralized home in CCCL. 
RMM will remain as a collection of implementations of From 5e14128f6b6e1cd522e618ad36782449c01158a9 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 19 Sep 2024 19:38:39 +0200 Subject: [PATCH 2/3] Increase the libcu++ timeout (#2435) * Increase the libcu++ timeout We are frequently running into the current test duration limit of 01:20, so add another 20 minutes to the timeout. --------- Co-authored-by: Allison Piper --- libcudacxx/test/libcudacxx/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libcudacxx/test/libcudacxx/CMakeLists.txt b/libcudacxx/test/libcudacxx/CMakeLists.txt index b699b4b89f..605c98e888 100644 --- a/libcudacxx/test/libcudacxx/CMakeLists.txt +++ b/libcudacxx/test/libcudacxx/CMakeLists.txt @@ -112,7 +112,8 @@ if (NOT LIBCUDACXX_TEST_WITH_NVRTC) add_custom_target(libcudacxx.test.lit.precompile DEPENDS libcudacxx.test.public_headers libcudacxx.test.internal_headers libcudacxx.test.public_headers_host_only COMMAND "${CMAKE_COMMAND}" -E env "LIBCUDACXX_SITE_CONFIG=${lit_site_cfg_path}" - "${libcudacxx_LIT}" -vv --no-progress-bar ${libcudacxx_LIT_FLAGS} "-Dexecutor=\"NoopExecutor()\"" "${libcudacxx_SOURCE_DIR}/test/libcudacxx" + "${libcudacxx_LIT}" -vv --no-progress-bar --time-tests ${libcudacxx_LIT_FLAGS} + "-Dexecutor=\"NoopExecutor()\"" "${libcudacxx_SOURCE_DIR}/test/libcudacxx" ) endif() @@ -125,12 +126,11 @@ set(libcudacxx_LIT_PARALLEL_LEVEL 8 CACHE STRING add_test(NAME libcudacxx.test.lit COMMAND "${CMAKE_COMMAND}" -E env "LIBCUDACXX_SITE_CONFIG=${lit_site_cfg_path}" - "${libcudacxx_LIT}" -vv --no-progress-bar ${libcudacxx_LIT_FLAGS} - -j "${libcudacxx_LIT_PARALLEL_LEVEL}" - "${libcudacxx_SOURCE_DIR}/test/libcudacxx" + "${libcudacxx_LIT}" -vv --no-progress-bar --time-tests ${libcudacxx_LIT_FLAGS} + -j "${libcudacxx_LIT_PARALLEL_LEVEL}" "${libcudacxx_SOURCE_DIR}/test/libcudacxx" ) set_tests_properties(libcudacxx.test.lit PROPERTIES - TIMEOUT 4800 + TIMEOUT 6000 RUN_SERIAL TRUE ) 
From 2fe09c8399bc1ea52d9681ad084ff87a6eaa3f79 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 19 Sep 2024 11:18:23 -0700 Subject: [PATCH 3/3] Move c/include/cccl/*.h files to c/include/cccl/c/*.h (#2428) * Move c/include/cccl/*.h files to c/include/cccl/c/*.h * Change `#warning` to `#error` (to improve the user experience). * Add comments to preprocessor conditionals. Co-authored-by: Michael Schellenberger Costa * Add comments to preprocessor conditionals. Co-authored-by: Michael Schellenberger Costa * Add comment to preprocessor conditional. --------- Co-authored-by: Michael Schellenberger Costa Co-authored-by: Allison Piper --- c/include/cccl/{ => c}/reduce.h | 10 ++++------ c/include/cccl/{ => c}/types.h | 16 +++++++--------- c/src/reduce.cu | 2 +- c/test/c2h.h | 2 +- 4 files changed, 13 insertions(+), 17 deletions(-) rename c/include/cccl/{ => c}/reduce.h (85%) rename c/include/cccl/{ => c}/types.h (78%) diff --git a/c/include/cccl/reduce.h b/c/include/cccl/c/reduce.h similarity index 85% rename from c/include/cccl/reduce.h rename to c/include/cccl/c/reduce.h index 5047625a85..1da7b51f01 100644 --- a/c/include/cccl/reduce.h +++ b/c/include/cccl/c/reduce.h @@ -11,12 +11,12 @@ #pragma once #ifndef CCCL_C_EXPERIMENTAL -# warning "C exposure is experimental and subject to change. Define CCCL_C_EXPERIMENTAL to acknowledge this warning." -#else // ^^^ !CCCL_C_EXPERIMENTAL ^^^ / vvv CCCL_C_EXPERIMENTAL vvv +# error "C exposure is experimental and subject to change. Define CCCL_C_EXPERIMENTAL to acknowledge this notice." 
+#endif // !CCCL_C_EXPERIMENTAL -# include +#include -# include +#include struct cccl_device_reduce_build_result_t { @@ -55,5 +55,3 @@ extern "C" CCCL_C_API CUresult cccl_device_reduce( CUstream stream) noexcept; extern "C" CCCL_C_API CUresult cccl_device_reduce_cleanup(cccl_device_reduce_build_result_t* bld_ptr); - -#endif // CCCL_C_EXPERIMENTAL diff --git a/c/include/cccl/types.h b/c/include/cccl/c/types.h similarity index 78% rename from c/include/cccl/types.h rename to c/include/cccl/c/types.h index 781b9f9ea6..f34466c755 100644 --- a/c/include/cccl/types.h +++ b/c/include/cccl/c/types.h @@ -11,14 +11,14 @@ #pragma once #ifndef CCCL_C_EXPERIMENTAL -# warning "C exposure is experimental and subject to change. Define CCCL_C_EXPERIMENTAL to acknowledge this warning." -#else // ^^^ !CCCL_C_EXPERIMENTAL ^^^ / vvv CCCL_C_EXPERIMENTAL vvv +# error "C exposure is experimental and subject to change. Define CCCL_C_EXPERIMENTAL to acknowledge this notice." +#endif // !CCCL_C_EXPERIMENTAL -# if defined(_WIN32) -# define CCCL_C_API __declspec(dllexport) -# else -# define CCCL_C_API __attribute__((visibility("default"))) -# endif +#if defined(_WIN32) +# define CCCL_C_API __declspec(dllexport) +#else // ^^^ _WIN32 ^^^ / vvv !_WIN32 vvv +# define CCCL_C_API __attribute__((visibility("default"))) +#endif // !_WIN32 enum class cccl_type_enum { @@ -81,5 +81,3 @@ struct cccl_iterator_t cccl_type_info value_type; void* state; }; - -#endif // CCCL_C_EXPERIMENTAL diff --git a/c/src/reduce.cu b/c/src/reduce.cu index 4badcd1ff0..fc88dd31a9 100644 --- a/c/src/reduce.cu +++ b/c/src/reduce.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/c/test/c2h.h b/c/test/c2h.h index e2b26895a8..e044d2e17a 100644 --- a/c/test/c2h.h +++ b/c/test/c2h.h @@ -23,7 +23,7 @@ #include #include -#include +#include #include static std::string inspect_sass(const void* cubin, size_t cubin_size)