From 8b2bf13a757283ff2806d532cad0e848e353b8f8 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 19 Sep 2024 20:51:52 +0200 Subject: [PATCH] Make `any_resource` emplacable (#2425) * Rename `async_any_resource` to `any_async_resource` * Add a way of constructing an `any_{async_}resource` from a set of arguments and a tag type --------- Co-authored-by: Allison Piper --- .../uninitialized_async_buffer.cuh | 4 +- .../__memory_resource/any_resource.cuh | 94 ++++++++- cudax/test/CMakeLists.txt | 1 + .../memory_resource/any_async_resource.cu | 178 ++++++++++++++++++ cudax/test/memory_resource/any_resource.cu | 17 ++ cudax/test/memory_resource/test_resource.h | 43 ++++- docs/cudax/index.rst | 2 +- docs/cudax/memory_resource.rst | 2 +- 8 files changed, 321 insertions(+), 20 deletions(-) create mode 100644 cudax/test/memory_resource/any_async_resource.cu diff --git a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh index 54115e4ccd..2f06a666e8 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh @@ -72,7 +72,7 @@ template class uninitialized_async_buffer { private: - using __async_resource = ::cuda::experimental::mr::async_any_resource<_Properties...>; + using __async_resource = ::cuda::experimental::mr::any_async_resource<_Properties...>; __async_resource __mr_; ::cuda::stream_ref __stream_ = {}; size_t __count_ = 0; @@ -204,7 +204,7 @@ public: } //! @rst - //! Returns a \c const reference to the :ref:`any_async_resource ` + //! Returns a \c const reference to the :ref:`any_async_resource ` //! that holds the memory resource used to allocate the buffer //! 
@endrst _CCCL_NODISCARD const __async_resource& get_resource() const noexcept diff --git a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh index eafd626739..e25ebac35f 100644 --- a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh @@ -42,11 +42,15 @@ #include #include #include +#include +#include #include #include #include +#include #include #include +#include namespace cuda::experimental::mr { @@ -77,19 +81,25 @@ private: using __vtable = _CUDA_VMR::_Filtered_vtable<_Properties...>; + //! @brief Validates that a set of \c _OtherProperties... is a superset of \c _Properties... . template static constexpr bool __properties_match = _CUDA_VSTD::__type_set_contains<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>; + //! @brief Validates that a passed in \c _Resource satisfies the \c resource or \c async_resource concept respectively + //! as well as all properties in \c _Properties... . + template + static constexpr bool __valid_resource = + _Alloc_type == _CUDA_VMR::_AllocType::_Async + ? _CUDA_VMR::async_resource_with<_Resource, _Properties...> + : _CUDA_VMR::resource_with<_Resource, _Properties...>; + public: //! @brief Constructs a \c basic_any_resource from a type that satisfies the \c resource or \c async_resource //! concept as well as all properties. //! @param __res The resource to be wrapped within the \c basic_any_resource. 
_LIBCUDACXX_TEMPLATE(class _Resource, class __resource_t = _CUDA_VSTD::remove_cvref_t<_Resource>) - _LIBCUDACXX_REQUIRES( - (!__is_basic_any_resource<_Resource>) _LIBCUDACXX_AND(_CUDA_VMR::resource_with<__resource_t, _Properties...>) - _LIBCUDACXX_AND(_Alloc_type != _CUDA_VMR::_AllocType::_Async - || (_CUDA_VMR::async_resource_with<__resource_t, _Properties...>) )) + _LIBCUDACXX_REQUIRES((!__is_basic_any_resource<_Resource>) _LIBCUDACXX_AND __valid_resource<__resource_t>) basic_any_resource(_Resource&& __res) noexcept : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>( nullptr, &_CUDA_VMR::__alloc_vtable<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning, __resource_t>) @@ -105,8 +115,31 @@ public: } } + //! @brief Constructs a \c basic_any_resource wrapping an object of type \c _Resource that + //! is constructed from \c __args... . \c _Resource must satisfy the \c resource or \c async_resource + //! concept, and it must provide all properties in \c _Properties... . + //! @param __args The arguments used to construct the instance of \c _Resource to be wrapped within the + //! \c basic_any_resource. + _LIBCUDACXX_TEMPLATE(class _Resource, class... _Args) + _LIBCUDACXX_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::is_constructible, _Resource, _Args...) + _LIBCUDACXX_AND __valid_resource<_Resource>) + basic_any_resource(_CUDA_VSTD::in_place_type_t<_Resource>, _Args&&... __args) noexcept + : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>( + nullptr, &_CUDA_VMR::__alloc_vtable<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning, _Resource>) + , __vtable(__vtable::template _Create<_Resource>()) + { + if constexpr (_CUDA_VMR::_IsSmall<_Resource>()) + { + ::new (static_cast(this->__object.__buf_)) _Resource(_CUDA_VSTD::forward<_Args>(__args)...); + } + else + { + this->__object.__ptr_ = new _Resource(_CUDA_VSTD::forward<_Args>(__args)...); + } + } + //! 
@brief Conversion from a \c basic_any_resource with the same set of properties but in a different order. - //! This constructor also handles conversion from \c async_any_resource to \c any_resource + //! This constructor also handles conversion from \c any_async_resource to \c any_resource //! @param __other The other \c basic_any_resource. _LIBCUDACXX_TEMPLATE(_CUDA_VMR::_AllocType _OtherAllocType, class... _OtherProperties) _LIBCUDACXX_REQUIRES( @@ -260,19 +293,64 @@ template using any_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Default, _Properties...>; //! @rst -//! .. _cudax-memory-resource-async-any-resource: +//! .. _cudax-memory-resource-any-async-resource: //! //! Type erased wrapper around an `async_resource` //! ----------------------------------------------- //! -//! ``async_any_resource`` wraps any given :ref:`async resource ` +//! ``any_async_resource`` wraps any given :ref:`async resource ` //! that satisfies the required properties. It owns the contained resource, taking care of construction / destruction. //! This makes it especially suited for use in e.g. container types that need to ensure that the lifetime of the //! container exceeds the lifetime of the memory resource used to allocate the storage //! //! @endrst template -using async_any_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Async, _Properties...>; +using any_async_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Async, _Properties...>; + +//! @rst +//! .. _cudax-memory-resource-make-any-resource: +//! +//! Factory function for `any_resource` objects +//! ------------------------------------------- +//! +//! ``make_any_resource`` constructs an :ref:`any_resource ` object that wraps a +//! newly constructed instance of the given resource type. The resource type must satisfy the ``cuda::mr::resource`` +//! concept and provide all of the properties specified in the template parameter pack. +//! +//! 
@param __args The arguments used to construct the instance of the resource type. +//! +//! @endrst +template <class _Resource, class... _Properties, class... _Args> +auto make_any_resource(_Args&&... __args) -> any_resource<_Properties...> +{ + static_assert(_CUDA_VMR::resource<_Resource>, "_Resource does not satisfy the cuda::mr::resource concept"); + static_assert(_CUDA_VMR::resource_with<_Resource, _Properties...>, + "Resource does not satisfy the required properties"); + return any_resource<_Properties...>{_CUDA_VSTD::in_place_type<_Resource>, _CUDA_VSTD::forward<_Args>(__args)...}; +} + +//! @rst +//! .. _cudax-memory-resource-make-any-async-resource: +//! +//! Factory function for `any_async_resource` objects +//! ------------------------------------------------- +//! +//! ``make_any_async_resource`` constructs an :ref:`any_async_resource <cudax-memory-resource-any-async-resource>` +//! object that wraps a newly constructed instance of the given resource type. The resource type must satisfy the +//! ``cuda::mr::async_resource`` concept and provide all of the properties specified in the template parameter pack. +//! +//! @param __args The arguments used to construct the instance of the resource type. +//! +//! @endrst +template <class _Resource, class... _Properties, class... _Args> +auto make_any_async_resource(_Args&&... 
__args) -> any_async_resource<_Properties...> +{ + static_assert(_CUDA_VMR::async_resource<_Resource>, + "_Resource does not satisfy the cuda::mr::async_resource concept"); + static_assert(_CUDA_VMR::async_resource_with<_Resource, _Properties...>, + "Resource does not satisfy the required properties"); + return any_async_resource<_Properties...>{_CUDA_VSTD::in_place_type<_Resource>, _CUDA_VSTD::forward<_Args>(__args)...}; +} } // namespace cuda::experimental::mr diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 3f9b29b1ab..a1b554af92 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -92,6 +92,7 @@ foreach(cn_target IN LISTS cudax_TARGETS) ) cudax_add_catch2_test(test_target memory_resource ${cn_target} + memory_resource/any_async_resource.cu memory_resource/any_resource.cu memory_resource/async_memory_pool.cu memory_resource/async_memory_resource.cu diff --git a/cudax/test/memory_resource/any_async_resource.cu b/cudax/test/memory_resource/any_async_resource.cu new file mode 100644 index 0000000000..1bd6f1ea8f --- /dev/null +++ b/cudax/test/memory_resource/any_async_resource.cu @@ -0,0 +1,178 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include + +#include "test_resource.h" +#include +#include + +TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resource]", big_resource, small_resource) +{ + using TestResource = TestType; + constexpr bool is_big = sizeof(TestResource) > sizeof(cuda::mr::_AnyResourceStorage); + + SECTION("construct and destruct") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::any_async_resource<> mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("copy and move") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::any_async_resource<> mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == expected); + + auto mr2 = mr; + expected.new_count += is_big; + ++expected.copy_count; + ++expected.object_count; + CHECK(this->counts == expected); + CHECK(mr == mr2); + ++expected.equal_to_count; + CHECK(this->counts == expected); + + auto mr3 = std::move(mr); + expected.move_count += !is_big; // for big resources, move is a pointer swap + CHECK(this->counts == expected); + CHECK(mr2 == mr3); + ++expected.equal_to_count; + CHECK(this->counts == expected); + } + expected.delete_count += 2 * is_big; + expected.object_count -= 2; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("allocate and deallocate") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::any_async_resource<> mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == 
expected); + + void* ptr = mr.allocate(bytes(50), align(8)); + CHECK(ptr == this); + ++expected.allocate_count; + CHECK(this->counts == expected); + + mr.deallocate(ptr, bytes(50), align(8)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("allocate_async and deallocate_async") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::stream stream{}; + cudax::mr::any_async_resource<> mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == expected); + + void* ptr = mr.allocate_async(bytes(50), align(8), ::cuda::stream_ref{stream}); + CHECK(ptr == this); + ++expected.allocate_async_count; + CHECK(this->counts == expected); + + mr.deallocate_async(ptr, bytes(50), align(8), ::cuda::stream_ref{stream}); + ++expected.deallocate_async_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("conversion to resource_ref") + { + Counts expected{}; + { + cudax::mr::any_async_resource<> mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == expected); + + cuda::mr::resource_ref<> ref = mr; + + CHECK(this->counts == expected); + auto* ptr = ref.allocate(bytes(100), align(8)); + CHECK(ptr == this); + ++expected.allocate_count; + CHECK(this->counts == expected); + ref.deallocate(ptr, bytes(0), align(0)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("make_any_async_resource") + { 
+ Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::any_async_resource<> mr = cudax::mr::make_any_async_resource<TestResource>(42, this); + expected.new_count += is_big; + ++expected.object_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + // Reset the counters: + this->counts = Counts(); +} diff --git a/cudax/test/memory_resource/any_resource.cu b/cudax/test/memory_resource/any_resource.cu index cc76f1755a..ed33bf72d2 100644 --- a/cudax/test/memory_resource/any_resource.cu +++ b/cudax/test/memory_resource/any_resource.cu @@ -129,4 +129,21 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", // Reset the counters: this->counts = Counts(); + + SECTION("make_any_resource") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::mr::any_resource<> mr = cudax::mr::make_any_resource<TestResource>(42, this); + expected.new_count += is_big; + ++expected.object_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + // Reset the counters: + this->counts = Counts(); } diff --git a/cudax/test/memory_resource/test_resource.h b/cudax/test/memory_resource/test_resource.h index dfc103b1ac..6c39b7f034 100644 --- a/cudax/test/memory_resource/test_resource.h +++ b/cudax/test/memory_resource/test_resource.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -11,14 +13,16 @@ using std::uintptr_t; struct Counts { - int object_count = 0; - int move_count = 0; - int copy_count = 0; - int allocate_count = 0; - int deallocate_count = 0; - int equal_to_count = 0; - int new_count = 0; - int delete_count = 0; + int object_count = 0; + int move_count = 0; + int copy_count = 0; + int allocate_count = 0; + int deallocate_count = 0; + int allocate_async_count = 0; + int deallocate_async_count = 0; + int equal_to_count = 0; + int new_count = 0; + int 
delete_count = 0; friend std::ostream& operator<<(std::ostream& os, const Counts& counts) { @@ -28,6 +32,8 @@ struct Counts << "copy: " << counts.copy_count << ", " // << "allocate: " << counts.allocate_count << ", " // << "deallocate: " << counts.deallocate_count << ", " // + << "allocate_async: " << counts.allocate_async_count << ", " // + << "deallocate_async: " << counts.deallocate_async_count << ", " // << "equal_to: " << counts.equal_to_count << ", " // << "new: " << counts.new_count << ", " // << "delete: " << counts.delete_count; @@ -40,6 +46,8 @@ struct Counts lhs.copy_count == rhs.copy_count && // lhs.allocate_count == rhs.allocate_count && // lhs.deallocate_count == rhs.deallocate_count && // + lhs.allocate_async_count == rhs.allocate_async_count && // + lhs.deallocate_async_count == rhs.deallocate_async_count && // lhs.equal_to_count == rhs.equal_to_count && // lhs.new_count == rhs.new_count && // lhs.delete_count == rhs.delete_count; // @@ -139,6 +147,25 @@ struct test_resource return; } + void* allocate_async(std::size_t bytes, std::size_t align, ::cuda::stream_ref) + { + _assert_valid(); + CHECK(bytes == fixture->bytes_); + CHECK(align == fixture->align_); + ++fixture->counts.allocate_async_count; + return fixture; + } + + void deallocate_async(void* ptr, std::size_t bytes, std::size_t align, ::cuda::stream_ref) noexcept + { + _assert_valid(); + CHECK(ptr == fixture); + CHECK(bytes == fixture->bytes_); + CHECK(align == fixture->align_); + ++fixture->counts.deallocate_async_count; + return; + } + friend bool operator==(const test_resource& lhs, const test_resource& rhs) { lhs._assert_valid(); diff --git a/docs/cudax/index.rst b/docs/cudax/index.rst index b9df0486db..4e4110db96 100644 --- a/docs/cudax/index.rst +++ b/docs/cudax/index.rst @@ -16,7 +16,7 @@ However, any feature within this library has important use cases and we encourag Specifically, ``cudax`` provides: - :ref:`uninitialized storage ` - - :ref:`an owning type erased memory resource ` + - 
:ref:`an owning type erased memory resource <cudax-memory-resource-any-async-resource>` - :ref:`stream-ordered memory resources ` - dimensions description functionality diff --git a/docs/cudax/memory_resource.rst b/docs/cudax/memory_resource.rst index 63736806e9..e37c16c30a 100644 --- a/docs/cudax/memory_resource.rst +++ b/docs/cudax/memory_resource.rst @@ -16,7 +16,7 @@ Memory Resources The ```` header provides: - :ref:`any_resource ` and - :ref:`async_any_resource <cudax-memory-resource-async-any-resource>` type erased memory resources similar to + :ref:`any_async_resource <cudax-memory-resource-any-async-resource>` type erased memory resources similar to ``std::any``. In contrast to :ref:`resource_ref ` they own the contained resource. - :ref:`async_memory_resource ` A standard C++ interface for *heterogeneous*,