Address review comments

NVIDIA · May 7, 2024 · 5b0fc7e · 5b0fc7e
1 parent a5ea93a
commit 5b0fc7e
Show file tree

Hide file tree

Showing 6 changed files with 23 additions and 19 deletions.
diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_async_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/cuda_async_memory_resource.h
@@ -79,7 +79,6 @@ class cuda_async_memory_resource
 
   /**
    * @brief  Constructs the cuda_async_memory_resource from a cuda_memory_pool by calling pool_handle()
-   * @throws cuda_error if retrieving the default cudaMemPool_t fails
    */
   cuda_async_memory_resource(cuda_memory_pool& __cuda_pool) noexcept
       : __pool_(__cuda_pool.pool_handle())
@@ -170,7 +169,8 @@ class cuda_async_memory_resource
    * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate_async`
    * @param __bytes The number of bytes that was passed to the `allocate_async` call that returned \p __ptr.
    * @param __alignment The alignment that was passed to the `allocate_async` call that returned \p __ptr.
-   * @param __stream Stream that was passed to the `allocate_async` call that returned \p __ptr.
+   * @param __stream A stream that has a stream ordering relationship with the stream used in the `allocate_async` call
+   * that returned \p __ptr. See https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
    */
   void deallocate_async(void* __ptr, const size_t __bytes, const size_t __alignment, const ::cuda::stream_ref __stream)
   {
@@ -185,7 +185,8 @@ class cuda_async_memory_resource
    * @brief Deallocate memory pointed to by \p __ptr.
    * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate_async`
    * @param __bytes The number of bytes that was passed to the `allocate_async` call that returned \p __ptr.
-   * @param __stream Stream that was passed to the `allocate_async` call that returned \p __ptr.
+   * @param __stream A stream that has a stream ordering relationship with the stream used in the `allocate_async` call
+   * that returned \p __ptr. See https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
    */
   void deallocate_async(void* __ptr, size_t, const ::cuda::stream_ref __stream)
   {

diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_memory_pool.h b/libcudacxx/include/cuda/__memory_resource/cuda_memory_pool.h
@@ -141,11 +141,11 @@ class cuda_memory_pool
    * @returns The created cuda memory pool
    */
   _CCCL_NODISCARD static cudaMemPool_t __create_cuda_mempool(
+    const int __device_id,
     size_t __initial_pool_size,
     size_t __release_threshold,
     const cudaMemAllocationHandleType __allocation_handle_type) noexcept
   {
-    const int __device_id = _CUDA_VMR::__get_current_cuda_device();
     _LIBCUDACXX_ASSERT(_CUDA_VMR::__device_supports_pools(__device_id), "cudaMallocAsync not supported");
     _LIBCUDACXX_ASSERT(__cuda_supports_export_handle_type(__device_id, __allocation_handle_type),
                        "Requested IPC memory handle type not supported");
@@ -215,11 +215,13 @@ class cuda_memory_pool
    * @param release_threshold Optional release threshold size in bytes of the pool. If no value is
    * provided, the release threshold is set to the total amount of memory on the current device.
    */
-  cuda_memory_pool(
-    const size_t initial_pool_size                             = 0,
-    const size_t release_threshold                             = 0,
+  explicit cuda_memory_pool(
+    const int __device_id,
+    const size_t __initial_pool_size                           = 0,
+    const size_t __release_threshold                           = 0,
     const cudaMemAllocationHandleType __allocation_handle_type = cudaMemAllocationHandleType::cudaMemHandleTypeNone)
-      : __pool_handle_(__create_cuda_mempool(initial_pool_size, release_threshold, __allocation_handle_type))
+      : __pool_handle_(
+        __create_cuda_mempool(__device_id, __initial_pool_size, __release_threshold, __allocation_handle_type))
   {}
 
   /**

diff --git a/...udacxx/test/libcudacxx/cuda/memory_resource/cuda_async_memory_resource/construct.pass.cpp b/...udacxx/test/libcudacxx/cuda/memory_resource/cuda_async_memory_resource/construct.pass.cpp
@@ -137,7 +137,7 @@ void test()
 
   {
     const size_t initial_pool_size = 42;
-    cuda::mr::cuda_memory_pool pool{initial_pool_size};
+    cuda::mr::cuda_memory_pool pool{current_device, initial_pool_size};
     async_resource from_initial_pool_size{pool};
 
     ::cudaMemPool_t pool_handle = from_initial_pool_size.pool_handle();
@@ -156,7 +156,7 @@ void test()
   {
     const size_t initial_pool_size = 42;
     const size_t release_threshold = 20;
-    cuda::mr::cuda_memory_pool pool{initial_pool_size, release_threshold};
+    cuda::mr::cuda_memory_pool pool{current_device, initial_pool_size, release_threshold};
     async_resource with_threshold{pool};
 
     ::cudaMemPool_t pool_handle = with_threshold.pool_handle();
@@ -179,7 +179,7 @@ void test()
     const size_t release_threshold = 20;
     const cuda::mr::cudaMemAllocationHandleType allocation_handle{
       cuda::mr::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor};
-    cuda::mr::cuda_memory_pool pool{initial_pool_size, release_threshold, allocation_handle};
+    cuda::mr::cuda_memory_pool pool{current_device, initial_pool_size, release_threshold, allocation_handle};
     async_resource with_allocation_handle{pool};
 
     ::cudaMemPool_t pool_handle = with_allocation_handle.pool_handle();

diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/construct.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/construct.pass.cpp
@@ -78,13 +78,13 @@ void test()
   }
 
   {
-    cuda::mr::cuda_memory_pool default_constructed{};
-    assert(default_constructed.pool_handle() != current_default_pool);
+    cuda::mr::cuda_memory_pool from_device{current_device};
+    assert(from_device.pool_handle() != current_default_pool);
   }
 
   {
     const size_t initial_pool_size = 42;
-    cuda::mr::cuda_memory_pool from_initial_pool_size{initial_pool_size};
+    cuda::mr::cuda_memory_pool from_initial_pool_size{current_device, initial_pool_size};
 
     ::cudaMemPool_t pool_handle = from_initial_pool_size.pool_handle();
     assert(pool_handle != current_default_pool);
@@ -102,7 +102,7 @@ void test()
   {
     const size_t initial_pool_size = 42;
     const size_t release_threshold = 20;
-    cuda::mr::cuda_memory_pool with_threshold{initial_pool_size, release_threshold};
+    cuda::mr::cuda_memory_pool with_threshold{current_device, initial_pool_size, release_threshold};
 
     ::cudaMemPool_t pool_handle = with_threshold.pool_handle();
     assert(pool_handle != current_default_pool);
@@ -124,7 +124,8 @@ void test()
     const size_t release_threshold = 20;
     const cuda::mr::cudaMemAllocationHandleType allocation_handle{
       cuda::mr::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor};
-    cuda::mr::cuda_memory_pool with_allocation_handle{initial_pool_size, release_threshold, allocation_handle};
+    cuda::mr::cuda_memory_pool with_allocation_handle{
+      current_device, initial_pool_size, release_threshold, allocation_handle};
 
     ::cudaMemPool_t pool_handle = with_allocation_handle.pool_handle();
     assert(pool_handle != current_default_pool);

diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/equality.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/equality.pass.cpp
@@ -39,9 +39,9 @@ void test()
                        current_device);
   }
 
-  cuda::mr::cuda_memory_pool first{};
+  cuda::mr::cuda_memory_pool first{current_device};
   { // comparison against a plain cuda_memory_pool
-    cuda::mr::cuda_memory_pool second{};
+    cuda::mr::cuda_memory_pool second{current_device};
     assert(first == first);
     assert(first != second);
   }

diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/traits.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/traits.pass.cpp
@@ -20,7 +20,7 @@
 using pool = cuda::mr::cuda_memory_pool;
 static_assert(!cuda::std::is_trivial<pool>::value, "");
 static_assert(!cuda::std::is_trivially_default_constructible<pool>::value, "");
-static_assert(cuda::std::is_default_constructible<pool>::value, "");
+static_assert(!cuda::std::is_default_constructible<pool>::value, "");
 static_assert(!cuda::std::is_copy_constructible<pool>::value, "");
 static_assert(!cuda::std::is_move_constructible<pool>::value, "");
 static_assert(!cuda::std::is_copy_assignable<pool>::value, "");