From ce957394a3c9588aa4fe5b3323b8610949e404da Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 30 Jul 2024 08:31:00 -0700 Subject: [PATCH] Update bad links (#2080) * fix broken links * revert repo.toml * linkchecker fixes * fix .cuh errors * lint --- cub/CONTRIBUTING.md | 7 +------ cub/cub/block/block_discontinuity.cuh | 2 +- cub/cub/block/block_histogram.cuh | 2 +- cub/cub/block/block_merge_sort.cuh | 5 ++--- cub/cub/block/block_radix_sort.cuh | 14 ++++++-------- cub/cub/block/block_scan.cuh | 6 +++--- cub/cub/device/device_spmv.cuh | 3 ++- cub/cub/warp/warp_exchange.cuh | 10 +++++----- docs/cub/index.rst | 2 +- .../memory_access_properties/access_property.rst | 2 +- docs/libcudacxx/extended_api/memory_model.rst | 2 +- .../extended_api/synchronization_primitives.rst | 2 +- docs/libcudacxx/ptx.rst | 6 +++--- docs/libcudacxx/releases.rst | 2 +- docs/libcudacxx/releases/versioning.rst | 5 +++-- docs/libcudacxx/standard_api/time_library.rst | 2 +- docs/thrust/cmake_options.rst | 4 ++++ docs/thrust/releases/changelog.rst | 11 +++++------ thrust/README.md | 4 ++-- thrust/thrust/functional.h | 3 +-- thrust/thrust/replace.h | 4 ++-- 21 files changed, 47 insertions(+), 51 deletions(-) diff --git a/cub/CONTRIBUTING.md b/cub/CONTRIBUTING.md index 0b6813ea78..4002779dcd 100644 --- a/cub/CONTRIBUTING.md +++ b/cub/CONTRIBUTING.md @@ -17,7 +17,7 @@ changes. CUB's tests and examples can be built by configuring Thrust with the CMake option `THRUST_INCLUDE_CUB_CMAKE=ON`. This process is described in more detail in Thrust's -[CONTRIBUTING.md](https://nvidia.github.io/thrust/contributing.html). +[CONTRIBUTING.md](https://nvidia.github.io/cccl/thrust/contributing.html). The CMake options in the following section may be used to customize CUB's build process. Note that some of these are controlled by Thrust for compatibility and @@ -63,8 +63,3 @@ The configuration options for CUB are: - Enable separable compilation on all targets that are agnostic of RDC. 
- Targets that explicitly require RDC to be enabled or disabled will ignore this setting. - Default is `OFF`. - -# Development Model - -CUB follows the same development model as Thrust, described -[here](https://nvidia.github.io/thrust/releases/versioning.html). diff --git a/cub/cub/block/block_discontinuity.cuh b/cub/cub/block/block_discontinuity.cuh index b75016ac7f..2fb15e9059 100644 --- a/cub/cub/block/block_discontinuity.cuh +++ b/cub/cub/block/block_discontinuity.cuh @@ -28,7 +28,7 @@ /** * @file - * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for + * The cub::BlockDiscontinuity class provides [collective](../index.html#sec0) methods for * flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. */ diff --git a/cub/cub/block/block_histogram.cuh b/cub/cub/block/block_histogram.cuh index 3553ec79da..d5726f240f 100644 --- a/cub/cub/block/block_histogram.cuh +++ b/cub/cub/block/block_histogram.cuh @@ -28,7 +28,7 @@ /** * @file - * The cub::BlockHistogram class provides [collective](index.html#sec0) methods for + * The cub::BlockHistogram class provides [collective](../index.html#sec0) methods for * constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh index 5ca9500550..29510db5e9 100644 --- a/cub/cub/block/block_merge_sort.cuh +++ b/cub/cub/block/block_merge_sort.cuh @@ -721,10 +721,9 @@ private: * `{ [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }`. * * @par Re-using dynamically allocating shared memory - * The following example under the examples/block folder illustrates usage of + * The ``block/example_block_reduce_dyn_smem.cu`` example illustrates usage of * dynamically shared memory with BlockReduce and how to re-purpose - * the same memory region: - * example_block_reduce_dyn_smem.cu + * the same memory region. 
* * This example can be easily adapted to the storage required by BlockMergeSort. */ diff --git a/cub/cub/block/block_radix_sort.cuh b/cub/cub/block/block_radix_sort.cuh index 964f4fbe0e..4865099291 100644 --- a/cub/cub/block/block_radix_sort.cuh +++ b/cub/cub/block/block_radix_sort.cuh @@ -28,7 +28,7 @@ /** * @file - * The cub::BlockRadixSort class provides [collective](index.html#sec0) methods for radix + * The cub::BlockRadixSort class provides [collective](../index.html#sec0) methods for radix * sorting of items partitioned across a CUDA thread block. */ @@ -142,7 +142,7 @@ CUB_NAMESPACE_BEGIN //! @blockcollective{BlockRadixSort} //! //! The code snippet below illustrates a sort of 512 integer keys that -//! are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads +//! are partitioned in a [blocked arrangement](../index.html#sec5sec3) across 128 threads //! where each thread owns 4 consecutive items. //! //! .. tab-set-code:: @@ -199,10 +199,8 @@ CUB_NAMESPACE_BEGIN //! Re-using dynamically allocating shared memory //! -------------------------------------------------- //! -//! The following example under the examples/block folder illustrates usage of -//! dynamically shared memory with BlockReduce and how to re-purpose -//! the same memory region: -//! example_block_reduce_dyn_smem.cu +//! The ``block/example_block_reduce_dyn_smem.cu`` example illustrates usage of dynamic shared memory with +//! BlockReduce and how to re-purpose the same memory region. //! //! This example can be easily adapted to the storage required by BlockRadixSort. //! @endrst @@ -986,7 +984,7 @@ public: //! +++++++ //! //! The code snippet below illustrates a sort of 512 integer keys that - //! are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + //! are partitioned in a [blocked arrangement](../index.html#sec5sec3) across 128 threads //! where each thread owns 4 consecutive keys. //! //! .. 
code-block:: c++ @@ -1590,7 +1588,7 @@ public: //! +++++++ //! //! The code snippet below illustrates a sort of 512 integer keys and values that - //! are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 + //! are initially partitioned in a [blocked arrangement](../index.html#sec5sec3) across 128 //! threads where each thread owns 4 consecutive pairs. The final partitioning is striped. //! //! .. code-block:: c++ diff --git a/cub/cub/block/block_scan.cuh b/cub/cub/block/block_scan.cuh index a06b7c185f..df7ab6e814 100644 --- a/cub/cub/block/block_scan.cuh +++ b/cub/cub/block/block_scan.cuh @@ -1011,7 +1011,7 @@ public: //! +++++++ //! //! The code snippet below illustrates an exclusive prefix max scan of 512 integer - //! items that are partitioned in a [blocked arrangement](index.html#sec5sec3) + //! items that are partitioned in a [blocked arrangement](../index.html#sec5sec3) //! across 128 threads where each thread owns 4 consecutive items. //! //! .. code-block:: c++ @@ -2180,7 +2180,7 @@ public: //! +++++++ //! //! The code snippet below illustrates an inclusive prefix max scan of 512 integer items that - //! are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + //! are partitioned in a [blocked arrangement](../index.html#sec5sec3) across 128 threads //! where each thread owns 4 consecutive items. //! //! .. code-block:: c++ @@ -2314,7 +2314,7 @@ public: //! +++++++ //! //! The code snippet below illustrates an inclusive prefix max scan of 512 integer items that - //! are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + //! are partitioned in a [blocked arrangement](../index.html#sec5sec3) across 128 threads //! where each thread owns 4 consecutive items. //! //! .. 
code-block:: c++ diff --git a/cub/cub/device/device_spmv.cuh b/cub/cub/device/device_spmv.cuh index 93e51f2293..6d6d126482 100644 --- a/cub/cub/device/device_spmv.cuh +++ b/cub/cub/device/device_spmv.cuh @@ -67,7 +67,8 @@ CUB_NAMESPACE_BEGIN //! //! - ``A`` is an ``m * n`` sparse matrix whose non-zero structure is specified in //! `compressed-storage-row (CSR) format -//! `_ (i.e., three arrays: +//! `_ (i.e., three +//! arrays: //! ``values``, ``row_offsets``, and ``column_indices``) //! - ``x`` and ``y`` are dense vectors //! diff --git a/cub/cub/warp/warp_exchange.cuh b/cub/cub/warp/warp_exchange.cuh index 712d0a6bcd..79f422f5ab 100644 --- a/cub/cub/warp/warp_exchange.cuh +++ b/cub/cub/warp/warp_exchange.cuh @@ -27,7 +27,7 @@ /** * @file - * The cub::WarpExchange class provides [collective](index.html#sec0) + * The cub::WarpExchange class provides [collective](../index.html#sec0) * methods for rearranging data partitioned across a CUDA warp. */ @@ -68,7 +68,7 @@ using InternalWarpExchangeImpl = } // namespace detail /** - * @brief The WarpExchange class provides [collective](index.html#sec0) + * @brief The WarpExchange class provides [collective](../index.html#sec0) * methods for rearranging data partitioned across a CUDA warp. * * @tparam T @@ -94,10 +94,10 @@ using InternalWarpExchangeImpl = * partitioning of items across threads (where consecutive items belong to a * single thread). 
* - WarpExchange supports the following types of data exchanges: - * - Transposing between [blocked](index.html#sec5sec3) and - * [striped](index.html#sec5sec3) arrangements + * - Transposing between [blocked](../index.html#sec5sec3) and + * [striped](../index.html#sec5sec3) arrangements * - Scattering ranked items to a - * [striped arrangement](index.html#sec5sec3) + * [striped arrangement](../index.html#sec5sec3) * * @par A Simple Example * @par diff --git a/docs/cub/index.rst b/docs/cub/index.rst index 17f00719e0..f39df651bd 100644 --- a/docs/cub/index.rst +++ b/docs/cub/index.rst @@ -435,7 +435,7 @@ How is CUB different than Thrust and Modern GPU? CUB and Thrust -------------------------------------------------- -CUB and `Thrust `_ share some +CUB and `Thrust `_ share some similarities in that they both provide similar device-wide primitives for CUDA. However, they target different abstraction layers for parallel computing. Thrust abstractions are agnostic of any particular parallel framework (e.g., diff --git a/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst b/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst index 44006c52d4..762d0d0e0e 100644 --- a/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst +++ b/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst @@ -258,7 +258,7 @@ Mapping of access properties to NVVM-IR and the PTX ISA When ``cuda::access_property`` is applied to memory operation, it sometimes matches with some of the cache eviction priorities and cache -hints introduced in the `PTX ISA Version 7.4 `_. +hints introduced in the `PTX ISA Version 7.4 `_. 
See `Cache Eviction Priority Hints `_ - ``global``: ``evict_unchanged`` diff --git a/docs/libcudacxx/extended_api/memory_model.rst b/docs/libcudacxx/extended_api/memory_model.rst index 91048ca680..ff9f9ef44c 100644 --- a/docs/libcudacxx/extended_api/memory_model.rst +++ b/docs/libcudacxx/extended_api/memory_model.rst @@ -78,7 +78,7 @@ An atomic operation is atomic at the scope it specifies if: .. note:: If `hostNativeAtomicSupported` is `0`, atomic load or store operations at system scope that affect a naturally-aligned 16-byte wide object in - `unified memory `__ or + `unified memory `__ or `mapped memory `__ require system support. NVIDIA is not aware of any system that lacks this support and there is no CUDA API query available to detect such systems. diff --git a/docs/libcudacxx/extended_api/synchronization_primitives.rst b/docs/libcudacxx/extended_api/synchronization_primitives.rst index 9a9a08c45a..0d0101b08e 100644 --- a/docs/libcudacxx/extended_api/synchronization_primitives.rst +++ b/docs/libcudacxx/extended_api/synchronization_primitives.rst @@ -61,7 +61,7 @@ Synchronization Primitives primitive for constraining concurrent access - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.0 * - :ref:`cuda::binary_semaphore ` - - System wide `std::binary_semaphore `_ + - System wide `std::binary_semaphore `_ primitive for mutual exclusion - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.0 diff --git a/docs/libcudacxx/ptx.rst b/docs/libcudacxx/ptx.rst index c5fea593d5..c4483db2de 100644 --- a/docs/libcudacxx/ptx.rst +++ b/docs/libcudacxx/ptx.rst @@ -480,9 +480,9 @@ Instructions by section - No * - `wmma.store `__ - No - * - `wmma.mma `__ + * - `wmma.mma `__ - No - * - `mma `__ + * - `mma `__ - No * - `ldmatrix `__ - No @@ -490,7 +490,7 @@ Instructions by section - No * - `movmatrix `__ - No - * - `mma.sp `__ + * - `mma.sp `__ - No .. 
list-table:: `Asynchronous Warpgroup Level Matrix Multiply-Accumulate Instructions `__ diff --git a/docs/libcudacxx/releases.rst b/docs/libcudacxx/releases.rst index 182df6b351..cf91820fad 100644 --- a/docs/libcudacxx/releases.rst +++ b/docs/libcudacxx/releases.rst @@ -1,7 +1,7 @@ .. _libcudacxx-releases: Releases -============ +======== .. toctree:: :maxdepth: 1 diff --git a/docs/libcudacxx/releases/versioning.rst b/docs/libcudacxx/releases/versioning.rst index 703ffaa855..514aed184f 100644 --- a/docs/libcudacxx/releases/versioning.rst +++ b/docs/libcudacxx/releases/versioning.rst @@ -149,8 +149,9 @@ that the default ABI version may change in any release. A subset of older ABI versions can be used instead by defining ``_LIBCUDACXX_CUDA_ABI_VERSION`` to the desired version. -For more information on specific ABI versions, please see the `releases -section <../releases.md>`_ and `changelog `_. +For more information on specific ABI versions, please see the +:ref:`release section <libcudacxx-releases>` and +:ref:`changelog `. A program is ill-formed, no diagnostic required, if it uses two different translation units compiled with a different NVIDIA C++ diff --git a/docs/libcudacxx/standard_api/time_library.rst b/docs/libcudacxx/standard_api/time_library.rst index 176e2a9ef6..e28685730b 100644 --- a/docs/libcudacxx/standard_api/time_library.rst +++ b/docs/libcudacxx/standard_api/time_library.rst @@ -34,7 +34,7 @@ we use: - `GetSystemTimePreciseAsFileTime `_ and `GetSystemTimeAsFileTime `_ for host code on Windows. -- `clock_gettime(CLOCK_REALTIME, ...) `_ and `gettimeofday `_ +- `clock_gettime(CLOCK_REALTIME, ...) `_ and `gettimeofday `_ for host code on Linux, Android, and QNX. - `PTX's %globaltimer `_ for device code. diff --git a/docs/thrust/cmake_options.rst b/docs/thrust/cmake_options.rst index 47fb14e750..a3dab2487b 100644 --- a/docs/thrust/cmake_options.rst +++ b/docs/thrust/cmake_options.rst @@ -1,3 +1,5 @@ +.. 
_cmake-options: + CMake Options ============= @@ -83,6 +85,8 @@ Single Config CMake Options - Selects the C++ standard dialect to use. Default is ``14`` (C++14). +.. _cmake-multi-config-options: + Multi Config CMake Options -------------------------- diff --git a/docs/thrust/releases/changelog.rst b/docs/thrust/releases/changelog.rst index 477e3c74d7..9ace537b48 100644 --- a/docs/thrust/releases/changelog.rst +++ b/docs/thrust/releases/changelog.rst @@ -223,7 +223,7 @@ Thrust 1.17.0 is the final minor release of the 1.X series. This release provides GDB pretty-printers for device vectors/references, a new ``unique_count`` algorithm, and an easier way to create tagged Thrust iterators. Several documentation fixes are included, which can be found -on the new Thrust documentation site at https://nvidia.github.io/thrust. +on the new Thrust documentation site at https://nvidia.github.io/cccl/thrust/. We’ll be migrating existing documentation sources to this new location over the next few months. @@ -255,8 +255,7 @@ Other Enhancements - NVIDIA/thrust#1512: Use CUB to implement ``adjacent_difference``. - NVIDIA/thrust#1555: Use CUB to implement ``scan_by_key``. -- NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation at - https://nvidia.github.io/thrust. +- NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation. - NVIDIA/thrust#1639: Fixed broken link in documentation. Thanks to @jrhemstad for this contribution. - NVIDIA/thrust#1644: Increase contrast of search input text in new doc @@ -792,7 +791,7 @@ New Features - NVIDIA/thrust#1159: CMake multi-config support, which allows multiple combinations of host and device systems to be built and tested at once. More details can be found here: - https://github.com/NVIDIA/thrust/blob/main/CONTRIBUTING.md#multi-config-cmake-options + :ref:`Multi Config CMake Options <cmake-multi-config-options>` - CMake refactoring: - Added install targets to CMake builds. 
@@ -800,7 +799,7 @@ New Features - Thrust can be added to another CMake project by calling ``add_subdirectory`` with the Thrust source root (see NVIDIA/thrust#976). An example can be found here: - https://github.com/NVIDIA/thrust/blob/main/examples/cmake/add_subdir/CMakeLists.txt + https://github.com/NVIDIA/cccl/blob/main/thrust/examples/cmake/add_subdir/CMakeLists.txt - CMake < 3.15 is no longer supported. - Dialects are now configured through target properties. A new ``THRUST_CPP_DIALECT`` option has been added for single config @@ -831,7 +830,7 @@ Other Enhancements ~~~~~~~~~~~~~~~~~~ - Contributor documentation: - https://github.com/NVIDIA/thrust/blob/main/CONTRIBUTING.md + https://github.com/NVIDIA/cccl/blob/main/CONTRIBUTING.md - Code of Conduct: https://github.com/NVIDIA/thrust/blob/main/CODE_OF_CONDUCT.md. Thanks to Conor Hoekstra for this contribution. diff --git a/thrust/README.md b/thrust/README.md index 7a0c478c99..37103fee04 100644 --- a/thrust/README.md +++ b/thrust/README.md @@ -123,7 +123,7 @@ git clone --recursive https://github.com/NVIDIA/thrust.git ## Using Thrust From Your Project -For CMake-based projects, we provide a CMake package for use with `find_package`. See the [CMake README](https://github.com/NVIDIA/cccl/blob/main/docs/thrust/github_pages/setup/cmake_options.md) for more information. +For CMake-based projects, we provide a CMake package for use with `find_package`. See [CMake Options](https://github.com/NVIDIA/cccl/blob/main/docs/thrust/cmake_options.rst) for more information. Thrust can also be added via `add_subdirectory` or tools like the [CMake Package Manager](https://github.com/cpm-cmake/CPM.cmake). 
For non-CMake projects, compile with: @@ -188,7 +188,7 @@ Some parts are distributed under the [Apache License v2.0] and the [Boost Licens [GitHub]: https://github.com/NVIDIA/cccl/tree/main/thrust -[contributing section]: https://nvidia.github.io/thrust/contributing.html +[contributing section]: https://nvidia.github.io/cccl/thrust/contributing.html [CMake build system]: https://cmake.org diff --git a/thrust/thrust/functional.h b/thrust/thrust/functional.h index 87ba0f78cd..4b88f46954 100644 --- a/thrust/thrust/functional.h +++ b/thrust/thrust/functional.h @@ -1249,8 +1249,7 @@ _CCCL_SUPPRESS_DEPRECATED_PUSH * \param pred The Adaptable Binary Predicate to negate. * \return A new object, npred such that npred(x,y) always returns * the same value as !pred(x,y). - * \tparam Binary Predicate is a model of Adaptable Binary Predicate. + * \tparam Binary Predicate is a model of an Adaptable Binary Predicate. * \see binary_negate * \see not1 */ diff --git a/thrust/thrust/replace.h b/thrust/thrust/replace.h index 5a455c385a..5d0b58e6d0 100644 --- a/thrust/thrust/replace.h +++ b/thrust/thrust/replace.h @@ -54,7 +54,7 @@ THRUST_NAMESPACE_BEGIN * \tparam DerivedPolicy The name of the derived execution policy. * \tparam ForwardIterator is a model of Forward * Iterator, and \p ForwardIterator is mutable. \tparam T is a model of Assignable, \p T is a model of Assignable, \p T is a model of EqualityComparable, objects of \p T may be * compared for equality with objects of \p ForwardIterator's \c value_type, and \p T is convertible to \p * ForwardIterator's \c value_type. @@ -105,7 +105,7 @@ replace(const thrust::detail::execution_policy_base& exec, * * \tparam ForwardIterator is a model of Forward * Iterator, and \p ForwardIterator is mutable. 
\tparam T is a model of Assignable, \p T is a model of Assignable, \p T is a model of EqualityComparable, objects of \p T may be * compared for equality with objects of \p ForwardIterator's \c value_type, and \p T is convertible to \p * ForwardIterator's \c value_type.