Update bad links (#2080)
* fix broken links
* revert repo.toml
* linkchecker fixes
* fix .cuh errors
* lint
bryevdv committed Jul 30, 2024
1 parent d4f928e commit ce95739
Showing 21 changed files with 47 additions and 51 deletions.
7 changes: 1 addition & 6 deletions cub/CONTRIBUTING.md
@@ -17,7 +17,7 @@ changes. CUB's tests and examples can be built by configuring Thrust with the
CMake option `THRUST_INCLUDE_CUB_CMAKE=ON`.

This process is described in more detail in Thrust's
-[CONTRIBUTING.md](https://nvidia.github.io/thrust/contributing.html).
+[CONTRIBUTING.md](https://nvidia.github.io/cccl/thrust/contributing.html).

The CMake options in the following section may be used to customize CUB's build
process. Note that some of these are controlled by Thrust for compatibility and
@@ -63,8 +63,3 @@ The configuration options for CUB are:
  - Enable separable compilation on all targets that are agnostic of RDC.
  - Targets that explicitly require RDC to be enabled or disabled will ignore this setting.
  - Default is `OFF`.
-
-# Development Model
-
-CUB follows the same development model as Thrust, described
-[here](https://nvidia.github.io/thrust/releases/versioning.html).
2 changes: 1 addition & 1 deletion cub/cub/block/block_discontinuity.cuh
@@ -28,7 +28,7 @@

/**
 * @file
- * The cub::BlockDiscontinuity class provides [<em>collective</em>](index.html#sec0) methods for
+ * The cub::BlockDiscontinuity class provides [<em>collective</em>](../index.html#sec0) methods for
 * flagging discontinuities within an ordered set of items partitioned across a CUDA thread block.
 */
2 changes: 1 addition & 1 deletion cub/cub/block/block_histogram.cuh
@@ -28,7 +28,7 @@

/**
 * @file
- * The cub::BlockHistogram class provides [<em>collective</em>](index.html#sec0) methods for
+ * The cub::BlockHistogram class provides [<em>collective</em>](../index.html#sec0) methods for
 * constructing block-wide histograms from data samples partitioned across a CUDA thread block.
 */
5 changes: 2 additions & 3 deletions cub/cub/block/block_merge_sort.cuh
@@ -721,10 +721,9 @@ private:
 * `{ [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }`.
 *
 * @par Re-using dynamically allocating shared memory
- * The following example under the examples/block folder illustrates usage of
+ * The ``block/example_block_reduce_dyn_smem.cu`` example illustrates usage of
 * dynamically shared memory with BlockReduce and how to re-purpose
- * the same memory region:
- * <a href="../../examples/block/example_block_reduce_dyn_smem.cu">example_block_reduce_dyn_smem.cu</a>
+ * the same memory region.
 *
 * This example can be easily adapted to the storage required by BlockMergeSort.
 */
14 changes: 6 additions & 8 deletions cub/cub/block/block_radix_sort.cuh
@@ -28,7 +28,7 @@

/**
 * @file
- * The cub::BlockRadixSort class provides [<em>collective</em>](index.html#sec0) methods for radix
+ * The cub::BlockRadixSort class provides [<em>collective</em>](../index.html#sec0) methods for radix
 * sorting of items partitioned across a CUDA thread block.
 */

@@ -142,7 +142,7 @@ CUB_NAMESPACE_BEGIN
//! @blockcollective{BlockRadixSort}
//!
//! The code snippet below illustrates a sort of 512 integer keys that
-//! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+//! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
//! where each thread owns 4 consecutive items.
//!
//! .. tab-set-code::

@@ -199,10 +199,8 @@ CUB_NAMESPACE_BEGIN
//! Re-using dynamically allocating shared memory
//! --------------------------------------------------
//!
-//! The following example under the examples/block folder illustrates usage of
-//! dynamically shared memory with BlockReduce and how to re-purpose
-//! the same memory region:
-//! <a href="../../examples/block/example_block_reduce_dyn_smem.cu">example_block_reduce_dyn_smem.cu</a>
+//! The ``block/example_block_reduce_dyn_smem.cu`` example illustrates usage of dynamically shared memory with
+//! BlockReduce and how to re-purpose the same memory region.
//!
//! This example can be easily adapted to the storage required by BlockRadixSort.
//! @endrst

@@ -986,7 +984,7 @@ public:
//! +++++++
//!
//! The code snippet below illustrates a sort of 512 integer keys that
-//! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+//! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
//! where each thread owns 4 consecutive keys.
//!
//! .. code-block:: c++

@@ -1590,7 +1588,7 @@ public:
//! +++++++
//!
//! The code snippet below illustrates a sort of 512 integer keys and values that
-//! are initially partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128
+//! are initially partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128
//! threads where each thread owns 4 consecutive pairs. The final partitioning is striped.
//!
//! .. code-block:: c++
6 changes: 3 additions & 3 deletions cub/cub/block/block_scan.cuh
@@ -1011,7 +1011,7 @@ public:
//! +++++++
//!
//! The code snippet below illustrates an exclusive prefix max scan of 512 integer
-//! items that are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3)
+//! items that are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3)
//! across 128 threads where each thread owns 4 consecutive items.
//!
//! .. code-block:: c++

@@ -2180,7 +2180,7 @@ public:
//! +++++++
//!
//! The code snippet below illustrates an inclusive prefix max scan of 512 integer items that
-//! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+//! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
//! where each thread owns 4 consecutive items.
//!
//! .. code-block:: c++

@@ -2314,7 +2314,7 @@ public:
//! +++++++
//!
//! The code snippet below illustrates an inclusive prefix max scan of 512 integer items that
-//! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+//! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
//! where each thread owns 4 consecutive items.
//!
//! .. code-block:: c++
3 changes: 2 additions & 1 deletion cub/cub/device/device_spmv.cuh
@@ -67,7 +67,8 @@ CUB_NAMESPACE_BEGIN
//!
//! - ``A`` is an ``m * n`` sparse matrix whose non-zero structure is specified in
//!   `compressed-storage-row (CSR) format
-//!   <http://en.wikipedia.org/wiki/Sparse_matrix#Compressed_row_Storage_.28CRS_or_CSR.29>`_ (i.e., three arrays:
+//!   <https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)>`_ (i.e., three
+//!   arrays:
//!   ``values``, ``row_offsets``, and ``column_indices``)
//! - ``x`` and ``y`` are dense vectors
//!
10 changes: 5 additions & 5 deletions cub/cub/warp/warp_exchange.cuh
@@ -27,7 +27,7 @@

/**
 * @file
- * The cub::WarpExchange class provides [<em>collective</em>](index.html#sec0)
+ * The cub::WarpExchange class provides [<em>collective</em>](../index.html#sec0)
 * methods for rearranging data partitioned across a CUDA warp.
 */

@@ -68,7 +68,7 @@ using InternalWarpExchangeImpl =
} // namespace detail

/**
- * @brief The WarpExchange class provides [<em>collective</em>](index.html#sec0)
+ * @brief The WarpExchange class provides [<em>collective</em>](../index.html#sec0)
 * methods for rearranging data partitioned across a CUDA warp.
 *
 * @tparam T

@@ -94,10 +94,10 @@ using InternalWarpExchangeImpl =
 * partitioning of items across threads (where consecutive items belong to a
 * single thread).
 * - WarpExchange supports the following types of data exchanges:
- *   - Transposing between [<em>blocked</em>](index.html#sec5sec3) and
- *     [<em>striped</em>](index.html#sec5sec3) arrangements
+ *   - Transposing between [<em>blocked</em>](../index.html#sec5sec3) and
+ *     [<em>striped</em>](../index.html#sec5sec3) arrangements
 *   - Scattering ranked items to a
- *     [<em>striped arrangement</em>](index.html#sec5sec3)
+ *     [<em>striped arrangement</em>](../index.html#sec5sec3)
 *
 * @par A Simple Example
 * @par
2 changes: 1 addition & 1 deletion docs/cub/index.rst
@@ -435,7 +435,7 @@ How is CUB different than Thrust and Modern GPU?
CUB and Thrust
--------------------------------------------------

-CUB and `Thrust <http://thrust.github.io/>`_ share some
+CUB and `Thrust <https://nvidia.github.io/cccl/thrust/>`_ share some
similarities in that they both provide similar device-wide primitives for CUDA.
However, they target different abstraction layers for parallel computing.
Thrust abstractions are agnostic of any particular parallel framework (e.g.,
@@ -258,7 +258,7 @@ Mapping of access properties to NVVM-IR and the PTX ISA

When ``cuda::access_property`` is applied to memory operation, it
sometimes matches with some of the cache eviction priorities and cache
-hints introduced in the `PTX ISA Version 7.4 <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-isa-version-7-4>`_.
+hints introduced in the `PTX ISA Version 7.4 <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#changes-in-ptx-isa-version-7-4>`_.
See `Cache Eviction Priority Hints <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-eviction-priority-hints>`_

- ``global``: ``evict_unchanged``
2 changes: 1 addition & 1 deletion docs/libcudacxx/extended_api/memory_model.rst
@@ -78,7 +78,7 @@ An atomic operation is atomic at the scope it specifies if:

.. note::
   If `hostNativeAtomicSupported` is `0`, atomic load or store operations at system scope that affect a
   naturally-aligned 16-byte wide object in
-   `unified memory <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#unified-memory>`__ or
+   `unified memory <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-unified-memory-programming-hd>`__ or
   `mapped memory <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#mapped-memory>`__ require system
   support. NVIDIA is not aware of any system that lacks this support and there is no CUDA API query available to
   detect such systems.
@@ -61,7 +61,7 @@ Synchronization Primitives
       primitive for constraining concurrent access
     - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.0
   * - :ref:`cuda::binary_semaphore <libcudacxx-extended-api-synchronization-counting-semaphore>`
-    - System wide `std::binary_semaphore <https://en.cppreference.com/w/cpp/thread/binary_semaphore>`_
+    - System wide `std::binary_semaphore <https://en.cppreference.com/w/cpp/thread/counting_semaphore>`_
       primitive for mutual exclusion
     - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.0
6 changes: 3 additions & 3 deletions docs/libcudacxx/ptx.rst
@@ -480,17 +480,17 @@ Instructions by section
     - No
   * - `wmma.store <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-wmma-store>`__
     - No
-  * - `wmma.mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-multiply-accumulate-instructions-wmma-mma>`__
+  * - `wmma.mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-wmma-mma>`__
     - No
-  * - `mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-multiply-accumulate-instructions-mma>`__
+  * - `mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma>`__
     - No
   * - `ldmatrix <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-load-instruction-ldmatrix>`__
     - No
   * - `stmatrix <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-stmatrix>`__
     - No
   * - `movmatrix <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-transpose-instruction-movmatrix>`__
     - No
-  * - `mma.sp <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#multiply-and-accumulate-instruction-mma-sp>`__
+  * - `mma.sp <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-sparse-mma>`__
     - No

.. list-table:: `Asynchronous Warpgroup Level Matrix Multiply-Accumulate Instructions <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-multiply-accumulate-instructions>`__
2 changes: 1 addition & 1 deletion docs/libcudacxx/releases.rst
@@ -1,7 +1,7 @@
.. _libcudacxx-releases:

Releases
-============
+========

.. toctree::
   :maxdepth: 1
5 changes: 3 additions & 2 deletions docs/libcudacxx/releases/versioning.rst
@@ -149,8 +149,9 @@ that the default ABI version may change in any release. A subset of
older ABI versions can be used instead by defining
``_LIBCUDACXX_CUDA_ABI_VERSION`` to the desired version.

-For more information on specific ABI versions, please see the `releases
-section <../releases.md>`_ and `changelog <changelog.md>`_.
+For more information on specific ABI versions, please see the
+:ref:`release section <libcudacxx-releases>` and
+:ref:`changelog <libcudacxx-releases-changelog>`.

A program is ill-formed, no diagnostic required, if it uses two
different translation units compiled with a different NVIDIA C++
2 changes: 1 addition & 1 deletion docs/libcudacxx/standard_api/time_library.rst
@@ -34,7 +34,7 @@ we use:
- `GetSystemTimePreciseAsFileTime <https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getsystemtimepreciseasfiletime>`_ and
  `GetSystemTimeAsFileTime <https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getsystemtimeasfiletime>`_
  for host code on Windows.
-- `clock_gettime(CLOCK_REALTIME, ...) <https://linux.die.net/man/3/clock_gettime>`_ and `gettimeofday <https://linux.die.net/man/2/gettimeofday>`_
+- `clock_gettime(CLOCK_REALTIME, ...) <https://man7.org/linux/man-pages/man3/clock_gettime.3.html>`_ and `gettimeofday <https://man7.org/linux/man-pages/man2/gettimeofday.2.html>`_
  for host code on Linux, Android, and QNX.
- `PTX's %globaltimer <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer>`_ for device code.
4 changes: 4 additions & 0 deletions docs/thrust/cmake_options.rst
@@ -1,3 +1,5 @@
+.. _cmake-options:
+
CMake Options
=============

@@ -83,6 +85,8 @@ Single Config CMake Options
  - Selects the C++ standard dialect to use. Default is ``14``
    (C++14).

+.. _cmake-multi-config-options:
+
Multi Config CMake Options
--------------------------
11 changes: 5 additions & 6 deletions docs/thrust/releases/changelog.rst
@@ -223,7 +223,7 @@ Thrust 1.17.0 is the final minor release of the 1.X series. This release
provides GDB pretty-printers for device vectors/references, a new
``unique_count`` algorithm, and an easier way to create tagged Thrust
iterators. Several documentation fixes are included, which can be found
-on the new Thrust documentation site at https://nvidia.github.io/thrust.
+on the new Thrust documentation site at https://nvidia.github.io/cccl/thrust/.
We’ll be migrating existing documentation sources to this new location
over the next few months.

@@ -255,8 +255,7 @@ Other Enhancements

- NVIDIA/thrust#1512: Use CUB to implement ``adjacent_difference``.
- NVIDIA/thrust#1555: Use CUB to implement ``scan_by_key``.
-- NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation at
-  https://nvidia.github.io/thrust.
+- NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation
- NVIDIA/thrust#1639: Fixed broken link in documentation. Thanks to
  @jrhemstad for this contribution.
- NVIDIA/thrust#1644: Increase contrast of search input text in new doc

@@ -792,15 +791,15 @@ New Features
- NVIDIA/thrust#1159: CMake multi-config support, which allows multiple
  combinations of host and device systems to be built and tested at
  once. More details can be found here:
-  https://github.com/NVIDIA/thrust/blob/main/CONTRIBUTING.md#multi-config-cmake-options
+  :ref:`Multi Config CMake Options <cmake-multi-config-options>`
- CMake refactoring:

  - Added install targets to CMake builds.
  - Added support for CUB tests and examples.
  - Thrust can be added to another CMake project by calling
    ``add_subdirectory`` with the Thrust source root (see
    NVIDIA/thrust#976). An example can be found here:
-    https://github.com/NVIDIA/thrust/blob/main/examples/cmake/add_subdir/CMakeLists.txt
+    https://github.com/NVIDIA/cccl/blob/main/thrust/examples/cmake/add_subdir/CMakeLists.txt
  - CMake < 3.15 is no longer supported.
  - Dialects are now configured through target properties. A new
    ``THRUST_CPP_DIALECT`` option has been added for single config

@@ -831,7 +830,7 @@ Other Enhancements
~~~~~~~~~~~~~~~~~~

- Contributor documentation:
-  https://github.com/NVIDIA/thrust/blob/main/CONTRIBUTING.md
+  https://github.com/NVIDIA/cccl/blob/main/CONTRIBUTING.md
- Code of Conduct:
  https://github.com/NVIDIA/thrust/blob/main/CODE_OF_CONDUCT.md. Thanks
  to Conor Hoekstra for this contribution.
4 changes: 2 additions & 2 deletions thrust/README.md
@@ -123,7 +123,7 @@ git clone --recursive https://github.com/NVIDIA/thrust.git

## Using Thrust From Your Project

-For CMake-based projects, we provide a CMake package for use with `find_package`. See the [CMake README](https://github.com/NVIDIA/cccl/blob/main/docs/thrust/github_pages/setup/cmake_options.md) for more information.
+For CMake-based projects, we provide a CMake package for use with `find_package`. See :ref:`CMake Options <cmake-options>` for more information.
Thrust can also be added via `add_subdirectory` or tools like the [CMake Package Manager](https://github.com/cpm-cmake/CPM.cmake).

For non-CMake projects, compile with:

@@ -188,7 +188,7 @@ Some parts are distributed under the [Apache License v2.0] and the [Boost Licens

[GitHub]: https://github.com/NVIDIA/cccl/tree/main/thrust

-[contributing section]: https://nvidia.github.io/thrust/contributing.html
+[contributing section]: https://nvidia.github.io/cccl/thrust/contributing.html

[CMake build system]: https://cmake.org
3 changes: 1 addition & 2 deletions thrust/thrust/functional.h
@@ -1249,8 +1249,7 @@ _CCCL_SUPPRESS_DEPRECATED_PUSH
 * \param pred The Adaptable Binary Predicate to negate.
 * \return A new object, <tt>npred</tt> such that <tt>npred(x,y)</tt> always returns
 * the same value as <tt>!pred(x,y)</tt>.
- * \tparam Binary Predicate is a model of <a
- * href="https://en.cppreference.com/w/cpp/utility/functional/AdaptableBinaryPredicate">Adaptable Binary Predicate</a>.
+ * \tparam Binary Predicate is a model of an Adaptable Binary Predicate.
 * \see binary_negate
 * \see not1
 */
4 changes: 2 additions & 2 deletions thrust/thrust/replace.h
@@ -54,7 +54,7 @@ THRUST_NAMESPACE_BEGIN
 * \tparam DerivedPolicy The name of the derived execution policy.
 * \tparam ForwardIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/forward_iterator">Forward
 * Iterator</a>, and \p ForwardIterator is mutable. \tparam T is a model of <a
- * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable>Assignable">Assignable</a>, \p T is a model of <a
+ * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, \p T is a model of <a
 * href="https://en.cppreference.com/w/cpp/concepts/equality_comparable">EqualityComparable</a>, objects of \p T may be
 * compared for equality with objects of \p ForwardIterator's \c value_type, and \p T is convertible to \p
 * ForwardIterator's \c value_type.

@@ -105,7 +105,7 @@ replace(const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
 *
 * \tparam ForwardIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/forward_iterator">Forward
 * Iterator</a>, and \p ForwardIterator is mutable. \tparam T is a model of <a
- * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable>Assignable">Assignable</a>, \p T is a model of <a
+ * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, \p T is a model of <a
 * href="https://en.cppreference.com/w/cpp/concepts/equality_comparable">EqualityComparable</a>, objects of \p T may be
 * compared for equality with objects of \p ForwardIterator's \c value_type, and \p T is convertible to \p
 * ForwardIterator's \c value_type.
