From ce957394a3c9588aa4fe5b3323b8610949e404da Mon Sep 17 00:00:00 2001
From: Bryan Van de Ven <bryan@bokeh.org>
Date: Tue, 30 Jul 2024 08:31:00 -0700
Subject: [PATCH] Update bad links (#2080)

* fix broken links
* revert repo.toml
* linkchecker fixes
* fix .cuh errors
* lint
---
 cub/CONTRIBUTING.md                                |  7 +------
 cub/cub/block/block_discontinuity.cuh              |  2 +-
 cub/cub/block/block_histogram.cuh                  |  2 +-
 cub/cub/block/block_merge_sort.cuh                 |  5 ++---
 cub/cub/block/block_radix_sort.cuh                 | 14 ++++++--------
 cub/cub/block/block_scan.cuh                       |  6 +++---
 cub/cub/device/device_spmv.cuh                     |  3 ++-
 cub/cub/warp/warp_exchange.cuh                     | 10 +++++-----
 docs/cub/index.rst                                 |  2 +-
 .../memory_access_properties/access_property.rst   |  2 +-
 docs/libcudacxx/extended_api/memory_model.rst      |  2 +-
 .../extended_api/synchronization_primitives.rst    |  2 +-
 docs/libcudacxx/ptx.rst                            |  6 +++---
 docs/libcudacxx/releases.rst                       |  2 +-
 docs/libcudacxx/releases/versioning.rst            |  5 +++--
 docs/libcudacxx/standard_api/time_library.rst      |  2 +-
 docs/thrust/cmake_options.rst                      |  4 ++++
 docs/thrust/releases/changelog.rst                 | 11 +++++------
 thrust/README.md                                   |  4 ++--
 thrust/thrust/functional.h                         |  3 +--
 thrust/thrust/replace.h                            |  4 ++--
 21 files changed, 47 insertions(+), 51 deletions(-)
diff --git a/cub/CONTRIBUTING.md b/cub/CONTRIBUTING.md
index 0b6813ea78..4002779dcd 100644
--- a/cub/CONTRIBUTING.md
+++ b/cub/CONTRIBUTING.md
@@ -17,7 +17,7 @@ changes. CUB's tests and examples can be built by configuring Thrust with the
 CMake option `THRUST_INCLUDE_CUB_CMAKE=ON`.
 
 This process is described in more detail in Thrust's
-[CONTRIBUTING.md](https://nvidia.github.io/thrust/contributing.html).
+[CONTRIBUTING.md](https://nvidia.github.io/cccl/thrust/contributing.html).
 
 The CMake options in the following section may be used to customize CUB's build
 process. Note that some of these are controlled by Thrust for compatibility and
@@ -63,8 +63,3 @@ The configuration options for CUB are:
   - Enable separable compilation on all targets that are agnostic of RDC.
   - Targets that explicitly require RDC to be enabled or disabled will ignore this setting.
   - Default is `OFF`.
-
-# Development Model
-
-CUB follows the same development model as Thrust, described
-[here](https://nvidia.github.io/thrust/releases/versioning.html).
diff --git a/cub/cub/block/block_discontinuity.cuh b/cub/cub/block/block_discontinuity.cuh
index b75016ac7f..2fb15e9059 100644
--- a/cub/cub/block/block_discontinuity.cuh
+++ b/cub/cub/block/block_discontinuity.cuh
@@ -28,7 +28,7 @@
 
 /**
  * @file
- * The cub::BlockDiscontinuity class provides [<em>collective</em>](index.html#sec0) methods for
+ * The cub::BlockDiscontinuity class provides [<em>collective</em>](../index.html#sec0) methods for
  * flagging discontinuities within an ordered set of items partitioned across a CUDA thread block.
  */
 
diff --git a/cub/cub/block/block_histogram.cuh b/cub/cub/block/block_histogram.cuh
index 3553ec79da..d5726f240f 100644
--- a/cub/cub/block/block_histogram.cuh
+++ b/cub/cub/block/block_histogram.cuh
@@ -28,7 +28,7 @@
 
 /**
  * @file
- * The cub::BlockHistogram class provides [<em>collective</em>](index.html#sec0) methods for
+ * The cub::BlockHistogram class provides [<em>collective</em>](../index.html#sec0) methods for
  * constructing block-wide histograms from data samples partitioned across a CUDA thread block.
  */
 
diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
index 5ca9500550..29510db5e9 100644
--- a/cub/cub/block/block_merge_sort.cuh
+++ b/cub/cub/block/block_merge_sort.cuh
@@ -721,10 +721,9 @@ private:
  * `{ [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }`.
  *
  * @par Re-using dynamically allocating shared memory
- * The following example under the examples/block folder illustrates usage of
+ * The ``block/example_block_reduce_dyn_smem.cu`` example illustrates usage of
  * dynamically shared memory with BlockReduce and how to re-purpose
- * the same memory region:
- * <a href="../../examples/block/example_block_reduce_dyn_smem.cu">example_block_reduce_dyn_smem.cu</a>
+ * the same memory region.
  *
  * This example can be easily adapted to the storage required by BlockMergeSort.
  */
diff --git a/cub/cub/block/block_radix_sort.cuh b/cub/cub/block/block_radix_sort.cuh
index 964f4fbe0e..4865099291 100644
--- a/cub/cub/block/block_radix_sort.cuh
+++ b/cub/cub/block/block_radix_sort.cuh
@@ -28,7 +28,7 @@
 
 /**
  * @file
- * The cub::BlockRadixSort class provides [<em>collective</em>](index.html#sec0) methods for radix
+ * The cub::BlockRadixSort class provides [<em>collective</em>](../index.html#sec0) methods for radix
  * sorting of items partitioned across a CUDA thread block.
  */
 
@@ -142,7 +142,7 @@ CUB_NAMESPACE_BEGIN
 //! @blockcollective{BlockRadixSort}
 //!
 //! The code snippet below illustrates a sort of 512 integer keys that
-//! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+//! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
 //! where each thread owns 4 consecutive items.
 //!
 //! .. tab-set-code::
@@ -199,10 +199,8 @@ CUB_NAMESPACE_BEGIN
 //! Re-using dynamically allocating shared memory
 //! --------------------------------------------------
 //!
-//! The following example under the examples/block folder illustrates usage of
-//! dynamically shared memory with BlockReduce and how to re-purpose
-//! the same memory region:
-//! <a href="../../examples/block/example_block_reduce_dyn_smem.cu">example_block_reduce_dyn_smem.cu</a>
+//! The ``block/example_block_reduce_dyn_smem.cu`` example illustrates usage of dynamically allocated shared
+//! memory with BlockReduce and how to re-purpose the same memory region.
 //!
 //! This example can be easily adapted to the storage required by BlockRadixSort.
 //! @endrst
@@ -986,7 +984,7 @@ public:
   //! +++++++
   //!
   //! The code snippet below illustrates a sort of 512 integer keys that
-  //! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+  //! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
   //! where each thread owns 4 consecutive keys.
   //!
   //! .. code-block:: c++
@@ -1590,7 +1588,7 @@ public:
   //! +++++++
   //!
   //! The code snippet below illustrates a sort of 512 integer keys and values that
-  //! are initially partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128
+  //! are initially partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128
   //! threads where each thread owns 4 consecutive pairs.  The final partitioning is striped.
   //!
   //! .. code-block:: c++
diff --git a/cub/cub/block/block_scan.cuh b/cub/cub/block/block_scan.cuh
index a06b7c185f..df7ab6e814 100644
--- a/cub/cub/block/block_scan.cuh
+++ b/cub/cub/block/block_scan.cuh
@@ -1011,7 +1011,7 @@ public:
   //! +++++++
   //!
   //! The code snippet below illustrates an exclusive prefix max scan of 512 integer
-  //! items that are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3)
+  //! items that are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3)
   //! across 128 threads where each thread owns 4 consecutive items.
   //!
   //! .. code-block:: c++
@@ -2180,7 +2180,7 @@ public:
   //! +++++++
   //!
   //! The code snippet below illustrates an inclusive prefix max scan of 512 integer items that
-  //! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+  //! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
   //! where each thread owns 4 consecutive items.
   //!
   //! .. code-block:: c++
@@ -2314,7 +2314,7 @@ public:
   //! +++++++
   //!
   //! The code snippet below illustrates an inclusive prefix max scan of 512 integer items that
-  //! are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec3) across 128 threads
+  //! are partitioned in a [<em>blocked arrangement</em>](../index.html#sec5sec3) across 128 threads
   //! where each thread owns 4 consecutive items.
   //!
   //! .. code-block:: c++
diff --git a/cub/cub/device/device_spmv.cuh b/cub/cub/device/device_spmv.cuh
index 93e51f2293..6d6d126482 100644
--- a/cub/cub/device/device_spmv.cuh
+++ b/cub/cub/device/device_spmv.cuh
@@ -67,7 +67,8 @@ CUB_NAMESPACE_BEGIN
 //!
 //!  - ``A`` is an ``m * n`` sparse matrix whose non-zero structure is specified in
 //!    `compressed-storage-row (CSR) format
-//!    <http://en.wikipedia.org/wiki/Sparse_matrix#Compressed_row_Storage_.28CRS_or_CSR.29>`_ (i.e., three arrays:
+//!    <https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)>`_ (i.e., three
+//!    arrays:
 //!    ``values``, ``row_offsets``, and ``column_indices``)
 //!  - ``x`` and ``y`` are dense vectors
 //!
diff --git a/cub/cub/warp/warp_exchange.cuh b/cub/cub/warp/warp_exchange.cuh
index 712d0a6bcd..79f422f5ab 100644
--- a/cub/cub/warp/warp_exchange.cuh
+++ b/cub/cub/warp/warp_exchange.cuh
@@ -27,7 +27,7 @@
 
 /**
  * @file
- * The cub::WarpExchange class provides [<em>collective</em>](index.html#sec0)
+ * The cub::WarpExchange class provides [<em>collective</em>](../index.html#sec0)
  * methods for rearranging data partitioned across a CUDA warp.
  */
 
@@ -68,7 +68,7 @@ using InternalWarpExchangeImpl =
 } // namespace detail
 
 /**
- * @brief The WarpExchange class provides [<em>collective</em>](index.html#sec0)
+ * @brief The WarpExchange class provides [<em>collective</em>](../index.html#sec0)
  *        methods for rearranging data partitioned across a CUDA warp.
  *
  * @tparam T
@@ -94,10 +94,10 @@ using InternalWarpExchangeImpl =
  *   partitioning of items across threads (where consecutive items belong to a
  *   single thread).
  * - WarpExchange supports the following types of data exchanges:
- *   - Transposing between [<em>blocked</em>](index.html#sec5sec3) and
- *     [<em>striped</em>](index.html#sec5sec3) arrangements
+ *   - Transposing between [<em>blocked</em>](../index.html#sec5sec3) and
+ *     [<em>striped</em>](../index.html#sec5sec3) arrangements
  *   - Scattering ranked items to a
- *     [<em>striped arrangement</em>](index.html#sec5sec3)
+ *     [<em>striped arrangement</em>](../index.html#sec5sec3)
  *
  * @par A Simple Example
  * @par
diff --git a/docs/cub/index.rst b/docs/cub/index.rst
index 17f00719e0..f39df651bd 100644
--- a/docs/cub/index.rst
+++ b/docs/cub/index.rst
@@ -435,7 +435,7 @@ How is CUB different than Thrust and Modern GPU?
 CUB and Thrust
 --------------------------------------------------
 
-CUB and `Thrust <http://thrust.github.io/>`_ share some
+CUB and `Thrust <https://nvidia.github.io/cccl/thrust/>`_ share some
 similarities in that they both provide similar device-wide primitives for CUDA.
 However, they target different abstraction layers for parallel computing.
 Thrust abstractions are agnostic of any particular parallel framework (e.g.,
diff --git a/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst b/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst
index 44006c52d4..762d0d0e0e 100644
--- a/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst
+++ b/docs/libcudacxx/extended_api/memory_access_properties/access_property.rst
@@ -258,7 +258,7 @@ Mapping of access properties to NVVM-IR and the PTX ISA
 
 When ``cuda::access_property`` is applied to memory operation, it
 sometimes matches with some of the cache eviction priorities and cache
-hints introduced in the `PTX ISA Version 7.4 <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-isa-version-7-4>`_.
+hints introduced in the `PTX ISA Version 7.4 <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#changes-in-ptx-isa-version-7-4>`_.
 See `Cache Eviction Priority Hints <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-eviction-priority-hints>`_
 
 -  ``global``: ``evict_unchanged``
diff --git a/docs/libcudacxx/extended_api/memory_model.rst b/docs/libcudacxx/extended_api/memory_model.rst
index 91048ca680..ff9f9ef44c 100644
--- a/docs/libcudacxx/extended_api/memory_model.rst
+++ b/docs/libcudacxx/extended_api/memory_model.rst
@@ -78,7 +78,7 @@ An atomic operation is atomic at the scope it specifies if:
 .. note::
    If `hostNativeAtomicSupported` is `0`, atomic load or store operations at system scope that affect a
    naturally-aligned 16-byte wide object in
-   `unified memory <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#unified-memory>`__ or
+   `unified memory <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-unified-memory-programming-hd>`__ or
    `mapped memory <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#mapped-memory>`__ require system
    support. NVIDIA is not aware of any system that lacks this support and there is no CUDA API query available to
    detect such systems.
diff --git a/docs/libcudacxx/extended_api/synchronization_primitives.rst b/docs/libcudacxx/extended_api/synchronization_primitives.rst
index 9a9a08c45a..0d0101b08e 100644
--- a/docs/libcudacxx/extended_api/synchronization_primitives.rst
+++ b/docs/libcudacxx/extended_api/synchronization_primitives.rst
@@ -61,7 +61,7 @@ Synchronization Primitives
        primitive for constraining concurrent access
      - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.0
    * - :ref:`cuda::binary_semaphore <libcudacxx-extended-api-synchronization-counting-semaphore>`
-     - System wide `std::binary_semaphore <https://en.cppreference.com/w/cpp/thread/binary_semaphore>`_
+     - System wide `std::binary_semaphore <https://en.cppreference.com/w/cpp/thread/counting_semaphore>`_
        primitive for mutual exclusion
      - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.0
 
diff --git a/docs/libcudacxx/ptx.rst b/docs/libcudacxx/ptx.rst
index c5fea593d5..c4483db2de 100644
--- a/docs/libcudacxx/ptx.rst
+++ b/docs/libcudacxx/ptx.rst
@@ -480,9 +480,9 @@ Instructions by section
      - No
    * - `wmma.store <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-wmma-store>`__
      - No
-   * - `wmma.mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-multiply-accumulate-instructions-wmma-mma>`__
+   * - `wmma.mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-wmma-mma>`__
      - No
-   * - `mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-multiply-accumulate-instructions-mma>`__
+   * - `mma <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma>`__
      - No
    * - `ldmatrix <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-load-instruction-ldmatrix>`__
      - No
@@ -490,7 +490,7 @@ Instructions by section
      - No
    * - `movmatrix <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-transpose-instruction-movmatrix>`__
      - No
-   * - `mma.sp <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#multiply-and-accumulate-instruction-mma-sp>`__
+   * - `mma.sp <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-sparse-mma>`__
      - No
 
 .. list-table:: `Asynchronous Warpgroup Level Matrix Multiply-Accumulate Instructions <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-multiply-accumulate-instructions>`__
diff --git a/docs/libcudacxx/releases.rst b/docs/libcudacxx/releases.rst
index 182df6b351..cf91820fad 100644
--- a/docs/libcudacxx/releases.rst
+++ b/docs/libcudacxx/releases.rst
@@ -1,7 +1,7 @@
 .. _libcudacxx-releases:
 
 Releases
-============
+========
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/libcudacxx/releases/versioning.rst b/docs/libcudacxx/releases/versioning.rst
index 703ffaa855..514aed184f 100644
--- a/docs/libcudacxx/releases/versioning.rst
+++ b/docs/libcudacxx/releases/versioning.rst
@@ -149,8 +149,9 @@ that the default ABI version may change in any release. A subset of
 older ABI versions can be used instead by defining
 ``_LIBCUDACXX_CUDA_ABI_VERSION`` to the desired version.
 
-For more information on specific ABI versions, please see the `releases
-section <../releases.md>`_ and `changelog <changelog.md>`_.
+For more information on specific ABI versions, please see the
+:ref:`releases section <libcudacxx-releases>` and
+:ref:`changelog <libcudacxx-releases-changelog>`.
 
 A program is ill-formed, no diagnostic required, if it uses two
 different translation units compiled with a different NVIDIA C++
diff --git a/docs/libcudacxx/standard_api/time_library.rst b/docs/libcudacxx/standard_api/time_library.rst
index 176e2a9ef6..e28685730b 100644
--- a/docs/libcudacxx/standard_api/time_library.rst
+++ b/docs/libcudacxx/standard_api/time_library.rst
@@ -34,7 +34,7 @@ we use:
 -  `GetSystemTimePreciseAsFileTime <https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getsystemtimepreciseasfiletime>`_ and
    `GetSystemTimeAsFileTime <https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getsystemtimeasfiletime>`_
    for host code on Windows.
--  `clock_gettime(CLOCK_REALTIME, ...) <https://linux.die.net/man/3/clock_gettime>`_ and `gettimeofday <https://linux.die.net/man/2/gettimeofday>`_
+-  `clock_gettime(CLOCK_REALTIME, ...) <https://man7.org/linux/man-pages/man3/clock_gettime.3.html>`_ and `gettimeofday <https://man7.org/linux/man-pages/man2/gettimeofday.2.html>`_
    for host code on Linux, Android, and QNX.
 -  `PTX's %globaltimer <https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer>`_ for device code.
 
diff --git a/docs/thrust/cmake_options.rst b/docs/thrust/cmake_options.rst
index 47fb14e750..a3dab2487b 100644
--- a/docs/thrust/cmake_options.rst
+++ b/docs/thrust/cmake_options.rst
@@ -1,3 +1,5 @@
+.. _cmake-options:
+
 CMake Options
 =============
 
@@ -83,6 +85,8 @@ Single Config CMake Options
    -  Selects the C++ standard dialect to use. Default is ``14``
       (C++14).
 
+.. _cmake-multi-config-options:
+
 Multi Config CMake Options
 --------------------------
 
diff --git a/docs/thrust/releases/changelog.rst b/docs/thrust/releases/changelog.rst
index 477e3c74d7..9ace537b48 100644
--- a/docs/thrust/releases/changelog.rst
+++ b/docs/thrust/releases/changelog.rst
@@ -223,7 +223,7 @@ Thrust 1.17.0 is the final minor release of the 1.X series. This release
 provides GDB pretty-printers for device vectors/references, a new
 ``unique_count`` algorithm, and an easier way to create tagged Thrust
 iterators. Several documentation fixes are included, which can be found
-on the new Thrust documentation site at https://nvidia.github.io/thrust.
+on the new Thrust documentation site at https://nvidia.github.io/cccl/thrust/.
 We’ll be migrating existing documentation sources to this new location
 over the next few months.
 
@@ -255,8 +255,7 @@ Other Enhancements
 
 -  NVIDIA/thrust#1512: Use CUB to implement ``adjacent_difference``.
 -  NVIDIA/thrust#1555: Use CUB to implement ``scan_by_key``.
--  NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation at
-   https://nvidia.github.io/thrust.
+-  NVIDIA/thrust#1611: Add new doxybook-based Thrust documentation.
 -  NVIDIA/thrust#1639: Fixed broken link in documentation. Thanks to
    @jrhemstad for this contribution.
 -  NVIDIA/thrust#1644: Increase contrast of search input text in new doc
@@ -792,7 +791,7 @@ New Features
 -  NVIDIA/thrust#1159: CMake multi-config support, which allows multiple
    combinations of host and device systems to be built and tested at
    once. More details can be found here:
-   https://github.com/NVIDIA/thrust/blob/main/CONTRIBUTING.md#multi-config-cmake-options
+   :ref:`Multi Config CMake Options <cmake-multi-config-options>`
 -  CMake refactoring:
 
    -  Added install targets to CMake builds.
@@ -800,7 +799,7 @@ New Features
    -  Thrust can be added to another CMake project by calling
       ``add_subdirectory`` with the Thrust source root (see
       NVIDIA/thrust#976). An example can be found here:
-      https://github.com/NVIDIA/thrust/blob/main/examples/cmake/add_subdir/CMakeLists.txt
+      https://github.com/NVIDIA/cccl/blob/main/thrust/examples/cmake/add_subdir/CMakeLists.txt
    -  CMake < 3.15 is no longer supported.
    -  Dialects are now configured through target properties. A new
       ``THRUST_CPP_DIALECT`` option has been added for single config
@@ -831,7 +830,7 @@ Other Enhancements
 ~~~~~~~~~~~~~~~~~~
 
 -  Contributor documentation:
-   https://github.com/NVIDIA/thrust/blob/main/CONTRIBUTING.md
+   https://github.com/NVIDIA/cccl/blob/main/CONTRIBUTING.md
 -  Code of Conduct:
    https://github.com/NVIDIA/thrust/blob/main/CODE_OF_CONDUCT.md. Thanks
    to Conor Hoekstra for this contribution.
diff --git a/thrust/README.md b/thrust/README.md
index 7a0c478c99..37103fee04 100644
--- a/thrust/README.md
+++ b/thrust/README.md
@@ -123,7 +123,7 @@ git clone --recursive https://github.com/NVIDIA/thrust.git
 
 ## Using Thrust From Your Project
 
-For CMake-based projects, we provide a CMake package for use with `find_package`. See the [CMake README](https://github.com/NVIDIA/cccl/blob/main/docs/thrust/github_pages/setup/cmake_options.md) for more information.
+For CMake-based projects, we provide a CMake package for use with `find_package`. See the [CMake Options](https://nvidia.github.io/cccl/thrust/cmake_options.html) documentation for more information.
 Thrust can also be added via `add_subdirectory` or tools like the [CMake Package Manager](https://github.com/cpm-cmake/CPM.cmake).
 
 For non-CMake projects, compile with:
@@ -188,7 +188,7 @@ Some parts are distributed under the [Apache License v2.0] and the [Boost Licens
 
 [GitHub]: https://github.com/NVIDIA/cccl/tree/main/thrust
 
-[contributing section]: https://nvidia.github.io/thrust/contributing.html
+[contributing section]: https://nvidia.github.io/cccl/thrust/contributing.html
 
 [CMake build system]: https://cmake.org
 
diff --git a/thrust/thrust/functional.h b/thrust/thrust/functional.h
index 87ba0f78cd..4b88f46954 100644
--- a/thrust/thrust/functional.h
+++ b/thrust/thrust/functional.h
@@ -1249,8 +1249,7 @@ _CCCL_SUPPRESS_DEPRECATED_PUSH
  *  \param pred The Adaptable Binary Predicate to negate.
  *  \return A new object, <tt>npred</tt> such that <tt>npred(x,y)</tt> always returns
  *          the same value as <tt>!pred(x,y)</tt>.
- *  \tparam Binary Predicate is a model of <a
- * href="https://en.cppreference.com/w/cpp/utility/functional/AdaptableBinaryPredicate">Adaptable Binary Predicate</a>.
+ *  \tparam BinaryPredicate is a model of an Adaptable Binary Predicate.
  *  \see binary_negate
  *  \see not1
  */
diff --git a/thrust/thrust/replace.h b/thrust/thrust/replace.h
index 5a455c385a..5d0b58e6d0 100644
--- a/thrust/thrust/replace.h
+++ b/thrust/thrust/replace.h
@@ -54,7 +54,7 @@ THRUST_NAMESPACE_BEGIN
  *  \tparam DerivedPolicy The name of the derived execution policy.
  *  \tparam ForwardIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/forward_iterator">Forward
  * Iterator</a>, and \p ForwardIterator is mutable. \tparam T is a model of <a
- * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable>Assignable">Assignable</a>, \p T is a model of <a
+ * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, \p T is a model of <a
  * href="https://en.cppreference.com/w/cpp/concepts/equality_comparable">EqualityComparable</a>, objects of \p T may be
  * compared for equality with objects of \p ForwardIterator's \c value_type, and \p T is convertible to \p
  * ForwardIterator's \c value_type.
@@ -105,7 +105,7 @@ replace(const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
  *
  *  \tparam ForwardIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/forward_iterator">Forward
  * Iterator</a>, and \p ForwardIterator is mutable. \tparam T is a model of <a
- * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable>Assignable">Assignable</a>, \p T is a model of <a
+ * href="https://en.cppreference.com/w/cpp/named_req/CopyAssignable">Assignable</a>, \p T is a model of <a
  * href="https://en.cppreference.com/w/cpp/concepts/equality_comparable">EqualityComparable</a>, objects of \p T may be
  * compared for equality with objects of \p ForwardIterator's \c value_type, and \p T is convertible to \p
  * ForwardIterator's \c value_type.