diff --git a/.github/workflows/build-and-test-linux.yml b/.github/workflows/build-and-test-linux.yml
new file mode 100644
index 0000000000..b328e97884
--- /dev/null
+++ b/.github/workflows/build-and-test-linux.yml
@@ -0,0 +1,39 @@
+name: build and test
+
+defaults:
+  run:
+    shell: bash -exo pipefail {0}
+
+on:
+  workflow_call:
+    inputs:
+      cpu: {type: string, required: true}
+      test_name: {type: string, required: false}
+      build_script: {type: string, required: false}
+      test_script: {type: string, required: false}
+      container_image: {type: string, required: false}
+      run_tests: {type: boolean, required: false, default: true}
+
+jobs:
+  build:  # compiles on the cpu16 runner
+    name: Build ${{ inputs.test_name }}
+    uses: ./.github/workflows/run-as-coder.yml
+    with:
+      name: Build ${{ inputs.test_name }}
+      runner: linux-${{ inputs.cpu }}-cpu16
+      image: ${{ inputs.container_image }}
+      command: |
+        ${{ inputs.build_script }}
+
+  test:  # runs on the v100 GPU runner
+    needs: build  # a skipped build still lets the tests run, see `if`
+    if: ${{ !cancelled() && (needs.build.result == 'success' || needs.build.result == 'skipped') && inputs.run_tests }}
+    name: Test ${{ inputs.test_name }}
+    uses: ./.github/workflows/run-as-coder.yml
+    with:
+      name: Test ${{ inputs.test_name }}
+      runner: linux-${{ inputs.cpu }}-gpu-v100-latest-1
+      image: ${{ inputs.container_image }}
+      command: |
+        nvidia-smi
+        ${{ inputs.test_script }}
diff --git a/.github/workflows/build-and-test-windows.yml b/.github/workflows/build-and-test-windows.yml
new file mode 100644
index 0000000000..83c143899c
--- /dev/null
+++ b/.github/workflows/build-and-test-windows.yml
@@ -0,0 +1,49 @@
+name: Build Windows
+
+on:
+  workflow_call:
+    inputs:
+      test_name: {type: string, required: false}
+      build_script: {type: string, required: false}
+      container_image: {type: string, required: false}
+
+jobs:
+  prepare:
+    name: Build ${{inputs.test_name}}
+    runs-on: windows-2022
+    permissions:
+      id-token: write
+      contents: read
+    env:
+      SCCACHE_BUCKET: rapids-sccache-devs
+      SCCACHE_REGION: us-east-2
+      SCCACHE_IDLE_TIMEOUT: 0
+      SCCACHE_S3_USE_SSL: true
+      SCCACHE_S3_NO_CREDENTIALS: false
+    steps:
+      - name: Get AWS credentials for sccache bucket
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
+          aws-region: us-east-2
+          role-duration-seconds: 43200 # 12 hours
+      - name: Fetch ${{ inputs.container_image }}
+        shell: powershell
+        run: docker pull ${{ inputs.container_image }}
+      - name: Run the tests
+        shell: powershell
+        run: >-
+            docker run ${{ inputs.container_image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}')
+                                                                    git clone https://github.com/NVIDIA/cccl.git;
+                                                                    cd cccl;
+                                                                    git fetch --all;
+                                                                    git checkout ${{github.ref_name}};
+                                                                    ${{inputs.build_script}};"
+
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
deleted file mode 100644
index 38fe7c899e..0000000000
--- a/.github/workflows/build-and-test.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: build and test
-
-defaults:
-  run:
-    shell: bash -exo pipefail {0}
-
-on:
-  workflow_call:
-    inputs:
-      cuda_version: {type: string, required: true}
-      compiler: {type: string, required: true}
-      compiler_exe: {type: string, required: true}
-      compiler_version: {type: string, required: true}
-      std: {type: string, required: true}
-      gpu_build_archs: {type: string, required: true}
-      cpu: {type: string, required: true}
-      os: {type: string, required: true}
-      build_script: {type: string, required: false}
-      test_script: {type: string, required: false}
-      run_tests: {type: boolean, required: false, default: true}
-      build_image: {type: string, required: false}
-      test_image: {type: string, required: false}
-
-jobs:
-  build:
-    if: inputs.build_script != '' && inputs.build_image != ''
-    name: Build ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}
-    uses: ./.github/workflows/run-as-coder.yml
-    with:
-      name: Build ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}
-      runner: linux-${{inputs.cpu}}-cpu16
-      image: ${{inputs.build_image}}
-      command: |
-        ${{ inputs.build_script }} "${{inputs.compiler_exe}}" "${{inputs.std}}" "${{inputs.gpu_build_archs}}"
-
-  test:
-    needs: build
-    if:  ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.test_script != '' && inputs.test_image != '' && inputs.run_tests}}
-    name: Test ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}
-    uses: ./.github/workflows/run-as-coder.yml
-    with:
-      name: Test ${{inputs.compiler}}${{inputs.compiler_version}}/C++${{inputs.std}}
-      runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
-      image: ${{inputs.test_image}}
-      command: |
-        nvidia-smi
-        ${{ inputs.test_script }} "${{inputs.compiler_exe}}" "${{inputs.std}}" "${{inputs.gpu_build_archs}}"
diff --git a/.github/workflows/build-examples.yml b/.github/workflows/build-examples.yml
new file mode 100644
index 0000000000..d23ff6c273
--- /dev/null
+++ b/.github/workflows/build-examples.yml
@@ -0,0 +1,27 @@
+name: Build examples
+on:
+  workflow_call:
+    inputs:
+      per_cuda_compiler_matrix: {type: string, required: true}
+      devcontainer_version: {type: string, required: true}
+      is_windows: {type: boolean, required: true}
+
+jobs:
+  # Using a matrix to dispatch to the run-as-coder reusable workflow for each build configuration
+  # ensures that the example builds can overlap across different configurations. For example,
+  # the examples for CUDA 12.1 + gcc 9.3 can build at the same time as those for CUDA 11.0 + clang 11.
+  build_examples:
+    name: Build examples
+    if: ${{ !inputs.is_windows }}
+    uses: ./.github/workflows/run-as-coder.yml
+    strategy:
+      fail-fast: false
+      matrix:
+        include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+    with:
+      name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}}
+      runner: linux-${{matrix.cpu}}-gpu-v100-latest-1
+      image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+      command: |
+        cmake -S . --preset=examples -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} -DCMAKE_CUDA_COMPILER=nvcc
+        ctest --preset=examples
\ No newline at end of file
diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml
index 52bb3dcd59..db93fa5d70 100644
--- a/.github/workflows/dispatch-build-and-test.yml
+++ b/.github/workflows/dispatch-build-and-test.yml
@@ -3,34 +3,40 @@ name: Dispatch build and test
 on:
   workflow_call:
     inputs:
+      project_name: {type: string, required: true}
       per_cuda_compiler_matrix: {type: string, required: true}
-      build_script: {type: string, required: false}
-      test_script: {type: string, required: false}
       devcontainer_version: {type: string, required: true}
+      is_windows: {type: boolean, required: true}
 
 jobs:
   # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration
   # ensures that the build/test steps can overlap across different configurations. For example,
   # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11.
-  build_and_test:
-    name: ${{matrix.cpu}}
-    uses: ./.github/workflows/build-and-test.yml
+  build_and_test_linux:
+    name: build and test linux
+    if: ${{ !inputs.is_windows }}
+    uses: ./.github/workflows/build-and-test-linux.yml
     strategy:
       fail-fast: false
       matrix:
         include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
     with:
-      cuda_version: ${{ matrix.cuda }}
-      compiler: ${{ matrix.compiler.name }}
-      compiler_exe: ${{ matrix.compiler.exe }}
-      compiler_version: ${{ matrix.compiler.version }}
-      std: ${{ matrix.std }}
-      gpu_build_archs: ${{ matrix.gpu_build_archs }}
       cpu: ${{ matrix.cpu }}
-      os: ${{ matrix.os }}
-      build_script: ${{ inputs.build_script }}
-      build_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
-      test_script: ${{ inputs.test_script }}
-      run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }}
-      test_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+      test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
+      build_script: "./ci/build_${{ inputs.project_name }}.sh ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      test_script:  "./ci/test_${{ inputs.project_name }}.sh  ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+      run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows2022' }} # os spelled as in ci/matrix.yaml
 
+  build_and_test_windows:
+    name: build and test windows
+    if: ${{ inputs.is_windows }}
+    uses: ./.github/workflows/build-and-test-windows.yml
+    strategy:
+      fail-fast: false
+      matrix:
+        include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+    with:
+      test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
+      build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}}
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index d7fe05d1a1..fc9899885f 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -33,7 +33,7 @@ concurrency:
 
 jobs:
   compute-matrix:
-    name: Compute matrix 
+    name: Compute matrix
     runs-on: ubuntu-latest
     outputs:
       DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}}
@@ -49,7 +49,7 @@ jobs:
         id: set-outputs
         run: |
           .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request
-      
+
   nvrtc:
     name: NVRTC CUDA${{matrix.cuda}} C++${{matrix.std}}
     needs: compute-matrix
@@ -76,10 +76,10 @@ jobs:
         cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
         compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
+      project_name: "thrust"
       per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
-      build_script: "./ci/build_thrust.sh"
-      test_script: "./ci/test_thrust.sh"
       devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   cub:
     name: CUB CUDA${{ matrix.cuda_version }} ${{ matrix.compiler }}
@@ -91,10 +91,10 @@ jobs:
         cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
         compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
+      project_name: "cub"
       per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
-      build_script: "./ci/build_cub.sh"
-      test_script: "./ci/test_cub.sh"
       devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   libcudacxx:
     name: libcudacxx CUDA${{ matrix.cuda_version }} ${{ matrix.compiler }}
@@ -106,27 +106,24 @@ jobs:
         cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
         compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
+      project_name: "libcudacxx"
       per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
-      build_script: "./ci/build_libcudacxx.sh"
-      test_script: "./ci/test_libcudacxx.sh"
       devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   examples:
     name: CCCL Examples
     needs: compute-matrix
-    if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }}
+    uses: ./.github/workflows/build-examples.yml
     strategy:
       fail-fast: false
       matrix:
-        include: ${{ fromJSON(needs.compute-matrix.outputs.NVCC_FULL_MATRIX) }}
-    uses: ./.github/workflows/run-as-coder.yml
+        cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
+        compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
-      name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}}
-      runner: linux-${{matrix.cpu}}-gpu-v100-latest-1
-      image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
-      command: |
-        cmake -S . --preset=examples -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} -DCMAKE_CUDA_COMPILER=nvcc
-        ctest --preset=examples
+      per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
+      devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   # This job is the final job that runs after all other jobs and is used for branch protection status checks.
   # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks
diff --git a/.gitignore b/.gitignore
index a221dae54e..57a16cb539 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 *build*/
 .cache
 .aws
-.config
\ No newline at end of file
+.config
+_deps/catch2-src/
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 615168de50..cf0ba28180 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,11 @@
 # 3.21 is the minimum for the developer build.
 cmake_minimum_required(VERSION 3.15)
 
+# sccache cannot handle the -Fd option generating pdb files; CMP0141 is unknown to CMake < 3.25, so test for it first.
+if (POLICY CMP0141 AND "MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
+  cmake_policy(SET CMP0141 NEW)
+endif()
+
 # Determine whether CCCL is the top-level project or included into
 # another project via add_subdirectory()
 if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_LIST_DIR}")
diff --git a/ci/matrix.yaml b/ci/matrix.yaml
index 707c06c695..1e16fb3312 100644
--- a/ci/matrix.yaml
+++ b/ci/matrix.yaml
@@ -9,7 +9,7 @@ gpus:
   - 'v100'
 
 # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
-devcontainer_version: '23.08'
+devcontainer_version: '23.10'
 
 # Each environment below will generate a unique build/test job
 # See the "compute-matrix" job in the workflow for how this is parsed and used
@@ -27,24 +27,27 @@ devcontainer_version: '23.08'
 # Configurations that will run for every PR
 pull_request:
   nvcc:
-    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc', version: '6', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14], jobs: ['build']}
-    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc', version: '7', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc', version: '8', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc', version: '9', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc', version: '7', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc', version: '8', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc', version: '9', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc', version: '10', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '12', exe: 'g++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build', 'test']}
-    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'llvm', version: '9', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '9', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
-    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '10', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17], jobs: ['build']}
+    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '6',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14],         jobs: ['build']}
+    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '7',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '8',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '9',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '7',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '8',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '9',  exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '10', exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '11', exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc',  version: '12', exe: 'g++'},     gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build', 'test']}
+    - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'llvm', version: '9',  exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '9',  exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '10', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17],     jobs: ['build']}
     - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '11', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
     - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '12', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
     - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '13', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
     - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '14', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
     - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '15', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build']}
     - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '16', exe: 'clang++'}, gpu_build_archs: '70', std: [11, 14, 17, 20], jobs: ['build', 'test']}
+    - {cuda: *cuda_oldest, os: 'windows2022', cpu: 'amd64', compiler: {name: 'cl',   version: '14.16', exe: 'cl++'}, gpu_build_archs: '70', std: [14, 17],         jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'windows2022', cpu: 'amd64', compiler: {name: 'cl',   version: '14.29', exe: 'cl++'}, gpu_build_archs: '70', std: [14, 17],         jobs: ['build']}
+    - {cuda: *cuda_newest, os: 'windows2022', cpu: 'amd64', compiler: {name: 'cl',   version: '14.36', exe: 'cl++'}, gpu_build_archs: '70', std: [14, 17, 20],     jobs: ['build']}
   nvrtc:
     - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', gpu_build_archs: '70', std: [11, 14, 17, 20]}
diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1
new file mode 100644
index 0000000000..0cb85e7fc2
--- /dev/null
+++ b/ci/windows/build_common.psm1
@@ -0,0 +1,151 @@
+
+Param(
+    [Parameter(Mandatory = $true)]
+    [Alias("cxx")]
+    [ValidateNotNullOrEmpty()]
+    [ValidateSet(11, 14, 17, 20)]
+    [int]$CXX_STANDARD = 17,
+    [Parameter(Mandatory = $true)]
+    [Alias("archs")]
+    [ValidateNotNullOrEmpty()]
+    [string]$GPU_ARCHS = "70"
+)
+
+
+# We need the full path to cl because otherwise cmake will replace CMAKE_CXX_COMPILER with the full path
+# and keep CMAKE_CUDA_HOST_COMPILER at "cl" which breaks our cmake script
+$script:HOST_COMPILER  = (Get-Command "cl").source -replace '\\','/'
+$script:PARALLEL_LEVEL = (Get-WmiObject -class Win32_processor).NumberOfLogicalProcessors
+
+If($null -eq $env:DEVCONTAINER_NAME) {  # no devcontainer name in the environment: use build/local
+    $script:BUILD_DIR="$PSScriptRoot/../../build/local"
+} else {
+    $script:BUILD_DIR="$PSScriptRoot/../../build/$env:DEVCONTAINER_NAME"  # must read the env var tested above, not a PowerShell variable
+}
+
+If(!(test-path -PathType container "../build")) {
+    New-Item -ItemType Directory -Path "../build"
+}
+
+# Create the build directory if it does not exist yet (no cccl/build/latest symlink is made here)
+New-Item -ItemType Directory -Path "$BUILD_DIR" -Force
+
+# replace sccache binary to get it working with MSVC
+$script:path_to_sccache =(gcm sccache).Source
+Remove-Item $path_to_sccache -Force
+Invoke-WebRequest -Uri "https://github.com/robertmaynard/sccache/releases/download/nvcc_msvc_v1/sccache.exe" -OutFile $path_to_sccache
+
+$script:COMMON_CMAKE_OPTIONS= @(
+    "-S .."
+    "-B $BUILD_DIR"
+    "-G Ninja"
+    "-DCMAKE_BUILD_TYPE=Release"
+    "-DCMAKE_CXX_STANDARD=$CXX_STANDARD"
+    "-DCMAKE_CUDA_STANDARD=$CXX_STANDARD"
+    "-DCMAKE_CXX_COMPILER=$HOST_COMPILER"
+    "-DCMAKE_CUDA_HOST_COMPILER=$HOST_COMPILER"
+    "-DCMAKE_CUDA_ARCHITECTURES=$GPU_ARCHS"
+    "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON"
+)
+
+Write-Host "========================================"
+Write-Host "Begin build"
+Write-Host "pwd=$pwd"
+Write-Host "HOST_COMPILER=$HOST_COMPILER"
+Write-Host "CXX_STANDARD=$CXX_STANDARD"
+Write-Host "GPU_ARCHS=$GPU_ARCHS"
+Write-Host "PARALLEL_LEVEL=$PARALLEL_LEVEL"
+Write-Host "BUILD_DIR=$BUILD_DIR"
+Write-Host "Current commit is:"
+Write-Host "$(git log -1)"
+Write-Host "========================================"
+
+function configure {
+    Param(
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        $CMAKE_OPTIONS
+    )
+
+    # The module-wide options come first, followed by the caller's options.
+    $cmake_args = $script:COMMON_CMAKE_OPTIONS + $CMAKE_OPTIONS
+    cmake $cmake_args
+    # cmake is an external program, so a failure is only visible in $LastExitCode.
+    If ($LastExitCode -ne 0) {
+        throw 'Step Failed'
+    }
+}
+
+function build {
+    Param(
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        [string]$BUILD_NAME
+    )
+
+    # Bracket the build with two sccache snapshots so the 'Stop' call can report the difference.
+    sccache_stats -MODE 'Start'
+    cmake --build $script:BUILD_DIR --parallel $script:PARALLEL_LEVEL
+    # Save the exit code right away: sccache_stats runs another external command.
+    $build_exit_code = $LastExitCode
+    sccache_stats -MODE 'Stop'
+    Write-Output "${BUILD_NAME} build complete"
+    If ($build_exit_code -ne 0) {
+        throw 'Step Failed'
+    }
+}
+
+function configure_and_build {
+    Param(
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        [string]$BUILD_NAME,
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        $CMAKE_OPTIONS
+    )
+
+    configure $CMAKE_OPTIONS  # throws on a non-zero cmake exit code, which skips the build
+    build $BUILD_NAME         # compiles and prints the sccache statistics
+}
+
+function sccache_stats {
+    Param (
+        [Parameter(Mandatory = $true)]
+        [ValidateNotNullOrEmpty()]
+        [ValidateSet('Start','Stop')]
+        [string]$MODE
+    )
+
+    # Snapshot the counters: keep only the digits of selected `sccache -s` output lines.
+    $report = sccache -s
+    [int]$now_requests  = ($report[0] -replace '[^\d]+')
+    [int]$now_hits_cpp  = ($report[2] -replace '[^\d]+')
+    [int]$now_hits_cuda = ($report[3] -replace '[^\d]+')
+    [int]$now_miss_cpp  = ($report[5] -replace '[^\d]+')
+    [int]$now_miss_cuda = ($report[6] -replace '[^\d]+')
+    If ($MODE -eq 'Start') {
+        [int]$script:sccache_compile_requests = $now_requests
+        [int]$script:sccache_cache_hits_cpp   = $now_hits_cpp
+        [int]$script:sccache_cache_hits_cuda  = $now_hits_cuda
+        [int]$script:sccache_cache_miss_cpp   = $now_miss_cpp
+        [int]$script:sccache_cache_miss_cuda  = $now_miss_cuda
+        return
+    }  # 'Stop' from here on: report the change since the stored 'Start' baseline
+    [int]$total_requests  = $now_requests  - $script:sccache_compile_requests
+    [int]$total_hits_cpp  = $now_hits_cpp  - $script:sccache_cache_hits_cpp
+    [int]$total_hits_cuda = $now_hits_cuda - $script:sccache_cache_hits_cuda
+    [int]$total_miss_cpp  = $now_miss_cpp  - $script:sccache_cache_miss_cpp
+    [int]$total_miss_cuda = $now_miss_cuda - $script:sccache_cache_miss_cuda
+    If ( $total_requests -le 0 ) {
+        echo "sccache stats: N/A No new compilation requests"
+        return
+    }
+    [int]$hit_rate_cpp  = $total_hits_cpp  / $total_requests * 100;
+    [int]$hit_rate_cuda = $total_hits_cuda / $total_requests * 100;
+    echo "sccache hits cpp:  $total_hits_cpp  `t| misses: $total_miss_cpp  `t| hit rate: $hit_rate_cpp%"
+    echo "sccache hits cuda: $total_hits_cuda `t| misses: $total_miss_cuda `t| hit rate: $hit_rate_cuda%"
+}
+
+Export-ModuleMember -Function configure, build, configure_and_build, sccache_stats
+Export-ModuleMember -Variable BUILD_DIR
diff --git a/ci/windows/build_cub.ps1 b/ci/windows/build_cub.ps1
new file mode 100644
index 0000000000..6642638156
--- /dev/null
+++ b/ci/windows/build_cub.ps1
@@ -0,0 +1,46 @@
+
+Param(
+    [Parameter(Mandatory = $true)]
+    [Alias("cxx")]
+    [ValidateNotNullOrEmpty()]
+    [ValidateSet(11, 14, 17, 20)]
+    [int]$CXX_STANDARD = 17,
+    [Parameter(Mandatory = $true)]
+    [Alias("archs")]
+    [ValidateNotNullOrEmpty()]
+    [string]$GPU_ARCHS = "70"
+)
+
+$CURRENT_PATH = Split-Path $pwd -leaf  # name of the directory the script was started from
+If($CURRENT_PATH -ne "ci") {
+    Write-Host "Moving to ci folder"
+    pushd "$PSScriptRoot/.."
+}
+
+Remove-Module -Name build_common -ErrorAction SilentlyContinue  # force a re-import; stay quiet when it is not loaded yet
+Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
+
+$ENABLE_DIALECT_CPP11 = If ($CXX_STANDARD -ne 11) {"OFF"} Else {"ON"}
+$ENABLE_DIALECT_CPP14 = If ($CXX_STANDARD -ne 14) {"OFF"} Else {"ON"}
+$ENABLE_DIALECT_CPP17 = If ($CXX_STANDARD -ne 17) {"OFF"} Else {"ON"}
+$ENABLE_DIALECT_CPP20 = If ($CXX_STANDARD -ne 20) {"OFF"} Else {"ON"}
+
+$CMAKE_OPTIONS = @(
+    "-DCCCL_ENABLE_THRUST=OFF"
+    "-DCCCL_ENABLE_LIBCUDACXX=OFF"
+    "-DCCCL_ENABLE_CUB=ON"
+    "-DCCCL_ENABLE_TESTING=OFF"
+    "-DCUB_ENABLE_DIALECT_CPP11=$ENABLE_DIALECT_CPP11"
+    "-DCUB_ENABLE_DIALECT_CPP14=$ENABLE_DIALECT_CPP14"
+    "-DCUB_ENABLE_DIALECT_CPP17=$ENABLE_DIALECT_CPP17"
+    "-DCUB_ENABLE_DIALECT_CPP20=$ENABLE_DIALECT_CPP20"
+    "-DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT=ON"
+    "-DCUB_IGNORE_DEPRECATED_CPP_DIALECT=ON"
+    "-DCUB_ENABLE_RDC_TESTS=OFF"
+)
+
+configure_and_build "CUB" $CMAKE_OPTIONS
+
+If($CURRENT_PATH -ne "ci") {
+    popd
+}
diff --git a/ci/windows/build_libcudacxx.ps1 b/ci/windows/build_libcudacxx.ps1
new file mode 100644
index 0000000000..9219a5c04e
--- /dev/null
+++ b/ci/windows/build_libcudacxx.ps1
@@ -0,0 +1,71 @@
+<#
+.SYNOPSIS
+    Configures libcudacxx on Windows and runs its lit test suite.
+.PARAMETER CXX_STANDARD
+    C++ standard to test with: 11, 14, 17 or 20 (alias: cxx). Defaults to 17.
+.PARAMETER GPU_ARCHS
+    GPU architectures passed on to the build_common module and to lit
+    (alias: archs). Defaults to "70".
+#>
+Param(
+    [Parameter(Mandatory = $false)]
+    [Alias("cxx")]
+    [ValidateNotNullOrEmpty()]
+    [ValidateSet(11, 14, 17, 20)]
+    [int]$CXX_STANDARD = 17,
+    [Parameter(Mandatory = $false)]
+    [Alias("archs")]
+    [ValidateNotNullOrEmpty()]
+    [string]$GPU_ARCHS = "70"
+)
+
+# Work from the ci folder (the parent of this script's folder) unless the
+# current directory is already named "ci".
+$CURRENT_PATH = Split-Path $pwd -leaf
+If($CURRENT_PATH -ne "ci") {
+    Write-Host "Moving to ci folder"
+    pushd "$PSScriptRoot/.."
+}
+
+# Re-import build_common so it picks up this run's arguments. On a fresh
+# session the module is not loaded yet, so a failed removal is not an error.
+Remove-Module -Name build_common -ErrorAction SilentlyContinue
+Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
+
+$CMAKE_OPTIONS = @(
+    "-DCCCL_ENABLE_THRUST=OFF"
+    "-DCCCL_ENABLE_LIBCUDACXX=ON"
+    "-DCCCL_ENABLE_CUB=OFF"
+    "-DCCCL_ENABLE_TESTING=OFF"
+    "-DLIBCUDACXX_ENABLE_LIBCUDACXX_TESTS=ON"
+)
+
+# NOTE(review): BUILD_DIR is not set in this script; presumably it is the variable exported by build_common - confirm.
+$LIT_OPTIONS = @(
+    "-v"
+    "--no-progress-bar"
+    "-Dexecutor=""NoopExecutor()"""
+    "-Dcompute_archs=$GPU_ARCHS"
+    "-Dstd=c++$CXX_STANDARD"
+    "$BUILD_DIR/libcudacxx/test"
+)
+
+configure $CMAKE_OPTIONS
+
+pushd $BUILD_DIR/libcudacxx/
+
+sccache_stats 'Start'
+lit $LIT_OPTIONS
+# Capture lit's exit code right away, before another command overwrites it.
+$test_result = $LastExitCode
+sccache_stats 'Stop'
+
+popd
+If($CURRENT_PATH -ne "ci") {
+    popd
+}
+
+# Report the failure only after the directory stack has been restored.
+If ($test_result -ne 0) {
+    throw 'Step Failed'
+}
diff --git a/ci/windows/build_thrust.ps1 b/ci/windows/build_thrust.ps1
new file mode 100644
index 0000000000..264849a0cd
--- /dev/null
+++ b/ci/windows/build_thrust.ps1
@@ -0,0 +1,60 @@
+<#
+.SYNOPSIS
+    Configures and builds Thrust on Windows for a single C++ dialect.
+.PARAMETER CXX_STANDARD
+    C++ standard to build with: 11, 14, 17 or 20 (alias: cxx). Defaults to 17.
+.PARAMETER GPU_ARCHS
+    GPU architectures passed on to the build_common module (alias: archs).
+    Defaults to "70".
+#>
+Param(
+    [Parameter(Mandatory = $false)]
+    [Alias("cxx")]
+    [ValidateNotNullOrEmpty()]
+    [ValidateSet(11, 14, 17, 20)]
+    [int]$CXX_STANDARD = 17,
+    [Parameter(Mandatory = $false)]
+    [Alias("archs")]
+    [ValidateNotNullOrEmpty()]
+    [string]$GPU_ARCHS = "70"
+)
+
+# Work from the ci folder (the parent of this script's folder) unless the
+# current directory is already named "ci".
+$CURRENT_PATH = Split-Path $pwd -leaf
+If($CURRENT_PATH -ne "ci") {
+    Write-Host "Moving to ci folder"
+    pushd "$PSScriptRoot/.."
+}
+
+# Re-import build_common so it picks up this run's arguments. On a fresh
+# session the module is not loaded yet, so a failed removal is not an error.
+Remove-Module -Name build_common -ErrorAction SilentlyContinue
+Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
+
+# Enable only the dialect that was requested.
+$ENABLE_DIALECT_CPP11 = If ($CXX_STANDARD -ne 11) {"OFF"} Else {"ON"}
+$ENABLE_DIALECT_CPP14 = If ($CXX_STANDARD -ne 14) {"OFF"} Else {"ON"}
+$ENABLE_DIALECT_CPP17 = If ($CXX_STANDARD -ne 17) {"OFF"} Else {"ON"}
+$ENABLE_DIALECT_CPP20 = If ($CXX_STANDARD -ne 20) {"OFF"} Else {"ON"}
+
+$CMAKE_OPTIONS = @(
+    "-DCCCL_ENABLE_THRUST=ON"
+    "-DCCCL_ENABLE_LIBCUDACXX=OFF"
+    "-DCCCL_ENABLE_CUB=OFF"
+    "-DCCCL_ENABLE_TESTING=OFF"
+    "-DTHRUST_ENABLE_MULTICONFIG=ON"
+    "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_CPP11=$ENABLE_DIALECT_CPP11"
+    "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_CPP14=$ENABLE_DIALECT_CPP14"
+    "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_CPP17=$ENABLE_DIALECT_CPP17"
+    "-DTHRUST_MULTICONFIG_ENABLE_DIALECT_CPP20=$ENABLE_DIALECT_CPP20"
+    "-DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT=ON"
+    "-DCUB_IGNORE_DEPRECATED_CPP_DIALECT=ON"
+)
+
+configure_and_build "Thrust" $CMAKE_OPTIONS
+
+# Undo the pushd above, if one was made.
+If($CURRENT_PATH -ne "ci") {
+    popd
+}
diff --git a/cub/cmake/CubBuildCompilerTargets.cmake b/cub/cmake/CubBuildCompilerTargets.cmake
index 6a77bc608a..2cef662dca 100644
--- a/cub/cmake/CubBuildCompilerTargets.cmake
+++ b/cub/cmake/CubBuildCompilerTargets.cmake
@@ -13,6 +13,10 @@ function(cub_build_compiler_targets)
 
   if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
     list(APPEND cxx_compile_definitions _ENABLE_EXTENDED_ALIGNED_STORAGE)
+    list(APPEND cuda_compile_options "--use-local-env")
+
+    # sccache cannot handle the -Fd option that generates pdb files
+    set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
 
     append_option_if_available("/W4" cxx_compile_options)
 
@@ -131,4 +135,12 @@ function(cub_build_compiler_targets)
     # Don't complain about deprecated GPU targets.
     $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Wno-deprecated-gpu-targets>
   )
+
+  if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
+    # Use the local env instead of rebuilding it all the time
+    target_compile_options(cub.compiler_interface INTERFACE
+      # If using CUDA w/ NVCC...
+      $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--use-local-env>
+    )
+  endif()
 endfunction()
diff --git a/cub/cub/agent/agent_three_way_partition.cuh b/cub/cub/agent/agent_three_way_partition.cuh
index 6d81fdb514..1ec55372ef 100644
--- a/cub/cub/agent/agent_three_way_partition.cuh
+++ b/cub/cub/agent/agent_three_way_partition.cuh
@@ -95,16 +95,17 @@ struct accumulator_pack_base_t<OffsetT, typename cuda::std::enable_if<sizeof(Off
 template <class OffsetT>
 struct accumulator_pack_t : accumulator_pack_base_t<OffsetT>
 {
-  using typename accumulator_pack_base_t<OffsetT>::pack_t;
+  using base = accumulator_pack_base_t<OffsetT>;
+  using typename base::pack_t;
 
   __device__ static void subtract(pack_t &packed, OffsetT val)
   {
-    packed = pack(first(packed) - val, second(packed) - val);
+    packed =  base::pack( base::first(packed) - val,  base::second(packed) - val);
   }
 
   __device__ static OffsetT sum(pack_t &packed)
   {
-    return first(packed) + second(packed);
+    return  base::first(packed) +  base::second(packed);
   }
 
   __device__ static pack_t zero()
@@ -131,7 +132,7 @@ struct AgentThreeWayPartitionPolicy
   constexpr static CacheLoadModifier LOAD_MODIFIER   = _LOAD_MODIFIER;
   constexpr static BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM;
 
-  struct detail 
+  struct detail
   {
     using delay_constructor_t = DelayConstructorT;
   };
diff --git a/cub/test/catch2_test_device_scan_iterators.cu b/cub/test/catch2_test_device_scan_iterators.cu
index 703a810c58..9ee2fc75bf 100644
--- a/cub/test/catch2_test_device_scan_iterators.cu
+++ b/cub/test/catch2_test_device_scan_iterators.cu
@@ -301,8 +301,8 @@ CUB_TEST("Device scan works complex accumulator types", "[scan][device]")
 
   custom_accumulator_t init{};
 
-  thrust::device_vector<custom_input_t> d_input(num_items, custom_input_t{1});
-  thrust::device_vector<custom_output_t> d_output{num_items, custom_output_t{nullptr, 0}};
+  thrust::device_vector<custom_input_t> d_input(static_cast<size_t>(num_items), custom_input_t{1});
+  thrust::device_vector<custom_output_t> d_output{static_cast<size_t>(num_items), custom_output_t{nullptr, 0}};
   thrust::device_vector<int> d_ok_count(1);
 
   auto index_it = thrust::make_counting_iterator(0);
diff --git a/libcudacxx/.upstream-tests/test/CMakeLists.txt b/libcudacxx/.upstream-tests/test/CMakeLists.txt
index b03563f09a..6024d60658 100644
--- a/libcudacxx/.upstream-tests/test/CMakeLists.txt
+++ b/libcudacxx/.upstream-tests/test/CMakeLists.txt
@@ -38,6 +38,11 @@ if (NOT MSVC)
      --compiler-options=-Wextra")
 endif()
 
+# sccache cannot handle the -Fd option that generates pdb files
+if (MSVC)
+  set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
+endif()
+
 # Intel OneAPI compiler has fast math enabled by default which breaks almost all floating point tests
 if (${CMAKE_CXX_COMPILER_ID} STREQUAL "IntelLLVM")
   set(LIBCUDACXX_TEST_COMPILER_FLAGS
diff --git a/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.cuda.pass.cpp b/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.cuda.pass.cpp
index 615e41d509..e0e00cf22c 100644
--- a/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.cuda.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.cuda.pass.cpp
@@ -74,12 +74,16 @@ struct clear_token
     }
 };
 
-using aw_aw_pw = performer_list<
+// older CTK together with MSVC 2017 cannot handle the full list
+using aw_aw_pw1 = performer_list<
     barrier_parity_wait<false>,
     barrier_arrive_and_wait,
     barrier_arrive_and_wait,
     async_tester_fence,
-    clear_token,
+    clear_token
+>;
+
+using aw_aw_pw2 = performer_list<
     barrier_parity_wait<true>,
     barrier_arrive_and_wait,
     barrier_arrive_and_wait,
@@ -91,7 +95,11 @@ void kernel_invoker()
 {
     validate_not_movable<
         barrier_and_token<cuda::barrier<cuda::thread_scope_system>>,
-        aw_aw_pw
+        aw_aw_pw1
+      >(2);
+    validate_not_movable<
+        barrier_and_token<cuda::barrier<cuda::thread_scope_system>>,
+        aw_aw_pw2
       >(2);
 }
 
diff --git a/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.std.pass.cpp b/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.std.pass.cpp
index d3206ad8fb..7446bd67d6 100644
--- a/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.std.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/heterogeneous/barrier_parity.std.pass.cpp
@@ -74,12 +74,15 @@ struct clear_token
     }
 };
 
-using aw_aw_pw = performer_list<
+using aw_aw_pw1 = performer_list<
     barrier_parity_wait<false>,
     barrier_arrive_and_wait,
     barrier_arrive_and_wait,
     async_tester_fence,
-    clear_token,
+    clear_token
+>;
+
+using aw_aw_pw2 = performer_list<
     barrier_parity_wait<true>,
     barrier_arrive_and_wait,
     barrier_arrive_and_wait,
@@ -91,7 +94,11 @@ void kernel_invoker()
 {
     validate_not_movable<
         barrier_and_token<cuda::std::barrier<>>,
-        aw_aw_pw
+        aw_aw_pw1
+      >(2);
+    validate_not_movable<
+        barrier_and_token<cuda::std::barrier<>>,
+        aw_aw_pw2
       >(2);
 }
 
diff --git a/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp b/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp
index 9a338b44d0..4cc5827dd0 100644
--- a/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.invocable/invocable.compile.pass.cpp
@@ -317,9 +317,9 @@ static_assert(invocable<lvalue_qualified, S&>, "");
 static_assert(!invocable<lvalue_qualified, S const&>, "");
 static_assert(!invocable<lvalue_qualified, S volatile&>, "");
 static_assert(!invocable<lvalue_qualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!invocable<lvalue_qualified, S&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!invocable<lvalue_qualified, S const&&>, "");
 static_assert(!invocable<lvalue_qualified, S volatile&&>, "");
 static_assert(!invocable<lvalue_qualified, S const volatile&&>, "");
@@ -345,12 +345,12 @@ static_assert(invocable<lvalue_volatile_qualified, S&>, "");
 static_assert(!invocable<lvalue_volatile_qualified, S const&>, "");
 static_assert(invocable<lvalue_volatile_qualified, S volatile&>, "");
 static_assert(!invocable<lvalue_volatile_qualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!invocable<lvalue_volatile_qualified, S&&>, "");
 static_assert(!invocable<lvalue_volatile_qualified, S const&&>, "");
 static_assert(!invocable<lvalue_volatile_qualified, S volatile&&>, "");
 static_assert(!invocable<lvalue_volatile_qualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 static_assert(check_member_is_invocable<int (S::*)() const volatile&, S&>(), "");
 using lvalue_cv_qualified = void (S::*)() const volatile&;
@@ -358,34 +358,34 @@ static_assert(invocable<lvalue_cv_qualified, S&>, "");
 static_assert(invocable<lvalue_cv_qualified, S const&>, "");
 static_assert(invocable<lvalue_cv_qualified, S volatile&>, "");
 static_assert(invocable<lvalue_cv_qualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!invocable<lvalue_cv_qualified, S&&>, "");
 static_assert(!invocable<lvalue_cv_qualified, S const&&>, "");
 static_assert(!invocable<lvalue_cv_qualified, S volatile&&>, "");
 static_assert(!invocable<lvalue_cv_qualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_unqualified = void (S::*)() &&;
 static_assert(!invocable<rvalue_unqualified, S&>, "");
 static_assert(!invocable<rvalue_unqualified, S const&>, "");
 static_assert(!invocable<rvalue_unqualified, S volatile&>, "");
 static_assert(!invocable<rvalue_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(invocable<rvalue_unqualified, S&&>, "");
 static_assert(!invocable<rvalue_unqualified, S const&&>, "");
 static_assert(!invocable<rvalue_unqualified, S volatile&&>, "");
 static_assert(!invocable<rvalue_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_const_unqualified = void (S::*)() const&&;
 static_assert(!invocable<rvalue_const_unqualified, S&>, "");
 static_assert(!invocable<rvalue_const_unqualified, S const&>, "");
 static_assert(!invocable<rvalue_const_unqualified, S volatile&>, "");
 static_assert(!invocable<rvalue_const_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(invocable<rvalue_const_unqualified, S&&>, "");
 static_assert(invocable<rvalue_const_unqualified, S const&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!invocable<rvalue_const_unqualified, S volatile&&>, "");
 static_assert(!invocable<rvalue_const_unqualified, S const volatile&&>, "");
 
@@ -394,24 +394,24 @@ static_assert(!invocable<rvalue_volatile_unqualified, S&>, "");
 static_assert(!invocable<rvalue_volatile_unqualified, S const&>, "");
 static_assert(!invocable<rvalue_volatile_unqualified, S volatile&>, "");
 static_assert(!invocable<rvalue_volatile_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(invocable<rvalue_volatile_unqualified, S&&>, "");
 static_assert(!invocable<rvalue_volatile_unqualified, S const&&>, "");
 static_assert(invocable<rvalue_volatile_unqualified, S volatile&&>, "");
 static_assert(!invocable<rvalue_volatile_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_cv_unqualified = void (S::*)() const volatile&&;
 static_assert(!invocable<rvalue_cv_unqualified, S&>, "");
 static_assert(!invocable<rvalue_cv_unqualified, S const&>, "");
 static_assert(!invocable<rvalue_cv_unqualified, S volatile&>, "");
 static_assert(!invocable<rvalue_cv_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(invocable<rvalue_cv_unqualified, S&&>, "");
 static_assert(invocable<rvalue_cv_unqualified, S const&&>, "");
 static_assert(invocable<rvalue_cv_unqualified, S volatile&&>, "");
 static_assert(invocable<rvalue_cv_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 } // namespace pointer_to_member_functions
 
 // Check the concept with closure types
diff --git a/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp b/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp
index e1a177ec49..83bf7d0faf 100644
--- a/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/concepts/concepts.callable/concept.regularinvocable/regular_invocable.compile.pass.cpp
@@ -343,12 +343,12 @@ static_assert(regular_invocable<lvalue_qualified, S&>, "");
 static_assert(!regular_invocable<lvalue_qualified, S const&>, "");
 static_assert(!regular_invocable<lvalue_qualified, S volatile&>, "");
 static_assert(!regular_invocable<lvalue_qualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!regular_invocable<lvalue_qualified, S&&>, "");
 static_assert(!regular_invocable<lvalue_qualified, S const&&>, "");
 static_assert(!regular_invocable<lvalue_qualified, S volatile&&>, "");
 static_assert(!regular_invocable<lvalue_qualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 #if TEST_STD_VER > 17
 static_assert(check_member_is_invocable<int (S::*)() const&, S>(), "");
@@ -371,12 +371,12 @@ static_assert(regular_invocable<lvalue_volatile_qualified, S&>, "");
 static_assert(!regular_invocable<lvalue_volatile_qualified, S const&>, "");
 static_assert(regular_invocable<lvalue_volatile_qualified, S volatile&>, "");
 static_assert(!regular_invocable<lvalue_volatile_qualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!regular_invocable<lvalue_volatile_qualified, S&&>, "");
 static_assert(!regular_invocable<lvalue_volatile_qualified, S const&&>, "");
 static_assert(!regular_invocable<lvalue_volatile_qualified, S volatile&&>, "");
 static_assert(!regular_invocable<lvalue_volatile_qualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 static_assert(check_member_is_invocable<int (S::*)() const volatile&, S&>(), "");
 using lvalue_cv_qualified = void (S::*)() const volatile&;
@@ -384,60 +384,60 @@ static_assert(regular_invocable<lvalue_cv_qualified, S&>, "");
 static_assert(regular_invocable<lvalue_cv_qualified, S const&>, "");
 static_assert(regular_invocable<lvalue_cv_qualified, S volatile&>, "");
 static_assert(regular_invocable<lvalue_cv_qualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(!regular_invocable<lvalue_cv_qualified, S&&>, "");
 static_assert(!regular_invocable<lvalue_cv_qualified, S const&&>, "");
 static_assert(!regular_invocable<lvalue_cv_qualified, S volatile&&>, "");
 static_assert(!regular_invocable<lvalue_cv_qualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_unqualified = void (S::*)() &&;
 static_assert(!regular_invocable<rvalue_unqualified, S&>, "");
 static_assert(!regular_invocable<rvalue_unqualified, S const&>, "");
 static_assert(!regular_invocable<rvalue_unqualified, S volatile&>, "");
 static_assert(!regular_invocable<rvalue_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(regular_invocable<rvalue_unqualified, S&&>, "");
 static_assert(!regular_invocable<rvalue_unqualified, S const&&>, "");
 static_assert(!regular_invocable<rvalue_unqualified, S volatile&&>, "");
 static_assert(!regular_invocable<rvalue_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_const_unqualified = void (S::*)() const&&;
 static_assert(!regular_invocable<rvalue_const_unqualified, S&>, "");
 static_assert(!regular_invocable<rvalue_const_unqualified, S const&>, "");
 static_assert(!regular_invocable<rvalue_const_unqualified, S volatile&>, "");
 static_assert(!regular_invocable<rvalue_const_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(regular_invocable<rvalue_const_unqualified, S&&>, "");
 static_assert(regular_invocable<rvalue_const_unqualified, S const&&>, "");
 static_assert(!regular_invocable<rvalue_const_unqualified, S volatile&&>, "");
 static_assert(!regular_invocable<rvalue_const_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_volatile_unqualified = void (S::*)() volatile&&;
 static_assert(!regular_invocable<rvalue_volatile_unqualified, S&>, "");
 static_assert(!regular_invocable<rvalue_volatile_unqualified, S const&>, "");
 static_assert(!regular_invocable<rvalue_volatile_unqualified, S volatile&>, "");
 static_assert(!regular_invocable<rvalue_volatile_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(regular_invocable<rvalue_volatile_unqualified, S&&>, "");
 static_assert(!regular_invocable<rvalue_volatile_unqualified, S const&&>, "");
 static_assert(regular_invocable<rvalue_volatile_unqualified, S volatile&&>, "");
 static_assert(!regular_invocable<rvalue_volatile_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 
 using rvalue_cv_unqualified = void (S::*)() const volatile&&;
 static_assert(!regular_invocable<rvalue_cv_unqualified, S&>, "");
 static_assert(!regular_invocable<rvalue_cv_unqualified, S const&>, "");
 static_assert(!regular_invocable<rvalue_cv_unqualified, S volatile&>, "");
 static_assert(!regular_invocable<rvalue_cv_unqualified, S const volatile&>, "");
-#ifndef TEST_COMPILER_MSVC_2017
+#if !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 static_assert(regular_invocable<rvalue_cv_unqualified, S&&>, "");
 static_assert(regular_invocable<rvalue_cv_unqualified, S const&&>, "");
 static_assert(regular_invocable<rvalue_cv_unqualified, S volatile&&>, "");
 static_assert(regular_invocable<rvalue_cv_unqualified, S const volatile&&>, "");
-#endif // !TEST_COMPILER_MSVC_2017
+#endif // !defined(TEST_COMPILER_MSVC_2017) && !defined(TEST_COMPILER_MSVC_2019)
 } // namespace pointer_to_member_functions
 
 // Check the concept with closure types (and also check for subsumption)
diff --git a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp
index 9d50337391..39504bcddb 100644
--- a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp
@@ -71,8 +71,9 @@ struct G {
   __host__ __device__ friend void iter_swap(const PointerTo<G>&, const PointerTo<F>&);
 };
 
-
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
 static_assert( cuda::std::indirectly_swappable<PointerTo<A>, PointerTo<B>>);
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 static_assert(!cuda::std::indirectly_swappable<PointerTo<A>, PointerTo<C>>);
 static_assert(!cuda::std::indirectly_swappable<PointerTo<A>, PointerTo<D>>);
 static_assert(!cuda::std::indirectly_swappable<PointerTo<A>, PointerTo<E>>);
diff --git a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.permutable/permutable.compile.pass.cpp b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.permutable/permutable.compile.pass.cpp
index 1b5000f597..6582a92461 100644
--- a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.permutable/permutable.compile.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/alg.req.permutable/permutable.compile.pass.cpp
@@ -28,6 +28,7 @@ static_assert( cuda::std::indirectly_movable_storable<NotAForwardIterator, NotAF
 static_assert( cuda::std::indirectly_swappable<NotAForwardIterator>);
 static_assert(!cuda::std::permutable<NotAForwardIterator>);
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
 struct NonCopyable {
   NonCopyable(const NonCopyable&) = delete;
   NonCopyable& operator=(const NonCopyable&) = delete;
@@ -39,6 +40,7 @@ static_assert( cuda::std::forward_iterator<NotIMS>);
 static_assert(!cuda::std::indirectly_movable_storable<NotIMS, NotIMS>);
 static_assert( cuda::std::indirectly_swappable<NotIMS>);
 static_assert(!cuda::std::permutable<NotIMS>);
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
 // Note: it is impossible for an iterator to satisfy `indirectly_movable_storable` but not `indirectly_swappable`:
 // `indirectly_swappable` requires both iterators to be `indirectly_readable` and for `ranges::iter_swap` to be
diff --git a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp
index 618a007132..22baf9841e 100644
--- a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/indirectcallable/indirectinvocable/indirectly_comparable.compile.pass.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: msvc-19.16 && nvcc-11.1
 
 // template<class I1, class I2, class R, class P1, class P2>
 // concept indirectly_comparable;
diff --git a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp
index b50cc71940..402d30332a 100644
--- a/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.swap/iter_swap.pass.cpp
@@ -52,12 +52,14 @@ static_assert( cuda::std::is_invocable_v<IterSwapT&&, HasIterSwap&, int&>);
 static_assert(!cuda::std::is_invocable_v<IterSwapT&&, int&, HasIterSwap&>);
 #endif // TEST_COMPILER_CUDACC_BELOW_11_3
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
 struct NodiscardIterSwap {
   __host__ __device__ friend _LIBCUDACXX_NODISCARD_EXT int iter_swap(NodiscardIterSwap&, NodiscardIterSwap&) { return 0; }
 };
 
 __host__ __device__
 void ensureVoidCast(NodiscardIterSwap& a, NodiscardIterSwap& b) { cuda::std::ranges::iter_swap(a, b); }
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
 struct HasRangesSwap {
   int &value_;
@@ -141,6 +143,7 @@ struct MoveOnly2 {
 
 __host__ __device__ constexpr bool test()
 {
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   {
     int value1 = 0;
     int value2 = 0;
@@ -170,11 +173,13 @@ __host__ __device__ constexpr bool test()
     cuda::std::ranges::iter_swap(ePtr, fPtr);
     assert(e.value && f.value);
   }
+
   {
     MoveOnly1 g; MoveOnly2 h;
     cuda::std::ranges::iter_swap(&g, &h);
     assert(g.value && h.value);
   }
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 #if TEST_HAS_BUILTIN(__builtin_is_constant_evaluated)
   {
     move_tracker arr[2];
diff --git a/libcudacxx/.upstream-tests/test/std/thread/thread.barrier/completion.pass.cpp b/libcudacxx/.upstream-tests/test/std/thread/thread.barrier/completion.pass.cpp
index d701a6e4f4..6f7c7b4318 100644
--- a/libcudacxx/.upstream-tests/test/std/thread/thread.barrier/completion.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/thread/thread.barrier/completion.pass.cpp
@@ -8,9 +8,9 @@
 //
 // UNSUPPORTED: libcpp-has-no-threads
 // UNSUPPORTED: pre-sm-70
+// XFAIL: msvc-19.36 && c++20
 
 // Compiler bug for being unable to convert __nv_hdl lambdas
-// XFAIL: msvc-19.33
 
 // <cuda/std/barrier>
 
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp
index 0d51340696..dc2b2d9cd7 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp
@@ -46,7 +46,9 @@ static_assert(is_transparent<cuda::std::ranges::equal_to>);
 __host__ __device__ constexpr bool test() {
   auto fn = cuda::std::ranges::equal_to();
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   assert(fn(MoveOnly(42), MoveOnly(42)));
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
   ForwardingTestObject a{};
   ForwardingTestObject b{};
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater.pass.cpp
index 6b8c2b4318..ec397c2087 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater.pass.cpp
@@ -45,7 +45,9 @@ static_assert(is_transparent<cuda::std::ranges::greater>);
 __host__ __device__ constexpr bool test() {
   auto fn = cuda::std::ranges::greater();
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   assert(fn(MoveOnly(42), MoveOnly(41)));
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
   ForwardingTestObject a{};
   ForwardingTestObject b{};
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp
index 975b150c71..34752d8ae8 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp
@@ -45,7 +45,9 @@ static_assert(is_transparent<cuda::std::ranges::greater_equal>);
 __host__ __device__ constexpr bool test() {
   auto fn = cuda::std::ranges::greater_equal();
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   assert(fn(MoveOnly(42), MoveOnly(42)));
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
   ForwardingTestObject a{};
   ForwardingTestObject b{};
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less.pass.cpp
index 24a5f0a733..d82e247a3b 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less.pass.cpp
@@ -45,7 +45,9 @@ static_assert(is_transparent<cuda::std::ranges::less>);
 __host__ __device__ constexpr bool test() {
   auto fn = cuda::std::ranges::less();
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   assert(fn(MoveOnly(41), MoveOnly(42)));
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
   ForwardingTestObject a{};
   ForwardingTestObject b{};
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp
index aa9ddef778..f9a9196142 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp
@@ -45,7 +45,9 @@ static_assert(is_transparent<cuda::std::ranges::less_equal>);
 __host__ __device__ constexpr bool test() {
   auto fn = cuda::std::ranges::less_equal();
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   assert(fn(MoveOnly(41), MoveOnly(42)));
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
   // These are the opposite of other tests.
   ForwardingTestObject a{};
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp
index e45e6ff63b..5701a8449a 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp
@@ -52,7 +52,9 @@ struct PtrAndNotEqOperator {
 __host__ __device__ constexpr bool test() {
   auto fn = cuda::std::ranges::not_equal_to();
 
+#if !defined(TEST_COMPILER_CUDACC_BELOW_11_3) && !defined(TEST_COMPILER_MSVC_2017)
   assert(fn(MoveOnly(41), MoveOnly(42)));
+#endif // !TEST_COMPILER_CUDACC_BELOW_11_3 && !TEST_COMPILER_MSVC_2017
 
   // These are the opposite of other tests.
   ForwardingTestObject a{};
diff --git a/libcudacxx/.upstream-tests/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp b/libcudacxx/.upstream-tests/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp
index 53a6b50add..cdaaf211aa 100644
--- a/libcudacxx/.upstream-tests/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp
+++ b/libcudacxx/.upstream-tests/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp
@@ -9,7 +9,7 @@
 
 
 // UNSUPPORTED: c++98, c++03
-// XFAIL: msvc
+// UNSUPPORTED: msvc
 
 // <cuda/std/tuple>
 
diff --git a/libcudacxx/.upstream-tests/test/support/test_iterators.h b/libcudacxx/.upstream-tests/test/support/test_iterators.h
index 204d8519e7..c4c6854aa1 100644
--- a/libcudacxx/.upstream-tests/test/support/test_iterators.h
+++ b/libcudacxx/.upstream-tests/test/support/test_iterators.h
@@ -1133,6 +1133,7 @@ constexpr auto get_iterator_concept() {
   } else {
     return cuda::std::input_iterator_tag{};
   }
+  _LIBCUDACXX_UNREACHABLE();
 }
 
 template<class Base, cuda::std::enable_if_t<cuda::std::input_iterator<Base>, int> = 0>
diff --git a/libcudacxx/.upstream-tests/utils/libcudacxx/compiler.py b/libcudacxx/.upstream-tests/utils/libcudacxx/compiler.py
index 436de868f4..32b1323a8a 100644
--- a/libcudacxx/.upstream-tests/utils/libcudacxx/compiler.py
+++ b/libcudacxx/.upstream-tests/utils/libcudacxx/compiler.py
@@ -14,8 +14,9 @@
 class CXXCompiler(object):
     CM_Default = 0
     CM_PreProcess = 1
-    CM_Compile = 2
-    CM_Link = 3
+    CM_CheckCompileFlag = 2
+    CM_Compile = 3
+    CM_Link = 4
 
     def __init__(self, path, first_arg,
                  flags=None, compile_flags=None, link_flags=None,
@@ -151,8 +152,9 @@ def _basicCmd(self, source_files, out, mode=CM_Default, flags=[],
                   input_is_cxx=False):
         cmd = []
         if self.use_ccache \
-                and not mode == self.CM_Link \
-                and not mode == self.CM_PreProcess:
+            and not mode == self.CM_Link \
+            and not mode == self.CM_PreProcess \
+            and not mode == self.CM_CheckCompileFlag:
             cmd += [os.environ.get('CMAKE_CUDA_COMPILER_LAUNCHER')]
         cmd += [self.path] + ([self.first_arg] if self.first_arg != '' else [])
         if out is not None:
@@ -167,7 +169,8 @@ def _basicCmd(self, source_files, out, mode=CM_Default, flags=[],
             raise TypeError('source_files must be a string or list')
         if mode == self.CM_PreProcess:
             cmd += ['-E']
-        elif mode == self.CM_Compile:
+        elif mode == self.CM_Compile \
+          or mode == self.CM_CheckCompileFlag:
             cmd += ['-c']
         cmd += self.flags
         if self.use_verify:
@@ -179,7 +182,9 @@ def _basicCmd(self, source_files, out, mode=CM_Default, flags=[],
             cmd += self.compile_flags
             if self.use_warnings:
                 cmd += self.warning_flags
-        if mode != self.CM_PreProcess and mode != self.CM_Compile:
+        if mode != self.CM_PreProcess   \
+            and mode != self.CM_Compile \
+            and mode != self.CM_CheckCompileFlag:
             cmd += self.link_flags
         cmd += flags
         return cmd
@@ -189,9 +194,8 @@ def preprocessCmd(self, source_files, out=None, flags=[]):
                              mode=self.CM_PreProcess,
                              input_is_cxx=True)
 
-    def compileCmd(self, source_files, out=None, flags=[]):
-        return self._basicCmd(source_files, out, flags=flags,
-                             mode=self.CM_Compile,
+    def compileCmd(self, source_files, out=None, flags=[], mode = CM_Compile):
+        return self._basicCmd(source_files, out, flags=flags, mode=mode,
                              input_is_cxx=True) + ['-c']
 
     def linkCmd(self, source_files, out=None, flags=[]):
@@ -207,8 +211,14 @@ def preprocess(self, source_files, out=None, flags=[], cwd=None):
                                                   cwd=cwd)
         return cmd, out, err, rc
 
+    def checkCompileFlag(self, source_files, out=None, flags=[], cwd=None):
+        cmd = self.compileCmd(source_files, out, flags, self.CM_CheckCompileFlag)
+        out, err, rc = libcudacxx.util.executeCommand(cmd, env=self.compile_env,
+                                                  cwd=cwd)
+        return cmd, out, err, rc
+
     def compile(self, source_files, out=None, flags=[], cwd=None):
-        cmd = self.compileCmd(source_files, out, flags)
+        cmd = self.compileCmd(source_files, out, flags, self.CM_Compile)
         out, err, rc = libcudacxx.util.executeCommand(cmd, env=self.compile_env,
                                                   cwd=cwd)
         return cmd, out, err, rc
@@ -307,8 +317,7 @@ def hasCompileFlag(self, flag):
         if self.type is not None and self.type != 'nvcc' and self.type != 'msvc':
             flags += ['-Werror', '-fsyntax-only']
         empty_cpp = os.path.join(os.path.dirname(os.path.abspath(__file__)), "empty.cpp")
-        cmd, out, err, rc = self.compile(empty_cpp, out=os.devnull,
-                                         flags=flags)
+        cmd, out, err, rc = self.checkCompileFlag(empty_cpp, out=os.devnull, flags=flags)
         if out.find('flag is not supported with the configured host compiler') != -1:
             return False
         if err.find('flag is not supported with the configured host compiler') != -1:
diff --git a/libcudacxx/.upstream-tests/utils/libcudacxx/test/config.py b/libcudacxx/.upstream-tests/utils/libcudacxx/test/config.py
index c7ae87c8eb..fd5790e767 100644
--- a/libcudacxx/.upstream-tests/utils/libcudacxx/test/config.py
+++ b/libcudacxx/.upstream-tests/utils/libcudacxx/test/config.py
@@ -611,6 +611,7 @@ def configure_compile_flags(self):
         if self.is_windows:
             # FIXME: Can we remove this?
             self.cxx.compile_flags += ['-D_CRT_SECURE_NO_WARNINGS']
+            self.cxx.compile_flags += ['--use-local-env']
             # Required so that tests using min/max don't fail on Windows,
             # and so that those tests don't have to be changed to tolerate
             # this insanity.
@@ -751,7 +752,8 @@ def configure_default_compile_flags(self):
         if enable_32bit:
             self.cxx.flags += ['-m32']
         # Use verbose output for better errors
-        self.cxx.flags += ['-v']
+        if not self.cxx.use_ccache or self.cxx.type == 'msvc':
+            self.cxx.flags += ['-v']
         sysroot = self.get_lit_conf('sysroot')
         if sysroot:
             self.cxx.flags += ['--sysroot=' + sysroot]
@@ -971,6 +973,9 @@ def configure_link_flags(self):
         if nvcc_host_compiler and self.cxx.type == 'nvcc':
             self.cxx.link_flags += ['-ccbin={0}'.format(nvcc_host_compiler)]
 
+        if self.is_windows:
+            self.cxx.link_flags += ['--use-local-env']
+
         # Configure library path
         self.configure_link_flags_cxx_library_path()
         self.configure_link_flags_abi_library_path()
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__config b/libcudacxx/include/cuda/std/detail/libcxx/include/__config
index d0bd126cf4..a6f6949f6d 100644
--- a/libcudacxx/include/cuda/std/detail/libcxx/include/__config
+++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__config
@@ -878,8 +878,16 @@ extern "C++" {
        _Pragma(_LIBCUDACXX_TOSTRING(diag_suppress _WARNING))
 #    define _LIBCUDACXX_NV_DIAG_DEFAULT(_WARNING) _Pragma(_LIBCUDACXX_TOSTRING(diagnostic pop))
 #  else // _LIBCUDACXX_CUDACC_BELOW_11_3
-#    define _LIBCUDACXX_NV_DIAG_SUPPRESS(_WARNING) _Pragma(_LIBCUDACXX_TOSTRING(diag_suppress _WARNING))
-#    define _LIBCUDACXX_NV_DIAG_DEFAULT(_WARNING)  _Pragma(_LIBCUDACXX_TOSTRING(diag_default _WARNING))
+#    if defined(_LIBCUDACXX_COMPILER_MSVC_2017) // MSVC 2017 has issues with restoring the warning
+#      define _LIBCUDACXX_NV_DIAG_SUPPRESS(_WARNING) __pragma(_LIBCUDACXX_TOSTRING(diag_suppress _WARNING))
+#      define _LIBCUDACXX_NV_DIAG_DEFAULT(_WARNING)
+#    elif defined(_LIBCUDACXX_COMPILER_MSVC)
+#      define _LIBCUDACXX_NV_DIAG_SUPPRESS(_WARNING) __pragma(_LIBCUDACXX_TOSTRING(diag_suppress _WARNING))
+#      define _LIBCUDACXX_NV_DIAG_DEFAULT(_WARNING)  __pragma(_LIBCUDACXX_TOSTRING(diag_default _WARNING))
+#    else // ^^^ MSVC ^^^ / vvv not MSVC vvv
+#      define _LIBCUDACXX_NV_DIAG_SUPPRESS(_WARNING) _Pragma(_LIBCUDACXX_TOSTRING(diag_suppress _WARNING))
+#      define _LIBCUDACXX_NV_DIAG_DEFAULT(_WARNING)  _Pragma(_LIBCUDACXX_TOSTRING(diag_default _WARNING))
+#    endif // not MSVC
 #  endif // !__NVCC_DIAG_PRAGMA_SUPPORT__
 #else // ^^^ _LIBCUDACXX_CUDACC ^^^ / vvv other compiler vvv
 #  define _LIBCUDACXX_NV_DIAG_SUPPRESS(_WARNING)
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib b/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib
index 5472191b1b..cd6768e0e2 100644
--- a/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib
+++ b/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib
@@ -98,29 +98,24 @@ void *aligned_alloc(size_t alignment, size_t size);                       // C11
 #pragma GCC system_header
 #endif
 
-#if defined(_LIBCUDACXX_CUDACC_BELOW_11_2)
+
 #ifdef __CUDA_ARCH__
+#if defined(_LIBCUDACXX_CUDACC_BELOW_11_2)
 #  define _LIBCUDACXX_UNREACHABLE() __trap()
-#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv
-#  define _LIBCUDACXX_UNREACHABLE() __builtin_unreachable()
-#endif // !__CUDA_ARCH__
 #elif defined(_LIBCUDACXX_CUDACC_BELOW_11_3)
-#ifdef __CUDA_ARCH__
 #  define _LIBCUDACXX_UNREACHABLE() __builtin_assume(false)
-#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv
+#else
 #  define _LIBCUDACXX_UNREACHABLE() __builtin_unreachable()
-#endif // !__CUDA_ARCH__
-#elif defined(_LIBCUDACXX_COMPILER_MSVC)
+#endif // !_LIBCUDACXX_CUDACC_BELOW_11_3
+#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv
+#if defined(_LIBCUDACXX_COMPILER_MSVC)
 #  define _LIBCUDACXX_UNREACHABLE() __assume(false)
 #elif defined(_LIBCUDACXX_COMPILER_GCC) || __has_builtin(__builtin_unreachable)
 #  define _LIBCUDACXX_UNREACHABLE() __builtin_unreachable()
 #else // Other compilers
-#ifdef __CUDA_ARCH__
-#  define _LIBCUDACXX_UNREACHABLE() __trap()
-#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv
 #  define _LIBCUDACXX_UNREACHABLE() ::abort()
-#endif // !__CUDA_ARCH__
 #endif // Other compilers
+#endif // !__CUDA_ARCH__
 
 #ifdef _LIBCUDACXX_COMPILER_NVHPC
 #define _LIBCUDACXX_UNREACHABLE_AFTER_SWITCH()
diff --git a/thrust/cmake/ThrustBuildCompilerTargets.cmake b/thrust/cmake/ThrustBuildCompilerTargets.cmake
index 6e887ad34a..f65cb1bd95 100644
--- a/thrust/cmake/ThrustBuildCompilerTargets.cmake
+++ b/thrust/cmake/ThrustBuildCompilerTargets.cmake
@@ -29,6 +29,9 @@ function(thrust_build_compiler_targets)
   if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
     append_option_if_available("/W4" cxx_compile_options)
 
+    # sccache cannot handle the -Fd option that generates pdb files
+    set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
+
     # Treat all warnings as errors. This is only supported on Release builds,
     # as `nv_exec_check_disable` doesn't seem to work with MSVC debug iterators
     # and spurious warnings are emitted.
@@ -140,6 +143,14 @@ function(thrust_build_compiler_targets)
     $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Wno-deprecated-gpu-targets>
   )
 
+  if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
+    # Use the local env instead of rebuilding it all the time
+    target_compile_options(thrust.compiler_interface INTERFACE
+      # If using CUDA w/ NVCC...
+      $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--use-local-env>
+    )
+  endif()
+
   # This is kept separate for Github issue #1174.
   add_library(thrust.promote_cudafe_warnings INTERFACE)
   target_compile_options(thrust.promote_cudafe_warnings INTERFACE
diff --git a/thrust/testing/random.cu b/thrust/testing/random.cu
index 53a165055b..43298bd466 100644
--- a/thrust/testing/random.cu
+++ b/thrust/testing/random.cu
@@ -739,6 +739,7 @@ void TestRanlux48Unequal(void)
 DECLARE_UNITTEST(TestRanlux48Unequal);
 
 
+THRUST_DISABLE_MSVC_WARNING_BEGIN(4305) // truncation warning
 template<typename Distribution, typename Validator>
   void ValidateDistributionCharacteristic(void)
 {
@@ -766,29 +767,23 @@ template<typename Distribution, typename Validator>
     // test Distribution with same range as engine
 
     // test host
-    THRUST_DISABLE_MSVC_WARNING_BEGIN(4305)
     thrust::generate(h.begin(), h.end(), Validator(
         Distribution(Engine::min, Engine::max)
     ));
-    THRUST_DISABLE_MSVC_WARNING_END(4305)
 
     ASSERT_EQUAL(true, h[0]);
 
     // test device
-    THRUST_DISABLE_MSVC_WARNING_BEGIN(4305)
     thrust::generate(d.begin(), d.end(), Validator(
         Distribution(Engine::min, Engine::max)
     ));
-    THRUST_DISABLE_MSVC_WARNING_END(4305)
 
     ASSERT_EQUAL(true, d[0]);
 
     // test Distribution with smaller range than engine
 
     // test host
-    THRUST_DISABLE_MSVC_WARNING_BEGIN(4305) // Truncation warning.
     typename Distribution::result_type engine_range = Engine::max - Engine::min;
-    THRUST_DISABLE_MSVC_WARNING_END(4305)
     thrust::generate(h.begin(), h.end(), Validator(Distribution(engine_range/3, (2 * engine_range)/3)));
 
     ASSERT_EQUAL(true, h[0]);
@@ -812,6 +807,7 @@ template<typename Distribution, typename Validator>
 
   ASSERT_EQUAL(true, d[0]);
 }
+THRUST_DISABLE_MSVC_WARNING_END(4305)
 
 
 template<typename Distribution>
@@ -836,7 +832,7 @@ void TestUniformIntDistributionMin(void)
 {
   typedef thrust::random::uniform_int_distribution<int>          int_dist;
   typedef thrust::random::uniform_int_distribution<unsigned int> uint_dist;
-  
+
   ValidateDistributionCharacteristic<int_dist,  ValidateDistributionMin<int_dist,  thrust::minstd_rand> >();
   ValidateDistributionCharacteristic<uint_dist, ValidateDistributionMin<uint_dist, thrust::minstd_rand> >();
 }
@@ -847,7 +843,7 @@ void TestUniformIntDistributionMax(void)
 {
   typedef thrust::random::uniform_int_distribution<int>          int_dist;
   typedef thrust::random::uniform_int_distribution<unsigned int> uint_dist;
-  
+
   ValidateDistributionCharacteristic<int_dist,  ValidateDistributionMax<int_dist,  thrust::minstd_rand> >();
   ValidateDistributionCharacteristic<uint_dist, ValidateDistributionMax<uint_dist, thrust::minstd_rand> >();
 }
@@ -869,7 +865,7 @@ void TestUniformRealDistributionMin(void)
 {
   typedef thrust::random::uniform_real_distribution<float>  float_dist;
   typedef thrust::random::uniform_real_distribution<double> double_dist;
-  
+
   ValidateDistributionCharacteristic<float_dist,  ValidateDistributionMin<float_dist,  thrust::minstd_rand> >();
   ValidateDistributionCharacteristic<double_dist, ValidateDistributionMin<double_dist, thrust::minstd_rand> >();
 }
@@ -880,7 +876,7 @@ void TestUniformRealDistributionMax(void)
 {
   typedef thrust::random::uniform_real_distribution<float>  float_dist;
   typedef thrust::random::uniform_real_distribution<double> double_dist;
-  
+
   ValidateDistributionCharacteristic<float_dist,  ValidateDistributionMax<float_dist,  thrust::minstd_rand> >();
   ValidateDistributionCharacteristic<double_dist, ValidateDistributionMax<double_dist, thrust::minstd_rand> >();
 }
@@ -902,7 +898,7 @@ void TestNormalDistributionMin(void)
 {
   typedef thrust::random::normal_distribution<float>  float_dist;
   typedef thrust::random::normal_distribution<double> double_dist;
-  
+
   ValidateDistributionCharacteristic<float_dist,  ValidateDistributionMin<float_dist,  thrust::minstd_rand> >();
   ValidateDistributionCharacteristic<double_dist, ValidateDistributionMin<double_dist, thrust::minstd_rand> >();
 }
@@ -913,7 +909,7 @@ void TestNormalDistributionMax(void)
 {
   typedef thrust::random::normal_distribution<float>  float_dist;
   typedef thrust::random::normal_distribution<double> double_dist;
-  
+
   ValidateDistributionCharacteristic<float_dist,  ValidateDistributionMax<float_dist,  thrust::minstd_rand> >();
   ValidateDistributionCharacteristic<double_dist, ValidateDistributionMax<double_dist, thrust::minstd_rand> >();
 }