From d9fbf656da4ba532cba50d753d83e2e6e816e51a Mon Sep 17 00:00:00 2001
From: "k.koide" <k.koide@aist.go.jp>
Date: Wed, 14 Aug 2024 15:12:54 +0900
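Subject: [PATCH] change file extensions to avoid TBB-related errors

Rename nonlinear_factor_set_gpu, nonlinear_factor_set_gpu_create and
integrated_vgicp_factor_gpu from .cu to .cpp and update the source list
in CMakeLists.txt accordingly. The hunks shown for these files call only
CUDA runtime API functions; presumably the rename makes CMake build them
with the host C++ compiler instead of nvcc, which is what avoids the
TBB-related errors (to be confirmed against the build setup).

nonlinear_factor_set_gpu.cpp additionally receives formatting-only
changes ("if(" -> "if (", cudaMemcpyAsync calls joined onto one line).

Note that this patch also touches IntegratedGICPFactor_::
update_correspondences(): the assignment
"last_correspondence_point = delta;" moves from the end of the function
to just before the correspondence buffers are resized, and is now
guarded by "if (do_update)". This is not part of the rename and may
change behavior when do_update is false; please review it separately.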
---
 CMakeLists.txt                                |  6 ++---
 .../impl/integrated_gicp_factor_impl.hpp      |  6 +++--
 ...et_gpu.cu => nonlinear_factor_set_gpu.cpp} | 22 ++++++-------------
 ...cu => nonlinear_factor_set_gpu_create.cpp} |  0
 ...gpu.cu => integrated_vgicp_factor_gpu.cpp} |  0
 5 files changed, 14 insertions(+), 20 deletions(-)
 rename src/gtsam_points/cuda/{nonlinear_factor_set_gpu.cu => nonlinear_factor_set_gpu.cpp} (94%)
 rename src/gtsam_points/cuda/{nonlinear_factor_set_gpu_create.cu => nonlinear_factor_set_gpu_create.cpp} (100%)
 rename src/gtsam_points/factors/{integrated_vgicp_factor_gpu.cu => integrated_vgicp_factor_gpu.cpp} (100%)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9edf092a..3a818aff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -183,8 +183,8 @@ if(BUILD_WITH_CUDA)
     src/gtsam_points/cuda/cuda_graph.cu
     src/gtsam_points/cuda/cuda_graph_exec.cu
     # src/gtsam_points/cuda/gl_buffer_map.cu
-    src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu
-    src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu
+    src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp
+    src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp
     src/gtsam_points/cuda/stream_roundrobin.cu
     src/gtsam_points/cuda/stream_temp_buffer_roundrobin.cu
     # types
@@ -197,7 +197,7 @@ if(BUILD_WITH_CUDA)
     src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu
     src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu
     src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu
-    src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu
+    src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp
     # util
     src/gtsam_points/util/easy_profiler_cuda.cu
   )
diff --git a/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp
index 7f4b11e7..53c90a70 100644
--- a/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp
+++ b/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp
@@ -105,6 +105,10 @@ void IntegratedGICPFactor_<TargetFrame, SourceFrame>::update_correspondences(con
     }
   }
 
+  if (do_update) {
+    last_correspondence_point = delta;
+  }
+
   correspondences.resize(frame::size(*source));
   mahalanobis.resize(frame::size(*source));
 
@@ -130,8 +134,6 @@ void IntegratedGICPFactor_<TargetFrame, SourceFrame>::update_correspondences(con
       mahalanobis[i](3, 3) = 0.0;
     }
   }
-
-  last_correspondence_point = delta;
 }
 
 template <typename TargetFrame, typename SourceFrame>
diff --git a/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu b/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp
similarity index 94%
rename from src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu
rename to src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp
index bb771c35..0a9136c1 100644
--- a/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu
+++ b/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp
@@ -12,14 +12,14 @@ namespace gtsam_points {
 NonlinearFactorSetGPU::DeviceBuffer::DeviceBuffer() : size(0), buffer(nullptr) {}
 
 NonlinearFactorSetGPU::DeviceBuffer::~DeviceBuffer() {
-  if(buffer) {
+  if (buffer) {
     check_error << cudaFreeAsync(buffer, 0);
   }
 }
 
 void NonlinearFactorSetGPU::DeviceBuffer::resize(size_t size, CUstream_st* stream) {
-  if(this->size < size) {
-    if(buffer) {
+  if (this->size < size) {
+    if (buffer) {
       check_error << cudaFreeAsync(buffer, stream);
     }
     check_error << cudaMallocAsync(&buffer, size, stream);
@@ -163,12 +163,8 @@ void NonlinearFactorSetGPU::error(const gtsam::Values& values) {
   }
 
   // copy input buffer from cpu to gpu
-  check_error << cudaMemcpyAsync(
-    evaluation_input_buffer_gpu->data(),
-    evaluation_input_buffer_cpu.data(),
-    input_buffer_size,
-    cudaMemcpyHostToDevice,
-    stream);
+  check_error
+    << cudaMemcpyAsync(evaluation_input_buffer_gpu->data(), evaluation_input_buffer_cpu.data(), input_buffer_size, cudaMemcpyHostToDevice, stream);
   check_error << cudaStreamSynchronize(stream);
 
   // issue error computation
@@ -195,12 +191,8 @@ void NonlinearFactorSetGPU::error(const gtsam::Values& values) {
   }
 
   // copy output buffer from gpu to cpu
-  check_error << cudaMemcpyAsync(
-    evaluation_output_buffer_cpu.data(),
-    evaluation_output_buffer_gpu->data(),
-    output_buffer_size,
-    cudaMemcpyDeviceToHost,
-    stream);
+  check_error
+    << cudaMemcpyAsync(evaluation_output_buffer_cpu.data(), evaluation_output_buffer_gpu->data(), output_buffer_size, cudaMemcpyDeviceToHost, stream);
   check_error << cudaStreamSynchronize(stream);
 
   // store computed results
diff --git a/src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu b/src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp
similarity index 100%
rename from src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu
rename to src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp
diff --git a/src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu b/src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp
similarity index 100%
rename from src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu
rename to src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp