diff --git a/CMakeLists.txt b/CMakeLists.txt index 9edf092a..3a818aff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -183,8 +183,8 @@ if(BUILD_WITH_CUDA) src/gtsam_points/cuda/cuda_graph.cu src/gtsam_points/cuda/cuda_graph_exec.cu # src/gtsam_points/cuda/gl_buffer_map.cu - src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu - src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu + src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp + src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp src/gtsam_points/cuda/stream_roundrobin.cu src/gtsam_points/cuda/stream_temp_buffer_roundrobin.cu # types @@ -197,7 +197,7 @@ if(BUILD_WITH_CUDA) src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu - src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu + src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp # util src/gtsam_points/util/easy_profiler_cuda.cu ) diff --git a/include/gtsam_points/ann/impl/incremental_voxelmap_impl.hpp b/include/gtsam_points/ann/impl/incremental_voxelmap_impl.hpp index 45a5380a..5faf2f08 100644 --- a/include/gtsam_points/ann/impl/incremental_voxelmap_impl.hpp +++ b/include/gtsam_points/ann/impl/incremental_voxelmap_impl.hpp @@ -68,14 +68,14 @@ void IncrementalVoxelMap::insert(const PointCloud& points) { } template -size_t IncrementalVoxelMap::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { +size_t IncrementalVoxelMap::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist) const { const Eigen::Vector4d query = (Eigen::Vector4d() << pt[0], pt[1], pt[2], 1.0).finished(); const Eigen::Vector3i center = fast_floor(query * inv_leaf_size).template head<3>(); size_t voxel_index = 0; const auto index_transform = [&](const size_t point_index) { return calc_index(voxel_index, point_index); }; - KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform); + KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform, max_sq_dist); for (const auto& offset : offsets) { const Eigen::Vector3i coord = center + offset; const auto found = voxels.find(coord); diff --git a/include/gtsam_points/ann/incremental_covariance_voxelmap.hpp b/include/gtsam_points/ann/incremental_covariance_voxelmap.hpp index 4ace33a7..74d11535 100644 --- a/include/gtsam_points/ann/incremental_covariance_voxelmap.hpp +++ b/include/gtsam_points/ann/incremental_covariance_voxelmap.hpp @@ -39,10 +39,16 @@ struct IncrementalCovarianceVoxelMap : public IncrementalVoxelMap::max()) const override; /// @brief Find k-nearest neighbors. This finds neighbors regardless of the validity of covariances. - size_t knn_search_force(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const; + size_t knn_search_force(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist = std::numeric_limits::max()) + const; /// @brief Get valid point indices. If num_threads is -1, the member variable num_threads is used. std::vector valid_indices(int num_threads = -1) const; diff --git a/include/gtsam_points/ann/incremental_voxelmap.hpp b/include/gtsam_points/ann/incremental_voxelmap.hpp index d8a5c13e..650d5b93 100644 --- a/include/gtsam_points/ann/incremental_voxelmap.hpp +++ b/include/gtsam_points/ann/incremental_voxelmap.hpp @@ -73,7 +73,12 @@ struct IncrementalVoxelMap : public NearestNeighborSearch { /// @param k_indices Indices of the k nearest neighbors /// @param k_sq_dists Squared distances of the k nearest neighbors /// @return Number of found neighbors - virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override; + virtual size_t knn_search( + const double* pt, + size_t k, + size_t* k_indices, + double* k_sq_dists, + double max_sq_dist = std::numeric_limits::max()) const override; /// @brief Calculate the global point index from the voxel index and the point index. inline size_t calc_index(const size_t voxel_id, const size_t point_id) const { return (voxel_id << point_id_bits) | point_id; } diff --git a/include/gtsam_points/ann/intensity_kdtree.hpp b/include/gtsam_points/ann/intensity_kdtree.hpp index 0a6ee569..d8d25498 100644 --- a/include/gtsam_points/ann/intensity_kdtree.hpp +++ b/include/gtsam_points/ann/intensity_kdtree.hpp @@ -52,7 +52,12 @@ struct IntensityKdTree : public NearestNeighborSearch { return false; } - virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override; + virtual size_t knn_search( + const double* pt, + size_t k, + size_t* k_indices, + double* k_sq_dists, + double max_sq_dist = std::numeric_limits::max()) const override; public: const int num_points; diff --git a/include/gtsam_points/ann/kdtree.hpp b/include/gtsam_points/ann/kdtree.hpp index 104beee5..4348caad 100644 --- a/include/gtsam_points/ann/kdtree.hpp +++ b/include/gtsam_points/ann/kdtree.hpp @@ -31,7 +31,12 @@ struct KdTree : public NearestNeighborSearch { /// @param k_indices Indices of k nearest neighbors /// @param k_sq_dists Squared distances of k nearest neighbors /// @return Number of neighbors found - virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override; + virtual size_t knn_search( + const double* pt, + size_t k, + size_t* k_indices, + double* k_sq_dists, + double max_sq_dist = std::numeric_limits::max()) const override; public: const int num_points; diff --git a/include/gtsam_points/ann/kdtree2.hpp b/include/gtsam_points/ann/kdtree2.hpp index eeb2bb79..a513514a 100644 --- a/include/gtsam_points/ann/kdtree2.hpp +++ b/include/gtsam_points/ann/kdtree2.hpp @@ -38,8 +38,15 @@ struct KdTree2 : public NearestNeighborSearch { /// @param k_indices Indices of k nearest neighbors /// @param k_sq_dists Squared distances of k nearest neighbors /// @return Number of neighbors found - virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override { - return index->knn_search(Eigen::Map(pt), k, k_indices, k_sq_dists); + virtual size_t knn_search( + const double* pt, + size_t k, + size_t* k_indices, + double* k_sq_dists, + double max_sq_dist = std::numeric_limits::max()) const override { + KnnSetting setting; + setting.max_sq_dist = max_sq_dist; + return index->knn_search(Eigen::Map(pt), k, k_indices, k_sq_dists, setting); } public: diff --git a/include/gtsam_points/ann/knn_result.hpp b/include/gtsam_points/ann/knn_result.hpp index 0fdaee6e..944a3169 100644 --- a/include/gtsam_points/ann/knn_result.hpp +++ b/include/gtsam_points/ann/knn_result.hpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT #pragma once +#include #include namespace gtsam_points { @@ -16,7 +17,8 @@ struct KnnSetting { } public: - double epsilon = 0.0; ///< Early termination threshold + double max_sq_dist = std::numeric_limits::max(); ///< Maximum squared distance to search + double epsilon = 0.0; ///< Early termination threshold }; /// @brief Identity transformation function. (use std::identity instead in C++20) @@ -39,7 +41,12 @@ struct KnnResult { /// @param distances Buffer to store distances (must be larger than k=max(N, num_neighbors)) /// @param num_neighbors Number of neighbors to search (must be -1 for static case N > 0) /// @param index_transform Index transformation function (e.g., local point index -> global voxel + point index) - explicit KnnResult(size_t* indices, double* distances, int num_neighbors = -1, const IndexTransform& index_transform = IndexTransform()) + explicit KnnResult( + size_t* indices, + double* distances, + int num_neighbors = -1, + const IndexTransform& index_transform = IndexTransform(), + double max_sq_dist = std::numeric_limits::max()) : index_transform(index_transform), capacity(num_neighbors), num_found_neighbors(0), @@ -59,7 +66,7 @@ struct KnnResult { } std::fill(this->indices, this->indices + buffer_size(), INVALID); - std::fill(this->distances, this->distances + buffer_size(), std::numeric_limits::max()); + std::fill(this->distances, this->distances + buffer_size(), max_sq_dist); } /// @brief Buffer size (i.e., Maximum number of neighbors) diff --git a/include/gtsam_points/ann/nearest_neighbor_search.hpp b/include/gtsam_points/ann/nearest_neighbor_search.hpp index c1029d59..8543610f 100644 --- a/include/gtsam_points/ann/nearest_neighbor_search.hpp +++ b/include/gtsam_points/ann/nearest_neighbor_search.hpp @@ -4,6 +4,7 @@ #pragma once #include +#include namespace gtsam_points { @@ -25,6 +26,9 @@ struct NearestNeighborSearch { * @param k_indices Indices of k-nearest neighbors * @param k_sq_dists Squared distances to the neighbors */ - virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { return 0; }; + virtual size_t + knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist = std::numeric_limits::max()) const { + return 0; + }; }; } // namespace gtsam_points \ No newline at end of file diff --git a/include/gtsam_points/ann/small_kdtree.hpp b/include/gtsam_points/ann/small_kdtree.hpp index 55d46990..23a2df10 100644 --- a/include/gtsam_points/ann/small_kdtree.hpp +++ b/include/gtsam_points/ann/small_kdtree.hpp @@ -309,7 +309,7 @@ struct UnsafeKdTree { /// @return Number of found neighbors size_t knn_search(const Eigen::Vector3d& query_, int k, size_t* k_indices, double* k_sq_dists, const KnnSetting& setting = KnnSetting()) const { const Eigen::Vector4d query = (Eigen::Vector4d() << query_, 1.0).finished(); - KnnResult<-1> result(k_indices, k_sq_dists, k); + KnnResult<-1> result(k_indices, k_sq_dists, k, identity_transform(), setting.max_sq_dist); knn_search(query, root, result, setting); return result.num_found(); } @@ -323,7 +323,7 @@ struct UnsafeKdTree { template size_t knn_search(const Eigen::Vector3d& query_, size_t* k_indices, double* k_sq_dists, const KnnSetting& setting = KnnSetting()) const { const Eigen::Vector4d query = (Eigen::Vector4d() << query_, 1.0).finished(); - KnnResult result(k_indices, k_sq_dists); + KnnResult result(k_indices, k_sq_dists, -1, identity_transform(), setting.max_sq_dist); knn_search(query, root, result, setting); return result.num_found(); } diff --git a/include/gtsam_points/cuda/kernels/linearized_system.cuh b/include/gtsam_points/cuda/kernels/linearized_system.cuh index 7c97f16c..1f9e7c6a 100644 --- a/include/gtsam_points/cuda/kernels/linearized_system.cuh +++ b/include/gtsam_points/cuda/kernels/linearized_system.cuh @@ -4,7 +4,6 @@ #pragma once #include -#include namespace gtsam_points { diff --git a/include/gtsam_points/cuda/kernels/lookup_voxels.cuh b/include/gtsam_points/cuda/kernels/lookup_voxels.cuh index 2aeb19e4..b0c227ee 100644 --- a/include/gtsam_points/cuda/kernels/lookup_voxels.cuh +++ b/include/gtsam_points/cuda/kernels/lookup_voxels.cuh @@ -22,9 +22,9 @@ struct lookup_voxels_kernel { lookup_voxels_kernel( const GaussianVoxelMapGPU& voxelmap, - const thrust::device_ptr& points, - const thrust::device_ptr& normals, - const thrust::device_ptr& x_ptr) + const Eigen::Vector3f* points, + const Eigen::Vector3f* normals, + const Eigen::Isometry3f* x_ptr) : x_ptr(x_ptr), voxelmap_info_ptr(voxelmap.voxelmap_info_ptr), buckets_ptr(voxelmap.buckets), @@ -32,14 +32,14 @@ struct lookup_voxels_kernel { normals_ptr(normals) {} __host__ __device__ thrust::pair operator()(int point_idx) const { - const auto& info = *thrust::raw_pointer_cast(voxelmap_info_ptr); + const auto& info = *voxelmap_info_ptr; - const Eigen::Isometry3f& trans = *thrust::raw_pointer_cast(x_ptr); - const Eigen::Vector3f& x = thrust::raw_pointer_cast(points_ptr)[point_idx]; + const Eigen::Isometry3f& trans = *x_ptr; + const Eigen::Vector3f& x = points_ptr[point_idx]; const Eigen::Vector3f transed_x = trans.linear() * x + trans.translation(); if (enable_surface_validation) { - const Eigen::Vector3f& normal = thrust::raw_pointer_cast(normals_ptr)[point_idx]; + const Eigen::Vector3f& normal = normals_ptr[point_idx]; const Eigen::Vector3f transed_normal = trans.linear() * normal; // 0.17 = cos(10 deg) @@ -57,17 +57,17 @@ struct lookup_voxels_kernel { } __host__ __device__ void operator()(thrust::pair& point_voxel_indices) const { - const auto& info = *thrust::raw_pointer_cast(voxelmap_info_ptr); + const auto& info = *voxelmap_info_ptr; const int point_idx = thrust::get<0>(point_voxel_indices); int& voxel_idx = thrust::get<1>(point_voxel_indices); - const Eigen::Isometry3f& trans = *thrust::raw_pointer_cast(x_ptr); - const Eigen::Vector3f& x = thrust::raw_pointer_cast(points_ptr)[point_idx]; + const Eigen::Isometry3f& trans = *x_ptr; + const Eigen::Vector3f& x = points_ptr[point_idx]; const Eigen::Vector3f transed_x = trans.linear() * x + trans.translation(); if (enable_surface_validation) { - const Eigen::Vector3f& normal = thrust::raw_pointer_cast(normals_ptr)[point_idx]; + const Eigen::Vector3f& normal = normals_ptr[point_idx]; const Eigen::Vector3f transed_normal = trans.linear() * normal; if (transed_x.normalized().dot(transed_normal) > surface_validation_thresh) { @@ -84,13 +84,13 @@ struct lookup_voxels_kernel { // f(50) = 0.643 f(40) = 0.766 f(30) = 0.866 f(20) = 0.940 static const float constexpr surface_validation_thresh = 0.174; // cos(80.0 * M_PI / 180.0) - thrust::device_ptr x_ptr; + const Eigen::Isometry3f* x_ptr; - thrust::device_ptr voxelmap_info_ptr; - thrust::device_ptr buckets_ptr; + const VoxelMapInfo* voxelmap_info_ptr; + const VoxelBucket* buckets_ptr; - thrust::device_ptr points_ptr; - thrust::device_ptr normals_ptr; + const Eigen::Vector3f* points_ptr; + const Eigen::Vector3f* normals_ptr; }; struct invalid_correspondence_kernel { diff --git a/include/gtsam_points/cuda/kernels/vector3_hash.cuh b/include/gtsam_points/cuda/kernels/vector3_hash.cuh index 56b00b3b..ebf66a9e 100644 --- a/include/gtsam_points/cuda/kernels/vector3_hash.cuh +++ b/include/gtsam_points/cuda/kernels/vector3_hash.cuh @@ -53,7 +53,7 @@ inline __host__ __device__ Eigen::Vector3i calc_voxel_coord(const Eigen::Vector3 inline __host__ __device__ int lookup_voxel( const int max_bucket_scan_count, const int num_buckets, - const thrust::device_ptr& buckets_ptr, + const VoxelBucket* buckets_ptr, const float resolution, const Eigen::Vector3f& x) { Eigen::Vector3i coord = calc_voxel_coord(x, resolution); @@ -61,7 +61,7 @@ inline __host__ __device__ int lookup_voxel( for (int i = 0; i < max_bucket_scan_count; i++) { uint64_t bucket_index = (hash + i) % num_buckets; - const auto& bucket = thrust::raw_pointer_cast(buckets_ptr)[bucket_index]; + const auto& bucket = buckets_ptr[bucket_index]; if (bucket.second < 0) { return -1; diff --git a/include/gtsam_points/cuda/kernels/vgicp_derivatives.cuh b/include/gtsam_points/cuda/kernels/vgicp_derivatives.cuh index 4cd7a6b8..2c4dbd1c 100644 --- a/include/gtsam_points/cuda/kernels/vgicp_derivatives.cuh +++ b/include/gtsam_points/cuda/kernels/vgicp_derivatives.cuh @@ -14,10 +14,10 @@ namespace gtsam_points { struct vgicp_derivatives_kernel { vgicp_derivatives_kernel( - const thrust::device_ptr& linearization_point_ptr, + const Eigen::Isometry3f* linearization_point_ptr, const GaussianVoxelMapGPU& voxelmap, - const thrust::device_ptr& source_means, - const thrust::device_ptr& source_covs) + const Eigen::Vector3f* source_means, + const Eigen::Matrix3f* source_covs) : linearization_point_ptr(linearization_point_ptr), voxel_num_points_ptr(voxelmap.num_points), voxel_means_ptr(voxelmap.voxel_means), @@ -32,18 +32,18 @@ struct vgicp_derivatives_kernel { return LinearizedSystem6::zero(); } - const Eigen::Isometry3f& x = *thrust::raw_pointer_cast(linearization_point_ptr); + const Eigen::Isometry3f& x = *linearization_point_ptr; const Eigen::Matrix3f R = x.linear(); const Eigen::Vector3f t = x.translation(); - const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(source_means_ptr)[source_idx]; - const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(source_covs_ptr)[source_idx]; + const Eigen::Vector3f& mean_A = source_means_ptr[source_idx]; + const Eigen::Matrix3f& cov_A = source_covs_ptr[source_idx]; const Eigen::Vector3f transed_mean_A = R * mean_A + t; - const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[target_idx]; - const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[target_idx]; + const Eigen::Vector3f& mean_B = voxel_means_ptr[target_idx]; + const Eigen::Matrix3f& cov_B = voxel_covs_ptr[target_idx]; - const int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[target_idx]; + const int num_points = voxel_num_points_ptr[target_idx]; const Eigen::Matrix3f RCR = (R * cov_A * R.transpose()); const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); @@ -71,23 +71,23 @@ struct vgicp_derivatives_kernel { return linearized; } - thrust::device_ptr linearization_point_ptr; + const Eigen::Isometry3f* linearization_point_ptr; - thrust::device_ptr voxel_num_points_ptr; - thrust::device_ptr voxel_means_ptr; - thrust::device_ptr voxel_covs_ptr; + const int* voxel_num_points_ptr; + const Eigen::Vector3f* voxel_means_ptr; + const Eigen::Matrix3f* voxel_covs_ptr; - thrust::device_ptr source_means_ptr; - thrust::device_ptr source_covs_ptr; + const Eigen::Vector3f* source_means_ptr; + const Eigen::Matrix3f* source_covs_ptr; }; struct vgicp_error_kernel { vgicp_error_kernel( - const thrust::device_ptr& linearization_point_ptr, - const thrust::device_ptr& evaluation_point_ptr, + const Eigen::Isometry3f* linearization_point_ptr, + const Eigen::Isometry3f* evaluation_point_ptr, const GaussianVoxelMapGPU& voxelmap, - const thrust::device_ptr& source_means, - const thrust::device_ptr& source_covs) + const Eigen::Vector3f* source_means, + const Eigen::Matrix3f* source_covs) : linearization_point_ptr(linearization_point_ptr), evaluation_point_ptr(evaluation_point_ptr), voxel_num_points_ptr(voxelmap.num_points), @@ -103,21 +103,21 @@ struct vgicp_error_kernel { return 0.0f; } - const Eigen::Isometry3f& xl = *thrust::raw_pointer_cast(linearization_point_ptr); + const Eigen::Isometry3f& xl = *linearization_point_ptr; const Eigen::Matrix3f Rl = xl.linear(); - const Eigen::Isometry3f& xe = *thrust::raw_pointer_cast(evaluation_point_ptr); + const Eigen::Isometry3f& xe = *evaluation_point_ptr; const Eigen::Matrix3f Re = xe.linear(); const Eigen::Vector3f te = xe.translation(); - const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(source_means_ptr)[source_idx]; - const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(source_covs_ptr)[source_idx]; + const Eigen::Vector3f& mean_A = source_means_ptr[source_idx]; + const Eigen::Matrix3f& cov_A = source_covs_ptr[source_idx]; const Eigen::Vector3f transed_mean_A = Re * mean_A + te; - const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[target_idx]; - const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[target_idx]; + const Eigen::Vector3f& mean_B = voxel_means_ptr[target_idx]; + const Eigen::Matrix3f& cov_B = voxel_covs_ptr[target_idx]; - const int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[target_idx]; + const int num_points = voxel_num_points_ptr[target_idx]; const Eigen::Matrix3f RCR = (Rl * cov_A * Rl.transpose()); const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); @@ -126,15 +126,15 @@ struct vgicp_error_kernel { return 0.5f * error.transpose() * RCR_inv * error; } - thrust::device_ptr linearization_point_ptr; - thrust::device_ptr evaluation_point_ptr; + const Eigen::Isometry3f* linearization_point_ptr; + const Eigen::Isometry3f* evaluation_point_ptr; - thrust::device_ptr voxel_num_points_ptr; - thrust::device_ptr voxel_means_ptr; - thrust::device_ptr voxel_covs_ptr; + const int* voxel_num_points_ptr; + const Eigen::Vector3f* voxel_means_ptr; + const Eigen::Matrix3f* voxel_covs_ptr; - thrust::device_ptr source_means_ptr; - thrust::device_ptr source_covs_ptr; + const Eigen::Vector3f* source_means_ptr; + const Eigen::Matrix3f* source_covs_ptr; }; } // namespace gtsam_points diff --git a/include/gtsam_points/cuda/nonlinear_factor_set_gpu.hpp b/include/gtsam_points/cuda/nonlinear_factor_set_gpu.hpp index cc1b69ef..0d7d0c72 100644 --- a/include/gtsam_points/cuda/nonlinear_factor_set_gpu.hpp +++ b/include/gtsam_points/cuda/nonlinear_factor_set_gpu.hpp @@ -99,7 +99,6 @@ class NonlinearFactorSetGPU : public NonlinearFactorSet { unsigned char* buffer; }; - private: CUstream_st* stream; diff --git a/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp index 1d7b1de5..f5f086a5 100644 --- a/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp @@ -94,7 +94,7 @@ void IntegratedColorConsistencyFactor_knn_search(pt.data(), 1, &k_index, &k_sq_dist); + size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); correspondences[i] = (num_found && k_sq_dist < max_correspondence_distance_sq) ? k_index : -1; } } diff --git a/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp index ed245b69..ba1e76f7 100644 --- a/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp @@ -95,7 +95,7 @@ void IntegratedColoredGICPFactor_: size_t k_index = -1; double k_sq_dist = -1; - size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist); + size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); correspondences[i] = (num_found && k_sq_dist < max_correspondence_distance_sq) ? k_index : -1; } diff --git a/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp index 27bfd38b..d96a5211 100644 --- a/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp @@ -159,7 +159,7 @@ void IntegratedCT_GICPFactor_::update_correspondences( size_t k_index = -1; double k_sq_dist = std::numeric_limits::max(); - size_t num_found = this->target_tree->knn_search(transed_pt.data(), 1, &k_index, &k_sq_dist); + size_t num_found = this->target_tree->knn_search(transed_pt.data(), 1, &k_index, &k_sq_dist, this->max_correspondence_distance_sq); if (num_found == 0 || k_sq_dist > this->max_correspondence_distance_sq) { this->correspondences[i] = -1; diff --git a/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp index dd08d490..a3fbb9eb 100644 --- a/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp @@ -196,7 +196,7 @@ void IntegratedCT_ICPFactor_::update_correspondences() size_t k_index = -1; double k_sq_dist = -1; - size_t num_found = target_tree->knn_search(transed_pt.data(), 1, &k_index, &k_sq_dist); + size_t num_found = target_tree->knn_search(transed_pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); if (num_found == 0 || k_sq_dist > max_correspondence_distance_sq) { correspondences[i] = -1; diff --git a/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp index 7f4b11e7..e7482c11 100644 --- a/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_gicp_factor_impl.hpp @@ -105,6 +105,10 @@ void IntegratedGICPFactor_::update_correspondences(con } } + if (do_update) { + last_correspondence_point = delta; + } + correspondences.resize(frame::size(*source)); mahalanobis.resize(frame::size(*source)); @@ -115,7 +119,7 @@ void IntegratedGICPFactor_::update_correspondences(con size_t k_index = -1; double k_sq_dist = -1; - size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist); + size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); correspondences[i] = (num_found && k_sq_dist < max_correspondence_distance_sq) ? k_index : -1; } @@ -130,8 +134,6 @@ void IntegratedGICPFactor_::update_correspondences(con mahalanobis[i](3, 3) = 0.0; } } - - last_correspondence_point = delta; } template diff --git a/include/gtsam_points/factors/impl/integrated_icp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_icp_factor_impl.hpp index bc84c541..4be5f2f4 100644 --- a/include/gtsam_points/factors/impl/integrated_icp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_icp_factor_impl.hpp @@ -119,7 +119,7 @@ void IntegratedICPFactor_::update_correspondences(cons size_t k_index = -1; double k_sq_dist = -1; - size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist); + size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); if (num_found == 0 || k_sq_dist > max_correspondence_distance_sq) { correspondences[i] = -1; diff --git a/include/gtsam_points/factors/impl/integrated_loam_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_loam_factor_impl.hpp index 68a0c276..bc8922dc 100644 --- a/include/gtsam_points/factors/impl/integrated_loam_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_loam_factor_impl.hpp @@ -70,7 +70,7 @@ void IntegratedPointToPlaneFactor_::update_corresponde std::array k_indices; std::array k_sq_dists; - size_t num_found = target_tree->knn_search(pt.data(), 3, k_indices.data(), k_sq_dists.data()); + size_t num_found = target_tree->knn_search(pt.data(), 3, k_indices.data(), k_sq_dists.data(), max_correspondence_distance_sq); if (num_found < 3 || k_sq_dists.back() > max_correspondence_distance_sq) { correspondences[i] = std::make_tuple(-1, -1, -1); @@ -236,7 +236,7 @@ void IntegratedPointToEdgeFactor_::update_corresponden std::array k_indices; std::array k_sq_dists; - size_t num_found = target_tree->knn_search(pt.data(), 2, k_indices.data(), k_sq_dists.data()); + size_t num_found = target_tree->knn_search(pt.data(), 2, k_indices.data(), k_sq_dists.data(), max_correspondence_distance_sq); if (num_found < 2 || k_sq_dists.back() > max_correspondence_distance_sq) { correspondences[i] = std::make_tuple(-1, -1); diff --git a/include/gtsam_points/factors/integrated_matching_cost_factor.hpp b/include/gtsam_points/factors/integrated_matching_cost_factor.hpp index 056ed354..1dae7841 100644 --- a/include/gtsam_points/factors/integrated_matching_cost_factor.hpp +++ b/include/gtsam_points/factors/integrated_matching_cost_factor.hpp @@ -41,6 +41,8 @@ class IntegratedMatchingCostFactor : public gtsam::NonlinearFactor { virtual double error(const gtsam::Values& values) const override; virtual boost::shared_ptr linearize(const gtsam::Values& values) const override; + const Eigen::Isometry3d& get_fixed_target_pose() const { return fixed_target_pose; } + public: Eigen::Isometry3d calc_delta(const gtsam::Values& values) const; diff --git a/include/gtsam_points/factors/integrated_vgicp_derivatives.cuh b/include/gtsam_points/factors/integrated_vgicp_derivatives.cuh index e9ec5c4c..d57d8d45 100644 --- a/include/gtsam_points/factors/integrated_vgicp_derivatives.cuh +++ b/include/gtsam_points/factors/integrated_vgicp_derivatives.cuh @@ -7,10 +7,6 @@ #include #include -#include -#include -#include - #include struct CUstream_st; @@ -41,24 +37,18 @@ public: // synchronized interface LinearizedSystem6 linearize(const Eigen::Isometry3f& x); double compute_error(const Eigen::Isometry3f& xl, const Eigen::Isometry3f& xe); - void update_inliers(const Eigen::Isometry3f& x, const thrust::device_ptr& x_ptr, bool force_update = false); + void update_inliers(const Eigen::Isometry3f& x, const Eigen::Isometry3f* d_x, bool force_update = false); // async interface void sync_stream(); - void issue_linearize(const thrust::device_ptr& x, const thrust::device_ptr& output); - void issue_compute_error( - const thrust::device_ptr& xl, - const thrust::device_ptr& xe, - const thrust::device_ptr& output); + void issue_linearize(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output); + void issue_compute_error(const Eigen::Isometry3f* d_xl, const Eigen::Isometry3f* d_xe, float* d_output); template - void issue_linearize_impl(const thrust::device_ptr& x, const thrust::device_ptr& output); + void issue_linearize_impl(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output); template - void issue_compute_error_impl( - const thrust::device_ptr& xl, - const thrust::device_ptr& xe, - const thrust::device_ptr& output); + void issue_compute_error_impl(const Eigen::Isometry3f* d_xl, const Eigen::Isometry3f* d_xe, float* d_output); private: bool enable_surface_validation; @@ -66,7 +56,7 @@ private: double inlier_update_thresh_angle; bool external_stream; - cudaStream_t stream; + CUstream_st* stream; std::shared_ptr temp_buffer; GaussianVoxelMapGPU::ConstPtr target; diff --git a/src/gtsam_points/ann/incremental_covariance_voxelmap.cpp b/src/gtsam_points/ann/incremental_covariance_voxelmap.cpp index 5f70215b..e6dd2947 100644 --- a/src/gtsam_points/ann/incremental_covariance_voxelmap.cpp +++ b/src/gtsam_points/ann/incremental_covariance_voxelmap.cpp @@ -190,18 +190,18 @@ void IncrementalCovarianceVoxelMap::insert(const PointCloud& points) { prof.push("done"); } -size_t IncrementalCovarianceVoxelMap::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { - return IncrementalVoxelMap::knn_search(pt, k, k_indices, k_sq_dists); +size_t IncrementalCovarianceVoxelMap::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist) const { + return IncrementalVoxelMap::knn_search(pt, k, k_indices, k_sq_dists, max_sq_dist); } -size_t IncrementalCovarianceVoxelMap::knn_search_force(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { +size_t IncrementalCovarianceVoxelMap::knn_search_force(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist) const { const Eigen::Vector4d query = (Eigen::Vector4d() << pt[0], pt[1], pt[2], 1.0).finished(); const Eigen::Vector3i center = fast_floor(query * inv_leaf_size).template head<3>(); size_t voxel_index = 0; const auto index_transform = [&](const size_t point_index) { return calc_index(voxel_index, point_index); }; - KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform); + KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform, max_sq_dist); for (const auto& offset : offsets) { const Eigen::Vector3i coord = center + offset; const auto found = voxels.find(coord); diff --git a/src/gtsam_points/ann/intensity_kdtree.cpp b/src/gtsam_points/ann/intensity_kdtree.cpp index 71013721..460aa28d 100644 --- a/src/gtsam_points/ann/intensity_kdtree.cpp +++ b/src/gtsam_points/ann/intensity_kdtree.cpp @@ -18,7 +18,7 @@ IntensityKdTree::IntensityKdTree(const Eigen::Vector4d* points, const double* in IntensityKdTree::~IntensityKdTree() {} -size_t IntensityKdTree::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { +size_t IntensityKdTree::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist) const { if (search_eps > 0.0) { nanoflann::KNNResultSet result_set(k); result_set.init(k_indices, k_sq_dists); diff --git a/src/gtsam_points/ann/kdtree.cpp b/src/gtsam_points/ann/kdtree.cpp index a136f152..af67c4e9 100644 --- a/src/gtsam_points/ann/kdtree.cpp +++ b/src/gtsam_points/ann/kdtree.cpp @@ -26,8 +26,10 @@ KdTree::KdTree(const Eigen::Vector4d* points, int num_points, int build_num_thre KdTree::~KdTree() {} -size_t KdTree::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { - return index->knn_search(Eigen::Map(pt), k, k_indices, k_sq_dists); +size_t KdTree::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist) const { + KnnSetting setting; + setting.max_sq_dist = max_sq_dist; + return index->knn_search(Eigen::Map(pt), k, k_indices, k_sq_dists, setting); } }; // namespace gtsam_points \ No newline at end of file diff --git a/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu b/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp similarity index 93% rename from src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu rename to src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp index bb771c35..85c6387b 100644 --- a/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu +++ b/src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp @@ -4,7 +4,6 @@ #include #include -#include #include namespace gtsam_points { @@ -12,14 +11,14 @@ namespace gtsam_points { NonlinearFactorSetGPU::DeviceBuffer::DeviceBuffer() : size(0), buffer(nullptr) {} NonlinearFactorSetGPU::DeviceBuffer::~DeviceBuffer() { - if(buffer) { + if (buffer) { check_error << cudaFreeAsync(buffer, 0); } } void NonlinearFactorSetGPU::DeviceBuffer::resize(size_t size, CUstream_st* stream) { - if(this->size < size) { - if(buffer) { + if (this->size < size) { + if (buffer) { check_error << cudaFreeAsync(buffer, stream); } check_error << cudaMallocAsync(&buffer, size, stream); @@ -163,12 +162,8 @@ void NonlinearFactorSetGPU::error(const gtsam::Values& values) { } // copy input buffer from cpu to gpu - check_error << cudaMemcpyAsync( - evaluation_input_buffer_gpu->data(), - evaluation_input_buffer_cpu.data(), - input_buffer_size, - cudaMemcpyHostToDevice, - stream); + check_error + << cudaMemcpyAsync(evaluation_input_buffer_gpu->data(), evaluation_input_buffer_cpu.data(), input_buffer_size, cudaMemcpyHostToDevice, stream); check_error << cudaStreamSynchronize(stream); // issue error computation @@ -195,12 +190,8 @@ void NonlinearFactorSetGPU::error(const gtsam::Values& values) { } // copy output buffer from gpu to cpu - check_error << cudaMemcpyAsync( - evaluation_output_buffer_cpu.data(), - evaluation_output_buffer_gpu->data(), - output_buffer_size, - cudaMemcpyDeviceToHost, - stream); + check_error + << cudaMemcpyAsync(evaluation_output_buffer_cpu.data(), evaluation_output_buffer_gpu->data(), output_buffer_size, cudaMemcpyDeviceToHost, stream); check_error << cudaStreamSynchronize(stream); // store computed results diff --git a/src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu b/src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp similarity index 94% rename from src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu rename to src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp index 7acf01e5..5ab2f5ac 100644 --- a/src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu +++ b/src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp @@ -6,7 +6,6 @@ #include #include -#include #include #include diff --git a/src/gtsam_points/factors/integrated_vgicp_derivatives.cu b/src/gtsam_points/factors/integrated_vgicp_derivatives.cu index fbc60f7f..9b07cfe8 100644 --- a/src/gtsam_points/factors/integrated_vgicp_derivatives.cu +++ b/src/gtsam_points/factors/integrated_vgicp_derivatives.cu @@ -64,8 +64,8 @@ LinearizedSystem6 IntegratedVGICPDerivatives::linearize(const Eigen::Isometry3f& x_ptr[0] = x; - update_inliers(x, x_ptr.data()); - issue_linearize(x_ptr.data(), output_ptr.data()); + update_inliers(x, thrust::raw_pointer_cast(x_ptr.data())); + issue_linearize(thrust::raw_pointer_cast(x_ptr.data()), thrust::raw_pointer_cast(output_ptr.data())); sync_stream(); LinearizedSystem6 linearized = output_ptr[0]; @@ -73,38 +73,36 @@ LinearizedSystem6 IntegratedVGICPDerivatives::linearize(const Eigen::Isometry3f& return linearized; } -double IntegratedVGICPDerivatives::compute_error(const Eigen::Isometry3f& xl, const Eigen::Isometry3f& xe) { +double IntegratedVGICPDerivatives::compute_error(const Eigen::Isometry3f& d_xl, const Eigen::Isometry3f& d_xe) { thrust::device_vector xs_ptr(2); - xs_ptr[0] = xl; - xs_ptr[1] = xe; + xs_ptr[0] = d_xl; + xs_ptr[1] = d_xe; thrust::device_vector output_ptr(1); - issue_compute_error(xs_ptr.data(), xs_ptr.data() + 1, output_ptr.data()); + issue_compute_error( + thrust::raw_pointer_cast(xs_ptr.data()), + thrust::raw_pointer_cast(xs_ptr.data() + 1), + thrust::raw_pointer_cast(output_ptr.data())); sync_stream(); float error = output_ptr[0]; return error; } -void IntegratedVGICPDerivatives::issue_linearize( - const thrust::device_ptr& x, - const thrust::device_ptr& output) { +void IntegratedVGICPDerivatives::issue_linearize(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output) { if (enable_surface_validation) { - issue_linearize_impl(x, output); + issue_linearize_impl(d_x, d_output); } else { - issue_linearize_impl(x, output); + issue_linearize_impl(d_x, d_output); } } -void IntegratedVGICPDerivatives::issue_compute_error( - const thrust::device_ptr& xl, - const thrust::device_ptr& xe, - const thrust::device_ptr& output) { +void IntegratedVGICPDerivatives::issue_compute_error(const Eigen::Isometry3f* d_xl, const Eigen::Isometry3f* d_xe, float* d_output) { // if (enable_surface_validation) { - issue_compute_error_impl(xl, xe, output); + issue_compute_error_impl(d_xl, d_xe, d_output); } else { - issue_compute_error_impl(xl, xe, output); + issue_compute_error_impl(d_xl, d_xe, d_output); } } diff --git a/src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu b/src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu index 213633cb..b66a7644 100644 --- a/src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu +++ b/src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu @@ -22,59 +22,25 @@ namespace gtsam_points { template -void IntegratedVGICPDerivatives::issue_compute_error_impl( - const thrust::device_ptr& xl, - const thrust::device_ptr& xe, - const thrust::device_ptr& output) { +void IntegratedVGICPDerivatives::issue_compute_error_impl(const Eigen::Isometry3f* d_xl, const Eigen::Isometry3f* d_xe, float* d_output) { // - lookup_voxels_kernel corr_kernel( - *target, - thrust::device_ptr(source->points_gpu), - thrust::device_ptr(source->normals_gpu), - xl); + lookup_voxels_kernel corr_kernel(*target, source->points_gpu, source->normals_gpu, d_xl); cub::TransformInputIterator, lookup_voxels_kernel, int*> corr_first(source_inliers, corr_kernel); - vgicp_error_kernel error_kernel( - xl, - xe, - *target, - thrust::device_ptr(source->points_gpu), - thrust::device_ptr(source->covs_gpu)); + vgicp_error_kernel error_kernel(d_xl, d_xe, *target, source->points_gpu, source->covs_gpu); cub::TransformInputIterator first(corr_first, error_kernel); void* temp_storage = nullptr; size_t temp_storage_bytes = 0; - cub::DeviceReduce::Reduce( - temp_storage, - temp_storage_bytes, - first, - thrust::raw_pointer_cast(output), - num_inliers, - thrust::plus(), - 0.0f, - stream); + cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, first, d_output, num_inliers, thrust::plus(), 0.0f, stream); temp_storage = temp_buffer->get_buffer(temp_storage_bytes); - cub::DeviceReduce::Reduce( - temp_storage, - temp_storage_bytes, - first, - thrust::raw_pointer_cast(output), - num_inliers, - thrust::plus(), - 0.0f, - stream); + cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, first, d_output, num_inliers, thrust::plus(), 0.0f, stream); } -template void IntegratedVGICPDerivatives::issue_compute_error_impl( - const thrust::device_ptr&, - const thrust::device_ptr&, - const thrust::device_ptr&); -template void IntegratedVGICPDerivatives::issue_compute_error_impl( - const thrust::device_ptr&, - const thrust::device_ptr&, - const thrust::device_ptr&); +template void IntegratedVGICPDerivatives::issue_compute_error_impl(const Eigen::Isometry3f*, const Eigen::Isometry3f*, float*); +template void IntegratedVGICPDerivatives::issue_compute_error_impl(const Eigen::Isometry3f*, const Eigen::Isometry3f*, float*); } // namespace gtsam_points \ No newline at end of file diff --git a/src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu b/src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu index 163a2bca..462be8b2 100644 --- a/src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu +++ b/src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu @@ -29,10 +29,7 @@ struct valid_inlier_kernel { namespace gtsam_points { -void IntegratedVGICPDerivatives::update_inliers( - const Eigen::Isometry3f& x, - const thrust::device_ptr& x_ptr, - bool force_update) { +void IntegratedVGICPDerivatives::update_inliers(const Eigen::Isometry3f& x, const Eigen::Isometry3f* d_x, bool force_update) { if ( force_update || source_inliers == nullptr || large_displacement(inlier_evaluation_point, x, inlier_update_thresh_trans, inlier_update_thresh_angle)) { @@ -44,23 +41,15 @@ void IntegratedVGICPDerivatives::update_inliers( check_error << cudaMallocAsync(&valid_inliers, sizeof(int) * source->size(), stream); if (enable_surface_validation) { - lookup_voxels_kernel kernel( - *target, - thrust::device_ptr(source->points_gpu), - thrust::device_ptr(source->normals_gpu), - x_ptr); + lookup_voxels_kernel kernel(*target, source->points_gpu, source->normals_gpu, d_x); auto corr_first = thrust::make_transform_iterator(thrust::counting_iterator(0), kernel); auto corr_last = thrust::make_transform_iterator(thrust::counting_iterator(source->size()), kernel); - thrust::transform(thrust::cuda::par_nosync.on(stream), corr_first, corr_last, thrust::device_ptr(inliers), untie_pair_first()); + thrust::transform(thrust::cuda::par_nosync.on(stream), corr_first, corr_last, inliers, untie_pair_first()); } else { - lookup_voxels_kernel kernel( - *target, - thrust::device_ptr(source->points_gpu), - thrust::device_ptr(source->normals_gpu), - x_ptr); + lookup_voxels_kernel kernel(*target, source->points_gpu, source->normals_gpu, d_x); auto corr_first = thrust::make_transform_iterator(thrust::counting_iterator(0), kernel); auto corr_last = thrust::make_transform_iterator(thrust::counting_iterator(source->size()), kernel); - thrust::transform(thrust::cuda::par_nosync.on(stream), corr_first, corr_last, thrust::device_ptr(inliers), untie_pair_first()); + thrust::transform(thrust::cuda::par_nosync.on(stream), corr_first, corr_last, inliers, untie_pair_first()); } void* temp_storage = nullptr; diff --git a/src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu b/src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu index 3bc99c93..6cf49f6e 100644 --- a/src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu +++ b/src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu @@ -22,22 +22,12 @@ namespace gtsam_points { template -void IntegratedVGICPDerivatives::issue_linearize_impl( - const thrust::device_ptr& x, - const thrust::device_ptr& output) { +void IntegratedVGICPDerivatives::issue_linearize_impl(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output) { // - lookup_voxels_kernel corr_kernel( - *target, - thrust::device_ptr(source->points_gpu), - thrust::device_ptr(source->normals_gpu), - x); + lookup_voxels_kernel corr_kernel(*target, source->points_gpu, source->normals_gpu, d_x); cub::TransformInputIterator, lookup_voxels_kernel, int*> corr_first(source_inliers, corr_kernel); - vgicp_derivatives_kernel deriv_kernel( - x, - *target, - thrust::device_ptr(source->points_gpu), - thrust::device_ptr(source->covs_gpu)); + vgicp_derivatives_kernel deriv_kernel(d_x, *target, source->points_gpu, source->covs_gpu); cub::TransformInputIterator first(corr_first, deriv_kernel); void* temp_storage = nullptr; @@ -47,7 +37,7 @@ void IntegratedVGICPDerivatives::issue_linearize_impl( temp_storage, temp_storage_bytes, first, - thrust::raw_pointer_cast(output), + d_output, num_inliers, thrust::plus(), LinearizedSystem6::zero(), @@ -59,18 +49,14 @@ void IntegratedVGICPDerivatives::issue_linearize_impl( temp_storage, temp_storage_bytes, first, - thrust::raw_pointer_cast(output), + d_output, num_inliers, thrust::plus(), LinearizedSystem6::zero(), stream); } -template void IntegratedVGICPDerivatives::issue_linearize_impl( - const thrust::device_ptr& x, - const thrust::device_ptr& output); -template void IntegratedVGICPDerivatives::issue_linearize_impl( - const thrust::device_ptr& x, - const thrust::device_ptr& output); +template void IntegratedVGICPDerivatives::issue_linearize_impl(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output); +template void IntegratedVGICPDerivatives::issue_linearize_impl(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output); } // namespace gtsam_points \ No newline at end of file diff --git a/src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu b/src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp similarity index 91% rename from src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu rename to src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp index 43444a73..1ddc0bc2 100644 --- a/src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu +++ b/src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp @@ -3,6 +3,8 @@ #include +#include + #include #include @@ -194,13 +196,10 @@ void IntegratedVGICPFactorGPU::set_evaluation_point(const gtsam::Values& values, *evaluation_point = calc_delta(values); } -void IntegratedVGICPFactorGPU::issue_linearize( - const void* lin_input_cpu, - const void* lin_input_gpu, - void* lin_output_gpu) { +void IntegratedVGICPFactorGPU::issue_linearize(const void* lin_input_cpu, const void* lin_input_gpu, void* lin_output_gpu) { auto linearization_point = reinterpret_cast(lin_input_cpu); - auto linearization_point_gpu = thrust::reinterpret_pointer_cast>(lin_input_gpu); - auto linearized_gpu = thrust::reinterpret_pointer_cast>(lin_output_gpu); + auto linearization_point_gpu = reinterpret_cast(lin_input_gpu); + auto linearized_gpu = reinterpret_cast(lin_output_gpu); derivatives->update_inliers(*linearization_point, linearization_point_gpu); derivatives->issue_linearize(linearization_point_gpu, linearized_gpu); @@ -222,10 +221,10 @@ void IntegratedVGICPFactorGPU::issue_compute_error( auto linearization_point = reinterpret_cast(lin_input_cpu); auto evaluation_point = reinterpret_cast(eval_input_cpu); - auto linearization_point_gpu = thrust::reinterpret_pointer_cast>(lin_input_gpu); - auto evaluation_point_gpu = thrust::reinterpret_pointer_cast>(eval_input_gpu); + auto linearization_point_gpu = reinterpret_cast(lin_input_gpu); + auto evaluation_point_gpu = reinterpret_cast(eval_input_gpu); - auto error_gpu = thrust::reinterpret_pointer_cast>(eval_output_gpu); + auto error_gpu = reinterpret_cast(eval_output_gpu); derivatives->issue_compute_error(linearization_point_gpu, evaluation_point_gpu, error_gpu); } diff --git a/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu b/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu index 8320f1a9..36e6f5cd 100644 --- a/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu +++ b/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu @@ -130,7 +130,7 @@ public: const auto& trans = *thrust::raw_pointer_cast(delta_ptr); Eigen::Vector3f x_ = trans.linear() * x + trans.translation(); - int voxel_index = lookup_voxel(info.max_bucket_scan_count, info.num_buckets, buckets_ptr, info.voxel_resolution, x_); + int voxel_index = lookup_voxel(info.max_bucket_scan_count, info.num_buckets, thrust::raw_pointer_cast(buckets_ptr), info.voxel_resolution, x_); return voxel_index >= 0; } diff --git a/src/test/test_kdtree.cpp b/src/test/test_kdtree.cpp new file mode 100644 index 00000000..427ab6c5 --- /dev/null +++ b/src/test/test_kdtree.cpp @@ -0,0 +1,134 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +class KdTreeTest : public testing::Test, public testing::WithParamInterface { + virtual void SetUp() { + const int num_points = 1000; + const int num_queries = 100; + + std::mt19937 mt; + std::uniform_real_distribution<> udist(-1.0, 1.0); + + points.resize(num_points); + for (int i = 0; i < num_points; i++) { + points[i] << 100.0 * udist(mt), 100.0 * udist(mt), 100.0 * udist(mt), 1.0; + } + + queries.resize(num_queries); + for (int i = 0; i < num_queries / 2; i++) { + queries[i] << 100.0 * udist(mt), 100.0 * udist(mt), 100.0 * udist(mt), 1.0; + } + for (int i = num_queries / 2; i < num_queries; i++) { + queries[i] << 200.0 * udist(mt), 200.0 * udist(mt), 200.0 * udist(mt), 1.0; + } + + constexpr int max_k = 20; + gt_indices.resize(num_queries); + gt_sq_dists.resize(num_queries); + + for (int i = 0; i < num_queries; i++) { + std::vector> dists(num_points); + for (int j = 0; j < num_points; j++) { + dists[j] = {j, (points[j] - queries[i]).squaredNorm()}; + } + + std::sort(dists.begin(), dists.end(), [](const auto& a, const auto& b) { return a.second < b.second; }); + + gt_indices[i].resize(max_k); + gt_sq_dists[i].resize(max_k); + for (int j = 0; j < max_k; j++) { + gt_indices[i][j] = dists[j].first; + gt_sq_dists[i][j] = dists[j].second; + } + } + } + +public: + std::vector points; + std::vector queries; + std::vector> gt_indices; + std::vector> gt_sq_dists; +}; + +TEST_F(KdTreeTest, LoadCheck) { + ASSERT_NE(points.size(), 0); + ASSERT_NE(queries.size(), 0); + ASSERT_EQ(gt_indices.size(), queries.size()); + ASSERT_EQ(gt_sq_dists.size(), queries.size()); +} + +INSTANTIATE_TEST_SUITE_P(gtsam_points, KdTreeTest, testing::Values("KdTree", "KdTreeMT", "KdTree2", "KdTree2MT"), [](const auto& info) { + return info.param; +}); + +TEST_P(KdTreeTest, KdTreeTest) { + gtsam_points::NearestNeighborSearch::ConstPtr kdtree; + + if (GetParam() == "KdTree") { + kdtree = std::make_shared(points.data(), points.size()); + } else if (GetParam() == "KdTreeMT") { + kdtree = std::make_shared(points.data(), points.size(), 2); + } else if (GetParam() == "KdTree2") { + auto pts = std::make_shared(points); + kdtree = std::make_shared>(pts); + } else if (GetParam() == "KdTree2MT") { + auto pts = std::make_shared(points); + kdtree = std::make_shared>(pts, 2); + } else { + FAIL() << "Unknown KdTree type: " << GetParam(); + } + + const std::vector ks = {1, 2, 3, 5, 10, 15, 20}; + const double max_sq_dist = 10.0; + + for (int k : ks) { + for (int i = 0; i < queries.size(); i++) { + const auto& query = queries[i]; + + std::vector k_indices(k); + std::vector k_sq_dists(k); + + const auto num_found = kdtree->knn_search(query.data(), k, k_indices.data(), k_sq_dists.data()); + EXPECT_EQ(num_found, k); + + for (int j = 0; j < k; j++) { + const double sq_dist = (points[k_indices[j]] - query).squaredNorm(); + EXPECT_NEAR(k_sq_dists[j], sq_dist, 1.0e-6); + EXPECT_NEAR(k_sq_dists[j], gt_sq_dists[i][j], 1.0e-6); + } + } + + for (int i = 0; i < queries.size(); i++) { + const auto& query = queries[i]; + + std::vector k_indices(k); + std::vector k_sq_dists(k); + + const auto num_found = kdtree->knn_search(query.data(), k, k_indices.data(), k_sq_dists.data(), max_sq_dist); + EXPECT_LE(num_found, k); + + for (int j = 0; j < num_found; j++) { + const double sq_dist = (points[k_indices[j]] - query).squaredNorm(); + EXPECT_NEAR(k_sq_dists[j], sq_dist, 1.0e-6); + EXPECT_NEAR(k_sq_dists[j], gt_sq_dists[i][j], 1.0e-6); + } + + for (int j = num_found; j < k; j++) { + EXPECT_GT(gt_sq_dists[i][j], max_sq_dist); + } + } + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file