Skip to content

Commit

Permalink
Merge pull request #21 from koide3/tbb
Browse files Browse the repository at this point in the history
Support GTSAM built with TBB
  • Loading branch information
koide3 committed Aug 21, 2024
2 parents f5eb8d3 + 4c7ee15 commit 5010384
Show file tree
Hide file tree
Showing 36 changed files with 321 additions and 226 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ if(BUILD_WITH_CUDA)
src/gtsam_points/cuda/cuda_graph.cu
src/gtsam_points/cuda/cuda_graph_exec.cu
# src/gtsam_points/cuda/gl_buffer_map.cu
src/gtsam_points/cuda/nonlinear_factor_set_gpu.cu
src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cu
src/gtsam_points/cuda/nonlinear_factor_set_gpu.cpp
src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp
src/gtsam_points/cuda/stream_roundrobin.cu
src/gtsam_points/cuda/stream_temp_buffer_roundrobin.cu
# types
Expand All @@ -197,7 +197,7 @@ if(BUILD_WITH_CUDA)
src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu
src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu
src/gtsam_points/factors/integrated_vgicp_derivatives_linearize.cu
src/gtsam_points/factors/integrated_vgicp_factor_gpu.cu
src/gtsam_points/factors/integrated_vgicp_factor_gpu.cpp
# util
src/gtsam_points/util/easy_profiler_cuda.cu
)
Expand Down
4 changes: 2 additions & 2 deletions include/gtsam_points/ann/impl/incremental_voxelmap_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ void IncrementalVoxelMap<VoxelContents>::insert(const PointCloud& points) {
}

template <typename VoxelContents>
size_t IncrementalVoxelMap<VoxelContents>::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const {
size_t IncrementalVoxelMap<VoxelContents>::knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist) const {
const Eigen::Vector4d query = (Eigen::Vector4d() << pt[0], pt[1], pt[2], 1.0).finished();
const Eigen::Vector3i center = fast_floor(query * inv_leaf_size).template head<3>();

size_t voxel_index = 0;
const auto index_transform = [&](const size_t point_index) { return calc_index(voxel_index, point_index); };

KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform);
KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform, max_sq_dist);
for (const auto& offset : offsets) {
const Eigen::Vector3i coord = center + offset;
const auto found = voxels.find(coord);
Expand Down
10 changes: 8 additions & 2 deletions include/gtsam_points/ann/incremental_covariance_voxelmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,16 @@ struct IncrementalCovarianceVoxelMap : public IncrementalVoxelMap<IncrementalCov
virtual void insert(const PointCloud& points) override;

/// @brief Find k-nearest neighbors. This only finds neighbors with valid covariances.
virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override;
virtual size_t knn_search(
const double* pt,
size_t k,
size_t* k_indices,
double* k_sq_dists,
double max_sq_dist = std::numeric_limits<double>::max()) const override;

/// @brief Find k-nearest neighbors. This finds neighbors regardless of the validity of covariances.
size_t knn_search_force(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const;
size_t knn_search_force(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist = std::numeric_limits<double>::max())
const;

/// @brief Get valid point indices. If num_threads is -1, the member variable num_threads is used.
std::vector<size_t> valid_indices(int num_threads = -1) const;
Expand Down
7 changes: 6 additions & 1 deletion include/gtsam_points/ann/incremental_voxelmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ struct IncrementalVoxelMap : public NearestNeighborSearch {
/// @param k_indices Indices of the k nearest neighbors
/// @param k_sq_dists Squared distances of the k nearest neighbors
/// @return Number of found neighbors
virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override;
virtual size_t knn_search(
const double* pt,
size_t k,
size_t* k_indices,
double* k_sq_dists,
double max_sq_dist = std::numeric_limits<double>::max()) const override;

/// @brief Calculate the global point index from the voxel index and the point index.
inline size_t calc_index(const size_t voxel_id, const size_t point_id) const { return (voxel_id << point_id_bits) | point_id; }
Expand Down
7 changes: 6 additions & 1 deletion include/gtsam_points/ann/intensity_kdtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,12 @@ struct IntensityKdTree : public NearestNeighborSearch {
return false;
}

virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override;
virtual size_t knn_search(
const double* pt,
size_t k,
size_t* k_indices,
double* k_sq_dists,
double max_sq_dist = std::numeric_limits<double>::max()) const override;

public:
const int num_points;
Expand Down
7 changes: 6 additions & 1 deletion include/gtsam_points/ann/kdtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ struct KdTree : public NearestNeighborSearch {
/// @param k_indices Indices of k nearest neighbors
/// @param k_sq_dists Squared distances of k nearest neighbors
/// @return Number of neighbors found
virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override;
virtual size_t knn_search(
const double* pt,
size_t k,
size_t* k_indices,
double* k_sq_dists,
double max_sq_dist = std::numeric_limits<double>::max()) const override;

public:
const int num_points;
Expand Down
11 changes: 9 additions & 2 deletions include/gtsam_points/ann/kdtree2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,15 @@ struct KdTree2 : public NearestNeighborSearch {
/// @param k_indices Indices of k nearest neighbors
/// @param k_sq_dists Squared distances of k nearest neighbors
/// @return Number of neighbors found
virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const override {
return index->knn_search(Eigen::Map<const Eigen::Vector3d>(pt), k, k_indices, k_sq_dists);
virtual size_t knn_search(
const double* pt,
size_t k,
size_t* k_indices,
double* k_sq_dists,
double max_sq_dist = std::numeric_limits<double>::max()) const override {
KnnSetting setting;
setting.max_sq_dist = max_sq_dist;
return index->knn_search(Eigen::Map<const Eigen::Vector3d>(pt), k, k_indices, k_sq_dists, setting);
}

public:
Expand Down
13 changes: 10 additions & 3 deletions include/gtsam_points/ann/knn_result.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: MIT
#pragma once

#include <limits>
#include <iostream>

namespace gtsam_points {
Expand All @@ -16,7 +17,8 @@ struct KnnSetting {
}

public:
double epsilon = 0.0; ///< Early termination threshold
double max_sq_dist = std::numeric_limits<double>::max(); ///< Maximum squared distance to search
double epsilon = 0.0; ///< Early termination threshold
};

/// @brief Identity transformation function. (use std::identity instead in C++20)
Expand All @@ -39,7 +41,12 @@ struct KnnResult {
/// @param distances Buffer to store distances (must be larger than k=max(N, num_neighbors))
/// @param num_neighbors Number of neighbors to search (must be -1 for static case N > 0)
/// @param index_transform Index transformation function (e.g., local point index -> global voxel + point index)
explicit KnnResult(size_t* indices, double* distances, int num_neighbors = -1, const IndexTransform& index_transform = IndexTransform())
explicit KnnResult(
size_t* indices,
double* distances,
int num_neighbors = -1,
const IndexTransform& index_transform = IndexTransform(),
double max_sq_dist = std::numeric_limits<double>::max())
: index_transform(index_transform),
capacity(num_neighbors),
num_found_neighbors(0),
Expand All @@ -59,7 +66,7 @@ struct KnnResult {
}

std::fill(this->indices, this->indices + buffer_size(), INVALID);
std::fill(this->distances, this->distances + buffer_size(), std::numeric_limits<double>::max());
std::fill(this->distances, this->distances + buffer_size(), max_sq_dist);
}

/// @brief Buffer size (i.e., Maximum number of neighbors)
Expand Down
6 changes: 5 additions & 1 deletion include/gtsam_points/ann/nearest_neighbor_search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#pragma once

#include <vector>
#include <limits>

namespace gtsam_points {

Expand All @@ -25,6 +26,9 @@ struct NearestNeighborSearch {
* @param k_indices Indices of k-nearest neighbors
* @param k_sq_dists Squared distances to the neighbors
*/
virtual size_t knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists) const { return 0; };
virtual size_t
knn_search(const double* pt, size_t k, size_t* k_indices, double* k_sq_dists, double max_sq_dist = std::numeric_limits<double>::max()) const {
return 0;
};
};
} // namespace gtsam_points
4 changes: 2 additions & 2 deletions include/gtsam_points/ann/small_kdtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ struct UnsafeKdTree {
/// @return Number of found neighbors
size_t knn_search(const Eigen::Vector3d& query_, int k, size_t* k_indices, double* k_sq_dists, const KnnSetting& setting = KnnSetting()) const {
const Eigen::Vector4d query = (Eigen::Vector4d() << query_, 1.0).finished();
KnnResult<-1> result(k_indices, k_sq_dists, k);
KnnResult<-1> result(k_indices, k_sq_dists, k, identity_transform(), setting.max_sq_dist);
knn_search(query, root, result, setting);
return result.num_found();
}
Expand All @@ -323,7 +323,7 @@ struct UnsafeKdTree {
template <int N>
size_t knn_search(const Eigen::Vector3d& query_, size_t* k_indices, double* k_sq_dists, const KnnSetting& setting = KnnSetting()) const {
const Eigen::Vector4d query = (Eigen::Vector4d() << query_, 1.0).finished();
KnnResult<N> result(k_indices, k_sq_dists);
KnnResult<N> result(k_indices, k_sq_dists, -1, identity_transform(), setting.max_sq_dist);
knn_search(query, root, result, setting);
return result.num_found();
}
Expand Down
1 change: 0 additions & 1 deletion include/gtsam_points/cuda/kernels/linearized_system.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#pragma once

#include <Eigen/Core>
#include <thrust/device_ptr.h>

namespace gtsam_points {

Expand Down
32 changes: 16 additions & 16 deletions include/gtsam_points/cuda/kernels/lookup_voxels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,24 @@ struct lookup_voxels_kernel {

lookup_voxels_kernel(
const GaussianVoxelMapGPU& voxelmap,
const thrust::device_ptr<const Eigen::Vector3f>& points,
const thrust::device_ptr<const Eigen::Vector3f>& normals,
const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr)
const Eigen::Vector3f* points,
const Eigen::Vector3f* normals,
const Eigen::Isometry3f* x_ptr)
: x_ptr(x_ptr),
voxelmap_info_ptr(voxelmap.voxelmap_info_ptr),
buckets_ptr(voxelmap.buckets),
points_ptr(points),
normals_ptr(normals) {}

__host__ __device__ thrust::pair<int, int> operator()(int point_idx) const {
const auto& info = *thrust::raw_pointer_cast(voxelmap_info_ptr);
const auto& info = *voxelmap_info_ptr;

const Eigen::Isometry3f& trans = *thrust::raw_pointer_cast(x_ptr);
const Eigen::Vector3f& x = thrust::raw_pointer_cast(points_ptr)[point_idx];
const Eigen::Isometry3f& trans = *x_ptr;
const Eigen::Vector3f& x = points_ptr[point_idx];
const Eigen::Vector3f transed_x = trans.linear() * x + trans.translation();

if (enable_surface_validation) {
const Eigen::Vector3f& normal = thrust::raw_pointer_cast(normals_ptr)[point_idx];
const Eigen::Vector3f& normal = normals_ptr[point_idx];
const Eigen::Vector3f transed_normal = trans.linear() * normal;

// 0.17 = cos(10 deg)
Expand All @@ -57,17 +57,17 @@ struct lookup_voxels_kernel {
}

__host__ __device__ void operator()(thrust::pair<int, int>& point_voxel_indices) const {
const auto& info = *thrust::raw_pointer_cast(voxelmap_info_ptr);
const auto& info = *voxelmap_info_ptr;

const int point_idx = thrust::get<0>(point_voxel_indices);
int& voxel_idx = thrust::get<1>(point_voxel_indices);

const Eigen::Isometry3f& trans = *thrust::raw_pointer_cast(x_ptr);
const Eigen::Vector3f& x = thrust::raw_pointer_cast(points_ptr)[point_idx];
const Eigen::Isometry3f& trans = *x_ptr;
const Eigen::Vector3f& x = points_ptr[point_idx];
const Eigen::Vector3f transed_x = trans.linear() * x + trans.translation();

if (enable_surface_validation) {
const Eigen::Vector3f& normal = thrust::raw_pointer_cast(normals_ptr)[point_idx];
const Eigen::Vector3f& normal = normals_ptr[point_idx];
const Eigen::Vector3f transed_normal = trans.linear() * normal;

if (transed_x.normalized().dot(transed_normal) > surface_validation_thresh) {
Expand All @@ -84,13 +84,13 @@ struct lookup_voxels_kernel {
// f(50) = 0.643 f(40) = 0.766 f(30) = 0.866 f(20) = 0.940
static const float constexpr surface_validation_thresh = 0.174; // cos(80.0 * M_PI / 180.0)

thrust::device_ptr<const Eigen::Isometry3f> x_ptr;
const Eigen::Isometry3f* x_ptr;

thrust::device_ptr<const VoxelMapInfo> voxelmap_info_ptr;
thrust::device_ptr<const VoxelBucket> buckets_ptr;
const VoxelMapInfo* voxelmap_info_ptr;
const VoxelBucket* buckets_ptr;

thrust::device_ptr<const Eigen::Vector3f> points_ptr;
thrust::device_ptr<const Eigen::Vector3f> normals_ptr;
const Eigen::Vector3f* points_ptr;
const Eigen::Vector3f* normals_ptr;
};

struct invalid_correspondence_kernel {
Expand Down
4 changes: 2 additions & 2 deletions include/gtsam_points/cuda/kernels/vector3_hash.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ inline __host__ __device__ Eigen::Vector3i calc_voxel_coord(const Eigen::Vector3
inline __host__ __device__ int lookup_voxel(
const int max_bucket_scan_count,
const int num_buckets,
const thrust::device_ptr<const VoxelBucket>& buckets_ptr,
const VoxelBucket* buckets_ptr,
const float resolution,
const Eigen::Vector3f& x) {
Eigen::Vector3i coord = calc_voxel_coord(x, resolution);
uint64_t hash = vector3i_hash(coord);

for (int i = 0; i < max_bucket_scan_count; i++) {
uint64_t bucket_index = (hash + i) % num_buckets;
const auto& bucket = thrust::raw_pointer_cast(buckets_ptr)[bucket_index];
const auto& bucket = buckets_ptr[bucket_index];

if (bucket.second < 0) {
return -1;
Expand Down
Loading

0 comments on commit 5010384

Please sign in to comment.