Skip to content

Commit

Permalink
Separate benchmarks into different files
Browse files Browse the repository at this point in the history
  • Loading branch information
gonidelis committed Sep 21, 2024
1 parent e0bef31 commit 0a9d815
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 61 deletions.
87 changes: 26 additions & 61 deletions cub/benchmarks/bench/find_if/base.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,32 +33,44 @@
#include <nvbench_helper.cuh>

template <typename T>
struct equals_100
struct equals
{
__device__ bool operator()(T i)
T val;
equals(T _val)
: val(_val)
{}

__device__ __host__ bool operator()(T i)
{
return i == 1;
} // @amd you 'll never find out the secret sauce
return i == val;
}
};

template <typename T>
void find_if(nvbench::state& state, nvbench::type_list<T>)
{
T val = 1;
// set up input
const auto elements = state.get_int64("Elements");
const auto common_prefix = state.get_float64("CommonPrefixRatio");
const auto same_elements = elements * common_prefix;
const auto elements = state.get_int64("Elements");
const auto common_prefix = state.get_float64("MismatchAt");
const auto mismatch_point = elements * common_prefix;

thrust::device_vector<T> dinput(elements, 0);
thrust::fill(dinput.begin() + same_elements, dinput.end(), 1);
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
thrust::device_vector<T> d_result(1);
///

void* d_temp_storage = nullptr;
size_t temp_storage_bytes{};

cub::DeviceFind::FindIf(
d_temp_storage, temp_storage_bytes, dinput.begin(), d_result.begin(), equals_100<int>{}, dinput.size(), 0);
d_temp_storage,
temp_storage_bytes,
thrust::raw_pointer_cast(dinput.data()),
thrust::raw_pointer_cast(d_result.data()),
equals{val},
dinput.size(),
0);

thrust::device_vector<uint8_t> temp_storage(temp_storage_bytes);
d_temp_storage = thrust::raw_pointer_cast(temp_storage.data());
Expand All @@ -67,61 +79,14 @@ void find_if(nvbench::state& state, nvbench::type_list<T>)
cub::DeviceFind::FindIf(
d_temp_storage,
temp_storage_bytes,
dinput.begin(),
d_result.begin(),
equals_100<int>{},
thrust::raw_pointer_cast(dinput.data()),
thrust::raw_pointer_cast(d_result.data()),
equals{val},
dinput.size(),
launch.get_stream());
});
}
NVBENCH_BENCH_TYPES(find_if, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t> /*integral_types*/))
.add_int64_power_of_two_axis("Elements", nvbench::range(16, 28, 4))
.add_float64_axis("CommonPrefixRatio", std::vector{1.0, 0.5, 0.0});

//////////////////////////////////////////////////////
template <typename T>
void thrust_find_if(nvbench::state& state, nvbench::type_list<T>)
{
// set up input
const auto elements = state.get_int64("Elements");
const auto common_prefix = state.get_float64("CommonPrefixRatio");
const auto same_elements = elements * common_prefix;

thrust::device_vector<T> dinput(elements, 0);
thrust::fill(dinput.begin() + same_elements, dinput.end(), 1);
///

caching_allocator_t alloc;
thrust::find_if(policy(alloc), dinput.begin(), dinput.end(), equals_100<int>{});

state.exec(nvbench::exec_tag::no_batch | nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
thrust::find_if(policy(alloc, launch), dinput.begin(), dinput.end(), equals_100<int>{});
});
}
NVBENCH_BENCH_TYPES(thrust_find_if, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t> /*integral_types*/))
.add_int64_power_of_two_axis("Elements", nvbench::range(16, 28, 4))
.add_float64_axis("CommonPrefixRatio", std::vector{1.0, 0.5, 0.0});

//////////////////////////////////////////////////////
template <typename T>
void thrust_count_if(nvbench::state& state, nvbench::type_list<T>)
{
// set up input
const auto elements = state.get_int64("Elements");
const auto common_prefix = state.get_float64("CommonPrefixRatio");
const auto same_elements = elements * common_prefix;

thrust::device_vector<T> dinput(elements, 0);
thrust::fill(dinput.begin() + same_elements, dinput.end(), 1);
///

caching_allocator_t alloc;
thrust::count_if(policy(alloc), dinput.begin(), dinput.end(), equals_100<int>{});

state.exec(nvbench::exec_tag::no_batch | nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
thrust::count_if(policy(alloc, launch), dinput.begin(), dinput.end(), equals_100<int>{});
});
}
NVBENCH_BENCH_TYPES(thrust_count_if, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t> /*integral_types*/))
NVBENCH_BENCH_TYPES(find_if, NVBENCH_TYPE_AXES(fundamental_types))
.add_int64_power_of_two_axis("Elements", nvbench::range(16, 28, 4))
.add_float64_axis("CommonPrefixRatio", std::vector{1.0, 0.5, 0.0});
.add_float64_axis("MismatchAt", std::vector{1.0, 0.5, 0.0});
5 changes: 5 additions & 0 deletions cub/cub/device/device_find_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
#include <cub/thread/thread_operators.cuh>
#include <cub/util_deprecated.cuh>

#include <cassert>

static constexpr int elements_per_thread = 16;
static constexpr int _VECTOR_LOAD_LENGTH = 4;
static constexpr int block_threads = 128;
Expand Down Expand Up @@ -129,6 +131,7 @@ __global__ void find_if(IterBegin begin, IterEnd end, Pred pred, int* result, st
}
}
}

if (syncthreads_or(found))
{
if (threadIdx.x == 0)
Expand Down Expand Up @@ -166,6 +169,8 @@ struct DeviceFind
NumItemsT num_items,
cudaStream_t stream = 0)
{
static_assert(elements_per_thread % _VECTOR_LOAD_LENGTH == 0, "No full tile at the end");

// int items_per_thread = 2;
int tile_size = block_threads * elements_per_thread;
int num_tiles = static_cast<int>(cub::DivideAndRoundUp(num_items, tile_size));
Expand Down
71 changes: 71 additions & 0 deletions thrust/benchmarks/bench/count_if/basic.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/******************************************************************************
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include <thrust/count.h>
#include <thrust/fill.h>

#include "nvbench_helper.cuh"

template <typename T>
struct equals
{
T val;
equals(T _val)
: val(_val)
{}

__device__ __host__ bool operator()(T i)
{
return i == val;
}
};

template <typename T>
void count_if(nvbench::state& state, nvbench::type_list<T>)
{
T val = 1;
// set up input
const auto elements = static_cast<std::size_t>(state.get_int64("Elements"));
const auto common_prefix = state.get_float64("MismatchAt");
const auto mismatch_point = elements * common_prefix;

thrust::device_vector<T> dinput(elements, 0);
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
///

caching_allocator_t alloc;
thrust::count_if(policy(alloc), dinput.begin(), dinput.end(), equals<T>{val});

state.exec(nvbench::exec_tag::no_batch | nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
thrust::count_if(policy(alloc, launch), dinput.begin(), dinput.end(), equals<T>{val});
});
}

NVBENCH_BENCH_TYPES(count_if, NVBENCH_TYPE_AXES(fundamental_types))
.set_name("thrust::count_if")
.add_int64_power_of_two_axis("Elements", nvbench::range(16, 28, 4))
.add_float64_axis("MismatchAt", std::vector{1.0, 0.5, 0.0});
71 changes: 71 additions & 0 deletions thrust/benchmarks/bench/find_if/basic.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/******************************************************************************
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include <thrust/count.h>
#include <thrust/fill.h>

#include "nvbench_helper.cuh"

template <typename T>
struct equals
{
T val;
equals(T _val)
: val(_val)
{}

__device__ __host__ bool operator()(T i)
{
return i == val;
}
};

template <typename T>
void find_if(nvbench::state& state, nvbench::type_list<T>)
{
T val = 1;
// set up input
const auto elements = static_cast<std::size_t>(state.get_int64("Elements"));
const auto common_prefix = state.get_float64("MismatchAt");
const auto mismatch_point = elements * common_prefix;

thrust::device_vector<T> dinput(elements, 0);
thrust::fill(dinput.begin() + mismatch_point, dinput.end(), val);
///

caching_allocator_t alloc;
thrust::find_if(policy(alloc), dinput.begin(), dinput.end(), equals<T>{val});

state.exec(nvbench::exec_tag::no_batch | nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
thrust::find_if(policy(alloc, launch), dinput.begin(), dinput.end(), equals<T>{val});
});
}

NVBENCH_BENCH_TYPES(find_if, NVBENCH_TYPE_AXES(fundamental_types))
.set_name("thrust::find_if")
.add_int64_power_of_two_axis("Elements", nvbench::range(16, 28, 4))
.add_float64_axis("MismatchAt", std::vector{1.0, 0.5, 0.0});

0 comments on commit 0a9d815

Please sign in to comment.