diff --git a/.gitmodules b/.gitmodules
index 7c009e8039..3ff2c85c42 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -16,3 +16,6 @@
 [submodule "cpp/vcpkg"]
 	path = cpp/vcpkg
 	url = https://github.com/microsoft/vcpkg.git
+[submodule "cpp/third_party/sparrow"]
+	path = cpp/third_party/sparrow
+	url = https://github.com/man-group/sparrow.git
diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt
index 977765451f..9506767188 100644
--- a/cpp/arcticdb/CMakeLists.txt
+++ b/cpp/arcticdb/CMakeLists.txt
@@ -65,6 +65,7 @@ else()
     find_package(PCRE REQUIRED)
     find_package(Libevent REQUIRED)
     find_package(semimap REQUIRED)
+    find_package(sparrow REQUIRED)
     find_package(recycle REQUIRED)
 
     find_package(msgpack-c REQUIRED)
@@ -503,7 +504,13 @@ set(arcticdb_srcs
         version/symbol_list.cpp
         version/version_map_batch_methods.cpp
         storage/s3/ec2_utils.cpp
-        util/buffer_holder.cpp)
+        util/buffer_holder.cpp
+        util/native_handler.hpp
+        arrow/arrow_output_frame.hpp
+        arrow/arrow_output_frame.cpp
+        arrow/arrow_utils.hpp
+        # NOTE(review): test_arrow.cpp uses the TEST() macro; it looks like it belongs with the unit-test sources rather than the core library - confirm.
+        arrow/test/test_arrow.cpp)
 
 add_library(arcticdb_core_object OBJECT ${arcticdb_srcs})
 
diff --git a/cpp/arcticdb/arrow/arrow_output_frame.cpp b/cpp/arcticdb/arrow/arrow_output_frame.cpp
new file mode 100644
index 0000000000..7a9ff15a3c
--- /dev/null
+++ b/cpp/arcticdb/arrow/arrow_output_frame.cpp
@@ -0,0 +1,27 @@
+/* Copyright 2023 Man Group Operations Limited
+ *
+ * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
+ *
+ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
+ */
+
+
+#include
+
+namespace arcticdb {
+
+// Copy-constructs frame_ from `frame`, takes over the `buffers` handle, and sizes the two
+// vectors from the descriptor: index_columns_ gets one slot per index field and names_ one
+// slot per remaining field.
+// NOTE(review): the names_ size is a subtraction; it presumably relies on
+// frame.fields().size() >= index().field_count() - confirm.
+ArrowOutputFrame::ArrowOutputFrame(const SegmentInMemory &frame, std::shared_ptr buffers) :
+        module_data_(ModuleData::instance()),
+        frame_(frame),
+        names_(frame.fields().size() - frame.descriptor().index().field_count()),
+        index_columns_(frame.descriptor().index().field_count()),
+        buffers_(std::move(buffers)) {
+
+}
+
+} // namespace arcticdb
diff --git a/cpp/arcticdb/arrow/arrow_output_frame.hpp b/cpp/arcticdb/arrow/arrow_output_frame.hpp
new file mode 100644
index 0000000000..4727d664cf
--- /dev/null
+++ b/cpp/arcticdb/arrow/arrow_output_frame.hpp
@@ -0,0 +1,38 @@
+/* Copyright 2023 Man Group Operations Limited
+ *
+ * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
+ *
+ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include
+
+namespace arcticdb {
+
+// NOTE(review): frame_ below is stored by value, which needs the complete SegmentInMemory
+// type; this forward declaration alone is not enough - confirm an include above provides it.
+class SegmentInMemory;
+
+/**
+ * Holds a copy of a segment together with the module-data and buffer handles and the
+ * name / index-column vectors that are set up by the constructor.
+ */
+class ArrowOutputFrame {
+public:
+    ArrowOutputFrame(const SegmentInMemory& frame, std::shared_ptr buffers);
+
+private:
+    std::shared_ptr module_data_;
+    SegmentInMemory frame_;
+    std::vector names_;
+    std::vector index_columns_;
+    std::weak_ptr arrays_; // not initialised by the constructor's initializer list
+    std::shared_ptr buffers_;
+};
+
+} // namespace arcticdb
diff --git a/cpp/arcticdb/arrow/arrow_utils.hpp b/cpp/arcticdb/arrow/arrow_utils.hpp
new file mode 100644
index 0000000000..789e232338
--- /dev/null
+++ b/cpp/arcticdb/arrow/arrow_utils.hpp
@@ -0,0 +1,54 @@
+/* Copyright 2023 Man Group Operations Limited
+ *
+ * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
+ *
+ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+
+namespace arcticdb {
+/*
+Disabled draft of the column-to-Arrow conversion; nothing in this block is compiled.
+
+sparrow::arrow_array_unique_ptr arrow_data_from_column(const Column& column) {
+    return column.type().visit_tag([&](auto && impl) -> sparrow::arrow_array_unique_ptr {
+        using TagType = std::decay_t;
+        using DataType = TagType::DataTypeTag;
+        using RawType = DataType::raw_type;
+        if constexpr (!is_sequence_type(DataType::data_type)) {
+            sparrow::array_data data;
+            data.type = sparrow::data_descriptor(sparrow::arrow_traits::type_id);
+
+            auto column_data = column.data();
+            util::check(column_data.num_blocks() == 1, "Expected single block in arrow conversion");
+            auto block = column_data.next().value();
+            sparrow::buffer buffer(const_cast(block.data()), block.row_count());
+
+            data.buffers.push_back(buffer);
+            data.length = static_cast(block.row_count());
+            data.offset = static_cast(0);
+            data.child_data.emplace_back();
+
+            return to_arrow_array_unique_ptr(std::move(data));
+        } else {
+            util::raise_rte("Sequence types not implemented");
+        }
+    });
+}
+
+std::vector segment_to_arrow_arrays(SegmentInMemory& segment) {
+    std::vector output;
+    for(auto& column : segment.column()) {
+        output.emplace_back(arrow_data_from_column(column));
+    }
+    return output;
+}
+*/
+} // namespace arcticdb
diff --git a/cpp/arcticdb/arrow/test/test_arrow.cpp b/cpp/arcticdb/arrow/test/test_arrow.cpp
new file mode 100644
index 0000000000..5168ba034c
--- /dev/null
+++ b/cpp/arcticdb/arrow/test/test_arrow.cpp
@@ -0,0 +1,17 @@
+/* Copyright 2023 Man Group Operations Limited
+ *
+ * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
+ *
+ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
+ */
+
+#include
+
+//#include
+
+TEST(Arrow, ConvertColumn) {
+    // Placeholder: arrow_data_from_column is still commented out in arrow_utils.hpp.
+    // using namespace arcticdb;
+    // Column column;
+    // auto data = arrow_data_from_column(column);
+}
diff --git a/cpp/arcticdb/stream/aggregator-inl.hpp b/cpp/arcticdb/stream/aggregator-inl.hpp
index 793c643f1c..ab8b967385 100644
--- a/cpp/arcticdb/stream/aggregator-inl.hpp
+++ b/cpp/arcticdb/stream/aggregator-inl.hpp
@@ -32,16 +32,22 @@ inline void Aggregator::commit_i
     stats_.reset();
 }
 
+// Returns true when `segment` has at least one row, has metadata, or has an index descriptor.
+// commit() and finalize() call this to decide whether to invoke commit_impl().
+inline bool has_something_to_write(const SegmentInMemory& segment) {
+    return segment.row_count() > 0 || segment.metadata() || segment.has_index_descriptor();
+}
+
 template
 inline void Aggregator::commit() {
-    if (ARCTICDB_LIKELY(segment_.row_count() > 0 || segment_.metadata()) || segment_.has_index_descriptor()) {
+    if (ARCTICDB_LIKELY(has_something_to_write(segment_))) {
         commit_impl(false);
     }
 }
 
 template
 inline void Aggregator::finalize() {
-    if (ARCTICDB_LIKELY(segment_.row_count() > 0 || segment_.metadata()) || segment_.has_index_descriptor()) {
+    if (ARCTICDB_LIKELY(has_something_to_write(segment_))) {
         commit_impl(true);
     }
 }
diff --git a/cpp/third_party/sparrow b/cpp/third_party/sparrow
new file mode 160000
index 0000000000..6b0cffcadd
--- /dev/null
+++ b/cpp/third_party/sparrow
@@ -0,0 +1 @@
+Subproject commit 6b0cffcadd4c8d8554eefc1067fa81c2ccb5c288