From 28b08d92676a7df0cffe6fa1f150f1d4f301d760 Mon Sep 17 00:00:00 2001 From: Tianyi Chen Date: Wed, 5 Jun 2024 16:09:09 -0700 Subject: [PATCH] multibyte binary reader --- .../nlohmann/detail/input/binary_reader.hpp | 77 ++++++++--- .../nlohmann/detail/input/input_adapters.hpp | 45 +++++++ single_include/nlohmann/json.hpp | 122 +++++++++++++++--- 3 files changed, 202 insertions(+), 42 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index a6e100e761..2c22718870 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -20,6 +20,9 @@ #include // char_traits, string #include // make_pair, move #include // vector +#ifdef __cpp_lib_byteswap + #include //byteswap +#endif #include #include @@ -2754,6 +2757,29 @@ class binary_reader return current = ia.get_character(); } + /*! + @brief get_to read into a primitive type + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns false instead + + @return bool, whether the read was successful + */ + template + bool get_to(T& dest, const input_format_t format, const char* context) + { + auto new_chars_read = ia.get_elements(&dest); + chars_read += new_chars_read; + if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T))) + { + // in case of failure, advance position by 1 to report failing location + ++chars_read; + sax->parse_error(chars_read, "", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); + return false; + } + return true; + } + /*! @return character read from the input after ignoring all 'N' entries */ @@ -2768,6 +2794,28 @@ class binary_reader return current; } + template + static void byte_swap(NumberType& number) + { + constexpr std::size_t sz = sizeof(number); + if (sz == 1) + { + return; + } +#ifdef __cpp_lib_byteswap + // convert float types to int types of the same size + using swap_t = std::conditional::type>::type; + swap_t& number_ref = reinterpret_cast(number); + number_ref = std::byteswap(number_ref); +#else + auto ptr = reinterpret_cast(&number); + for (std::size_t i = 0; i < sz / 2; ++i) + { + std::swap(ptr[i], ptr[sz - i - 1]); + } +#endif + } + /* @brief read a number from the input @@ -2786,29 +2834,16 @@ class binary_reader template bool get_number(const input_format_t format, NumberType& result) { - // step 1: read input into array with system's byte order - std::array vec{}; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number"))) - { - return false; - } + // read in the original format - // reverse byte order prior to conversion if necessary - if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } + if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number"))) + { + return false; + } + if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) + { + byte_swap(result); } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); return true; } diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 33fca3e4b9..97566dce32 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -67,6 +67,13 @@ class file_input_adapter return std::fgetc(m_file); } + // returns the number of characters successfully read + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + return fread(dest, 1, sizeof(T) * count, m_file); + } + private: /// the file pointer to read from std::FILE* m_file; @@ -126,6 +133,17 @@ class input_stream_adapter return res; } + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + auto res = sb->sgetn(reinterpret_cast(dest), count * sizeof(T)); + if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T))) + { + is->clear(is->rdstate() | std::ios::eofbit); + } + return res; + } + private: /// the associated input stream std::istream* is = nullptr; @@ -157,6 +175,27 @@ class iterator_input_adapter return char_traits::eof(); } + // for general iterators, we cannot really do something better than falling back to processing the range one-by-one + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + size_t successful_read_chars = 0; + auto ptr = reinterpret_cast(dest); + for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index) + { + if (JSON_HEDLEY_LIKELY(current != end)) + { + ptr[read_index] = *current; + std::advance(current, 1); + } + else + { + return read_index; + } + } + return count * sizeof(T); + } + private: IteratorType current; IteratorType end; @@ -320,6 +359,12 @@ class wide_string_input_adapter return utf8_bytes[utf8_bytes_index++]; } + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + JSON_THROW(other_error::create(500, "Unexpected get_elements call to wchar input adapter", nullptr)); + } + private: BaseInputAdapter base_adapter; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a858728c4c..94a27aceed 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6133,6 +6133,9 @@ NLOHMANN_JSON_NAMESPACE_END #include // char_traits, string #include // make_pair, move #include // vector +#ifdef __cpp_lib_byteswap + #include //byteswap +#endif // #include @@ -6209,6 +6212,13 @@ class file_input_adapter return std::fgetc(m_file); } + // returns the number of characters successfully read + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + return fread(dest, 1, sizeof(T) * count, m_file); + } + private: /// the file pointer to read from std::FILE* m_file; @@ -6268,6 +6278,17 @@ class input_stream_adapter return res; } + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + auto res = sb->sgetn(reinterpret_cast(dest), count * sizeof(T)); + if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T))) + { + is->clear(is->rdstate() | std::ios::eofbit); + } + return res; + } + private: /// the associated input stream std::istream* is = nullptr; @@ -6299,6 +6320,27 @@ class iterator_input_adapter return char_traits::eof(); } + // for general iterators, we cannot really do something better than falling back to processing the range one-by-one + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + size_t successful_read_chars = 0; + auto ptr = reinterpret_cast(dest); + for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index) + { + if (JSON_HEDLEY_LIKELY(current != end)) + { + ptr[read_index] = *current; + std::advance(current, 1); + } + else + { + return read_index; + } + } + return count * sizeof(T); + } + private: IteratorType current; IteratorType end; @@ -6462,6 +6504,12 @@ class wide_string_input_adapter return utf8_bytes[utf8_bytes_index++]; } + template + std::size_t get_elements(T* dest, std::size_t count = 1) + { + JSON_THROW(other_error::create(500, "Unexpected get_elements call to wchar input adapter", nullptr)); + } + private: BaseInputAdapter base_adapter; @@ -11900,6 +11948,29 @@ class binary_reader return current = ia.get_character(); } + /*! + @brief get_to read into a primitive type + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns false instead + + @return bool, whether the read was successful + */ + template + bool get_to(T& dest, const input_format_t format, const char* context) + { + auto new_chars_read = ia.get_elements(&dest); + chars_read += new_chars_read; + if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T))) + { + // in case of failure, advance position by 1 to report failing location + ++chars_read; + sax->parse_error(chars_read, "", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); + return false; + } + return true; + } + /*! @return character read from the input after ignoring all 'N' entries */ @@ -11914,6 +11985,28 @@ class binary_reader return current; } + template + static void byte_swap(NumberType& number) + { + constexpr std::size_t sz = sizeof(number); + if (sz == 1) + { + return; + } +#ifdef __cpp_lib_byteswap + // convert float types to int types of the same size + using swap_t = std::conditional::type>::type; + swap_t& number_ref = reinterpret_cast(number); + number_ref = std::byteswap(number_ref); +#else + auto ptr = reinterpret_cast(&number); + for (std::size_t i = 0; i < sz / 2; ++i) + { + std::swap(ptr[i], ptr[sz - i - 1]); + } +#endif + } + /* @brief read a number from the input @@ -11932,29 +12025,16 @@ class binary_reader template bool get_number(const input_format_t format, NumberType& result) { - // step 1: read input into array with system's byte order - std::array vec{}; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number"))) - { - return false; - } + // read in the original format - // reverse byte order prior to conversion if necessary - if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } + if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number"))) + { + return false; + } + if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) + { + byte_swap(result); } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); return true; }