Skip to content

Commit

Permalink
multibyte binary reader
Browse files Browse the repository at this point in the history
  • Loading branch information
TianyiChen committed Jun 6, 2024
1 parent 8c391e0 commit 28b08d9
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 42 deletions.
77 changes: 56 additions & 21 deletions include/nlohmann/detail/input/binary_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include <string> // char_traits, string
#include <utility> // make_pair, move
#include <vector> // vector
#ifdef __cpp_lib_byteswap
#include <bit> //byteswap
#endif

#include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/input/input_adapters.hpp>
Expand Down Expand Up @@ -2754,6 +2757,29 @@ class binary_reader
return current = ia.get_character();
}

/*!
@brief get_to read into a primitive type
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns false instead
@return bool, whether the read was successful
*/
template<class T>
bool get_to(T& dest, const input_format_t format, const char* context)
{
auto new_chars_read = ia.get_elements(&dest);
chars_read += new_chars_read;
if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T)))
{
// in case of failure, advance position by 1 to report failing location
++chars_read;
sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
return false;
}
return true;
}

/*!
@return character read from the input after ignoring all 'N' entries
*/
Expand All @@ -2768,6 +2794,28 @@ class binary_reader
return current;
}

/*!
@brief reverse the byte order of a number in place

Used to convert between the byte order of the input and the byte order of
the host. Single-byte types are left untouched.

@tparam NumberType  arithmetic type of 1, 2, 4, or 8 bytes
@param[in,out] number  value whose bytes are reversed
*/
template<class NumberType>
static void byte_swap(NumberType& number)
{
    constexpr std::size_t sz = sizeof(number);
    if (sz == 1)
    {
        // a single byte has no byte order
        return;
    }
#ifdef __cpp_lib_byteswap
    static_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8, "byte_swap only supports types of 1, 2, 4, or 8 bytes");
    // unsigned integer type of exactly the same size, so that floating-point
    // values can be swapped as well; `typename` is spelled out for compilers
    // that do not yet accept its omission in alias declarations
    using swap_t = typename std::conditional < sz == 1, std::uint8_t,
          typename std::conditional < sz == 2, std::uint16_t,
          typename std::conditional<sz == 4, std::uint32_t, std::uint64_t>::type >::type >::type;
    // std::bit_cast copies the object representation; unlike accessing the
    // number through a reinterpret_cast'ed reference, it does not violate
    // the strict aliasing rules for float/double
    number = std::bit_cast<NumberType>(std::byteswap(std::bit_cast<swap_t>(number)));
#else
    // portable fallback: mirror the bytes around the middle of the object
    auto* ptr = reinterpret_cast<std::uint8_t*>(&number);
    for (std::size_t i = 0; i < sz / 2; ++i)
    {
        std::swap(ptr[i], ptr[sz - i - 1]);
    }
#endif
}

/*
@brief read a number from the input
Expand All @@ -2786,29 +2834,16 @@ class binary_reader
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<std::uint8_t, sizeof(NumberType)> vec{};
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
{
return false;
}
// read in the original format

// reverse byte order prior to conversion if necessary
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
{
vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
}
else
{
vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
}
if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number")))
{
return false;
}
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
{
byte_swap(result);
}

// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}

Expand Down
45 changes: 45 additions & 0 deletions include/nlohmann/detail/input/input_adapters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ class file_input_adapter
return std::fgetc(m_file);
}

// returns the number of characters successfully read
/// @brief read up to @a count objects of type T from the file as raw bytes
/// @param[out] dest  buffer with room for at least @a count objects of type T
/// @param[in] count  number of objects requested
/// @return number of bytes (not objects) actually read; a value smaller than
///         sizeof(T) * count means the read was cut short
template<class T>
std::size_t get_elements(T* dest, std::size_t count = 1)
{
    // element size 1 makes fread report the number of bytes read
    return std::fread(dest, 1, sizeof(T) * count, m_file);
}

private:
/// the file pointer to read from
std::FILE* m_file;
Expand Down Expand Up @@ -126,6 +133,17 @@ class input_stream_adapter
return res;
}

/// @brief read up to @a count objects of type T from the stream buffer as raw bytes
/// @param[out] dest  buffer with room for at least @a count objects of type T
/// @param[in] count  number of objects requested
/// @return number of bytes (not objects) actually read
template<class T>
std::size_t get_elements(T* dest, std::size_t count = 1)
{
    // sgetn takes and returns the signed std::streamsize; convert explicitly
    // in both directions so the comparison below is unsigned vs. unsigned
    auto res = static_cast<std::size_t>(sb->sgetn(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(count * sizeof(T))));
    if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
    {
        // fewer bytes than requested: flag end of file on the associated stream
        is->clear(is->rdstate() | std::ios::eofbit);
    }
    return res;
}

private:
/// the associated input stream
std::istream* is = nullptr;
Expand Down Expand Up @@ -157,6 +175,27 @@ class iterator_input_adapter
return char_traits<char_type>::eof();
}

// for general iterators, we cannot do any better than falling back to processing the range one element at a time
/// @brief copy up to sizeof(T) * count elements of the range into @a dest
/// @param[out] dest  buffer with room for at least @a count objects of type T
/// @param[in] count  number of objects requested
/// @return number of bytes written to @a dest; smaller than sizeof(T) * count
///         if the end of the range was reached first
template<class T>
std::size_t get_elements(T* dest, std::size_t count = 1)
{
    auto* ptr = reinterpret_cast<char*>(dest);
    for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
    {
        if (JSON_HEDLEY_LIKELY(current != end))
        {
            // each element of the range supplies one byte of the destination
            ptr[read_index] = static_cast<char>(*current);
            std::advance(current, 1);
        }
        else
        {
            // range exhausted: report how many bytes were written so far
            return read_index;
        }
    }
    return count * sizeof(T);
}

private:
IteratorType current;
IteratorType end;
Expand Down Expand Up @@ -320,6 +359,12 @@ class wide_string_input_adapter
return utf8_bytes[utf8_bytes_index++];
}

/// @brief placeholder required by the adapter interface; always reports an
///        error via JSON_THROW and never reads anything
/// @note the parameters are intentionally unnamed, as they are not used
template<class T>
std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1)
{
    JSON_THROW(other_error::create(500, "Unexpected get_elements call to wchar input adapter", nullptr));
}

private:
BaseInputAdapter base_adapter;

Expand Down
122 changes: 101 additions & 21 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6133,6 +6133,9 @@ NLOHMANN_JSON_NAMESPACE_END
#include <string> // char_traits, string
#include <utility> // make_pair, move
#include <vector> // vector
#ifdef __cpp_lib_byteswap
#include <bit> //byteswap
#endif

// #include <nlohmann/detail/exceptions.hpp>

Expand Down Expand Up @@ -6209,6 +6212,13 @@ class file_input_adapter
return std::fgetc(m_file);
}

// returns the number of characters successfully read
/// @brief read up to @a count objects of type T from the file as raw bytes
/// @param[out] dest  buffer with room for at least @a count objects of type T
/// @param[in] count  number of objects requested
/// @return number of bytes (not objects) actually read; a value smaller than
///         sizeof(T) * count means the read was cut short
template<class T>
std::size_t get_elements(T* dest, std::size_t count = 1)
{
    // element size 1 makes fread report the number of bytes read
    return std::fread(dest, 1, sizeof(T) * count, m_file);
}

private:
/// the file pointer to read from
std::FILE* m_file;
Expand Down Expand Up @@ -6268,6 +6278,17 @@ class input_stream_adapter
return res;
}

/// @brief read up to @a count objects of type T from the stream buffer as raw bytes
/// @param[out] dest  buffer with room for at least @a count objects of type T
/// @param[in] count  number of objects requested
/// @return number of bytes (not objects) actually read
template<class T>
std::size_t get_elements(T* dest, std::size_t count = 1)
{
    // sgetn takes and returns the signed std::streamsize; convert explicitly
    // in both directions so the comparison below is unsigned vs. unsigned
    auto res = static_cast<std::size_t>(sb->sgetn(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(count * sizeof(T))));
    if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
    {
        // fewer bytes than requested: flag end of file on the associated stream
        is->clear(is->rdstate() | std::ios::eofbit);
    }
    return res;
}

private:
/// the associated input stream
std::istream* is = nullptr;
Expand Down Expand Up @@ -6299,6 +6320,27 @@ class iterator_input_adapter
return char_traits<char_type>::eof();
}

// for general iterators, we cannot do any better than falling back to processing the range one element at a time
/// @brief copy up to sizeof(T) * count elements of the range into @a dest
/// @param[out] dest  buffer with room for at least @a count objects of type T
/// @param[in] count  number of objects requested
/// @return number of bytes written to @a dest; smaller than sizeof(T) * count
///         if the end of the range was reached first
template<class T>
std::size_t get_elements(T* dest, std::size_t count = 1)
{
    auto* ptr = reinterpret_cast<char*>(dest);
    for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
    {
        if (JSON_HEDLEY_LIKELY(current != end))
        {
            // each element of the range supplies one byte of the destination
            ptr[read_index] = static_cast<char>(*current);
            std::advance(current, 1);
        }
        else
        {
            // range exhausted: report how many bytes were written so far
            return read_index;
        }
    }
    return count * sizeof(T);
}

private:
IteratorType current;
IteratorType end;
Expand Down Expand Up @@ -6462,6 +6504,12 @@ class wide_string_input_adapter
return utf8_bytes[utf8_bytes_index++];
}

/// @brief placeholder required by the adapter interface; always reports an
///        error via JSON_THROW and never reads anything
/// @note the parameters are intentionally unnamed, as they are not used
template<class T>
std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1)
{
    JSON_THROW(other_error::create(500, "Unexpected get_elements call to wchar input adapter", nullptr));
}

private:
BaseInputAdapter base_adapter;

Expand Down Expand Up @@ -11900,6 +11948,29 @@ class binary_reader
return current = ia.get_character();
}

/*!
@brief get_to read into a primitive type

This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns false instead

@return bool, whether the read was successful
*/
template<class T>
bool get_to(T& dest, const input_format_t format, const char* context)
{
auto new_chars_read = ia.get_elements(&dest);
chars_read += new_chars_read;
if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T)))
{
// in case of failure, advance position by 1 to report failing location
++chars_read;
sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
return false;
}
return true;
}

/*!
@return character read from the input after ignoring all 'N' entries
*/
Expand All @@ -11914,6 +11985,28 @@ class binary_reader
return current;
}

/*!
@brief reverse the byte order of a number in place

Used to convert between the byte order of the input and the byte order of
the host. Single-byte types are left untouched.

@tparam NumberType  arithmetic type of 1, 2, 4, or 8 bytes
@param[in,out] number  value whose bytes are reversed
*/
template<class NumberType>
static void byte_swap(NumberType& number)
{
    constexpr std::size_t sz = sizeof(number);
    if (sz == 1)
    {
        // a single byte has no byte order
        return;
    }
#ifdef __cpp_lib_byteswap
    static_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8, "byte_swap only supports types of 1, 2, 4, or 8 bytes");
    // unsigned integer type of exactly the same size, so that floating-point
    // values can be swapped as well; `typename` is spelled out for compilers
    // that do not yet accept its omission in alias declarations
    using swap_t = typename std::conditional < sz == 1, std::uint8_t,
          typename std::conditional < sz == 2, std::uint16_t,
          typename std::conditional<sz == 4, std::uint32_t, std::uint64_t>::type >::type >::type;
    // std::bit_cast copies the object representation; unlike accessing the
    // number through a reinterpret_cast'ed reference, it does not violate
    // the strict aliasing rules for float/double
    number = std::bit_cast<NumberType>(std::byteswap(std::bit_cast<swap_t>(number)));
#else
    // portable fallback: mirror the bytes around the middle of the object
    auto* ptr = reinterpret_cast<std::uint8_t*>(&number);
    for (std::size_t i = 0; i < sz / 2; ++i)
    {
        std::swap(ptr[i], ptr[sz - i - 1]);
    }
#endif
}

/*
@brief read a number from the input

Expand All @@ -11932,29 +12025,16 @@ class binary_reader
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<std::uint8_t, sizeof(NumberType)> vec{};
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
{
return false;
}
// read in the original format

// reverse byte order prior to conversion if necessary
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
{
vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
}
else
{
vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
}
if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number")))
{
return false;
}
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
{
byte_swap(result);
}

// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}

Expand Down

0 comments on commit 28b08d9

Please sign in to comment.