Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster JSON encoding with orjson #493

Merged
merged 11 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# Ensure JSON serialization is part of benchmark:
to_file(open("/dev/null", "w"))

N = 10000
N = 100_000


def run():
Expand Down
16 changes: 16 additions & 0 deletions docs/source/news.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
What's New
==========

1.15.0
^^^^^^

Enhancements:

* Switched to JSON serialization with ``orjson``, which is much faster.

Changes:

* JSON customization is now done with a default function rather than an encoder class.
* ``NaN``, ``inf``, and ``-inf`` are now serialized to JSON as ``null``; this follows ``orjson``'s behavior and is more standards-compliant.

Deprecation and removals:

* The deprecated support for serializing ``bytes`` in JSON log messages has been removed.

1.14.0
^^^^^^

Expand Down
55 changes: 0 additions & 55 deletions eliot/_bytesjson.py

This file was deleted.

89 changes: 67 additions & 22 deletions eliot/_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@

import traceback
import inspect
import json as pyjson
from threading import Lock
from functools import wraps
from io import IOBase
import warnings

import orjson
from pyrsistent import PClass, field

from . import _bytesjson as bytesjson
from zope.interface import Interface, implementer

from ._traceback import write_traceback, TRACEBACK_MESSAGE
from ._message import EXCEPTION_FIELD, MESSAGE_TYPE_FIELD, REASON_FIELD
from ._util import saferepr, safeunicode
from .json import EliotJSONEncoder
from .json import json_default, _encoder_to_default_function
from ._validation import ValidationError


Expand Down Expand Up @@ -260,12 +260,19 @@ class MemoryLogger(object):
not mutate this list.
"""

def __init__(self, encoder=EliotJSONEncoder):
def __init__(self, encoder=None, json_default=json_default):
"""
@param encoder: A JSONEncoder subclass to use when encoding JSON.
@param encoder: DEPRECATED. A JSONEncoder subclass to use when
encoding JSON.

@param json_default: A callable that handles objects the default JSON
serializer can't handle.
"""
json_default = _json_default_from_encoder_and_json_default(
encoder, json_default
)
self._lock = Lock()
self._encoder = encoder
self._json_default = json_default
self.reset()

@exclusively
Expand Down Expand Up @@ -346,7 +353,7 @@ def _validate_message(self, dictionary, serializer):
serializer.serialize(dictionary)

try:
pyjson.dumps(dictionary, cls=self._encoder)
orjson.dumps(dictionary, default=self._json_default)
except Exception as e:
raise TypeError("Message %s doesn't encode to JSON: %s" % (dictionary, e))

Expand Down Expand Up @@ -409,13 +416,31 @@ def reset(self):
self._failed_validations = []


def _json_default_from_encoder_and_json_default(encoder, json_default):
if encoder is not None:
warnings.warn(
"Using a JSON encoder subclass is no longer supported, please switch to using a default function",
DeprecationWarning,
stacklevel=3,
)
from .json import json_default as default_json_default

if json_default is not default_json_default:
raise RuntimeError("Can't pass in both encoder and default function")

json_default = _encoder_to_default_function(encoder())
return json_default


def _unicode_dumps(o, default):
    """
    Like orjson.dumps(), but return Unicode.

    @param o: The object to serialize.

    @param default: Passed to C{orjson.dumps} as its C{default} argument.

    @return: The result of C{orjson.dumps}, decoded as UTF-8.
    """
    return orjson.dumps(o, default=default).decode("utf-8")


class FileDestination(PClass):
"""
Callable that writes JSON messages to a file.

On Python 3 the file may support either C{bytes} or C{unicode}. On
Python 2 only C{bytes} are supported since that is what all files expect
in practice.
Callable that writes JSON messages to a file that accepts either C{bytes}
or C{str}.

@ivar file: The file to which messages will be written.

Expand All @@ -425,48 +450,68 @@ class FileDestination(PClass):
"""

file = field(mandatory=True)
encoder = field(mandatory=True)
_json_default = field(mandatory=True)
_dumps = field(mandatory=True)
_linebreak = field(mandatory=True)

def __new__(cls, file, encoder=EliotJSONEncoder):
def __new__(cls, file, encoder=None, json_default=json_default):
"""
Use ``json_default`` to pass in a default function for JSON dumping.

The ``encoder`` parameter is deprecated.
"""
if isinstance(file, IOBase) and not file.writable():
raise RuntimeError("Given file {} is not writeable.")

json_default = _json_default_from_encoder_and_json_default(
encoder, json_default
)

unicodeFile = False
try:
file.write(b"")
except TypeError:
unicodeFile = True

if unicodeFile:
# On Python 3 native json module outputs unicode:
_dumps = pyjson.dumps
_dumps = _unicode_dumps
_linebreak = "\n"
else:
_dumps = bytesjson.dumps
_dumps = orjson.dumps
_linebreak = b"\n"
return PClass.__new__(
cls, file=file, _dumps=_dumps, _linebreak=_linebreak, encoder=encoder
cls,
file=file,
_dumps=_dumps,
_linebreak=_linebreak,
_json_default=json_default,
)

def __call__(self, message):
"""
@param message: A message dictionary.
"""
self.file.write(self._dumps(message, cls=self.encoder) + self._linebreak)
self.file.write(
self._dumps(message, default=self._json_default) + self._linebreak
)
self.file.flush()


def to_file(output_file, encoder=EliotJSONEncoder):
def to_file(output_file, encoder=None, json_default=json_default):
"""
Add a destination that writes a JSON message per line to the given file.

@param output_file: A file-like object.

@param encoder: A JSONEncoder subclass to use when encoding JSON.
@param encoder: DEPRECATED. A JSONEncoder subclass to use when encoding
JSON.

@param json_default: A callable that handles objects the default JSON
serializer can't handle.
"""
Logger._destinations.add(FileDestination(file=output_file, encoder=encoder))
Logger._destinations.add(
FileDestination(file=output_file, encoder=encoder, json_default=json_default)
)


# The default Logger, used when none is specified:
Expand Down
10 changes: 3 additions & 7 deletions eliot/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@
Command line program for filtering line-based Eliot logs.
"""

from __future__ import unicode_literals, absolute_import

if __name__ == "__main__":
import eliot.filter

eliot.filter.main()

import sys
from datetime import datetime, timedelta
from json import JSONEncoder

from ._bytesjson import dumps, loads
from json import JSONEncoder, dumps, loads


class _DatetimeJSONEncoder(JSONEncoder):
Expand Down Expand Up @@ -61,7 +57,7 @@ def run(self):
result = self._evaluate(message)
if result is self._SKIP:
continue
self.output.write(dumps(result, cls=_DatetimeJSONEncoder) + b"\n")
self.output.write(dumps(result, cls=_DatetimeJSONEncoder) + "\n")

def _evaluate(self, message):
"""
Expand All @@ -83,7 +79,7 @@ def _evaluate(self, message):
)


USAGE = b"""\
USAGE = """\
Usage: cat eliot.log | python -m eliot.filter <expr>

Read JSON-expression per line from stdin, and filter it using a Python
Expand Down
2 changes: 1 addition & 1 deletion eliot/journald.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from os import strerror
from sys import argv
from os.path import basename
from orjson import dumps

from ._bytesjson import dumps
from ._message import TASK_UUID_FIELD, MESSAGE_TYPE_FIELD
from ._action import ACTION_TYPE_FIELD, ACTION_STATUS_FIELD, FAILED_STATUS

Expand Down
70 changes: 46 additions & 24 deletions eliot/json.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,58 @@
"""Custom JSON encoding support."""

from __future__ import absolute_import

from typing import Callable
import json
import sys


class EliotJSONEncoder(json.JSONEncoder):
"""JSON encoder with additional functionality.
"""
DEPRECATED. JSON encoder with additional functionality.

In particular, supports NumPy types.
"""

def default(self, o):
numpy = sys.modules.get("numpy", None)
if numpy is not None:
if isinstance(o, numpy.floating):
return float(o)
if isinstance(o, numpy.integer):
return int(o)
if isinstance(o, numpy.bool_):
return bool(o)
if isinstance(o, numpy.ndarray):
if o.size > 10000:
# Too big to want to log as-is, log a summary:
return {
"array_start": o.flat[:10000].tolist(),
"original_shape": o.shape,
}
else:
return o.tolist()
return json.JSONEncoder.default(self, o)


__all__ = ["EliotJSONEncoder"]
return json_default(o)


def json_default(o: object) -> object:
    """
    JSON object encoder for non-standard types. In particular, supports NumPy
    types. If you are wrapping it, call it last, as it will raise a
    ``TypeError`` on unsupported types.
    """
    # NumPy is looked up in ``sys.modules`` rather than imported, so it is
    # only consulted when something else has already imported it.
    numpy = sys.modules.get("numpy", None)
    if numpy is not None:
        if isinstance(o, numpy.floating):
            return float(o)
        if isinstance(o, numpy.integer):
            return int(o)
        if isinstance(o, numpy.bool_):
            return bool(o)
        if isinstance(o, numpy.ndarray):
            if o.size > 10000:
                # Too big to want to log as-is, log a summary:
                return {
                    "array_start": o.flat[:10000].tolist(),
                    "original_shape": o.shape,
                }
            return o.tolist()
    # Name the offending type so the failure can be diagnosed from the
    # error message alone.
    raise TypeError(f"Unsupported type: {type(o).__qualname__}")


def _encoder_to_default_function(
encoder: json.JSONEncoder,
) -> Callable[[object], object]:
"""
Convert an encoder into a default function usable by ``orjson``.
"""

def default(o: object) -> object:
return encoder.default(o)

return default


__all__ = ["EliotJSONEncoder", "json_default"]
Loading
Loading