Skip to content

Commit

Permalink
Prevent crashing from UnicodeDecodeError
Browse files Browse the repository at this point in the history
On Python 2, `sys.stdout` and `print` can normally handle any
combination of `str` and `unicode` objects. However,
`StringIO.StringIO` can only safely handle one or the other. If
the program writes both a `unicode` string, and a non-ASCII
`str` string, then the `getvalue()` method will fail with
`UnicodeDecodeError` [1].

In nose, that causes the script to suddenly abort, with the
cryptic `UnicodeDecodeError`.

This fix catches `UnicodeError` when trying to get the captured
output, and will replace the captured output with a warning
message.

Fixes #816

[1] <https://github.com/python/cpython/blob/2.7/Lib/StringIO.py#L258>
  • Loading branch information
jmoldow committed Mar 23, 2016
1 parent 7c26ad1 commit 4501ab4
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 6 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ Erik Rose
Sascha Peilicke
Andre Caron
Joscha Feth
Jordan Moldow
3 changes: 3 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

- Add option to suppress printing of coverage report
Patch by Eric Larson.
- Fix #816: UnicodeDecodeError on failing tests with output which contains
`unicode`s and non-ascii `str`s.
Patch by Jordan Moldow

1.3.7

Expand Down
43 changes: 37 additions & 6 deletions nose/plugins/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import logging
import os
import sys
import traceback
from nose.plugins.base import Plugin
from nose.pyversion import exc_to_unicode, force_unicode
from nose.util import ln
Expand Down Expand Up @@ -71,26 +72,56 @@ def beforeTest(self, test):
def formatError(self, test, err):
"""Add captured output to error report.
"""
test.capturedOutput = output = self.buffer
test.capturedOutput = output = ''
output_exc_info = None
try:
test.capturedOutput = output = self.buffer
except UnicodeError:
# python2's StringIO.StringIO [1] class has this warning:
#
# The StringIO object can accept either Unicode or 8-bit strings,
# but mixing the two may take some care. If both are used, 8-bit
# strings that cannot be interpreted as 7-bit ASCII (that use the
# 8th bit) will cause a UnicodeError to be raised when getvalue()
# is called.
#
# This exception handler is a protection against issue #816 [2].
# Capturing the exception info allows us to display it back to the
# user.
#
# [1] <https://github.com/python/cpython/blob/2.7/Lib/StringIO.py#L258>
# [2] <https://github.com/nose-devs/nose/issues/816>
output_exc_info = sys.exc_info()
self._buf = None
if not output:
if (not output) and (not output_exc_info):
# Don't return None as that will prevent other
# formatters from formatting and remove earlier formatters
# formats, instead return the err we got
return err
ec, ev, tb = err
return (ec, self.addCaptureToErr(ev, output), tb)
return (ec, self.addCaptureToErr(ev, output, output_exc_info=output_exc_info), tb)

def formatFailure(self, test, err):
"""Add captured output to failure report.
"""
return self.formatError(test, err)

def addCaptureToErr(self, ev, output):
def addCaptureToErr(self, ev, output, output_exc_info=None):
# If given, output_exc_info should be a 3-tuple from sys.exc_info(),
# from an exception raised while trying to get the captured output.
ev = exc_to_unicode(ev)
output = force_unicode(output)
return u'\n'.join([ev, ln(u'>> begin captured stdout <<'),
output, ln(u'>> end captured stdout <<')])
error_text = [ev, ln(u'>> begin captured stdout <<'),
output, ln(u'>> end captured stdout <<')]
if output_exc_info:
error_text.extend([u'OUTPUT ERROR: Could not get captured output.',
# <https://github.com/python/cpython/blob/2.7/Lib/StringIO.py#L258>
# <https://github.com/nose-devs/nose/issues/816>
u"The test might've printed both 'unicode' strings and non-ASCII 8-bit 'str' strings.",
ln(u'>> begin captured stdout exception traceback <<'),
u''.join(traceback.format_exception(*output_exc_info)),
ln(u'>> end captured stdout exception traceback <<')])
return u'\n'.join(error_text)

def start(self):
self.stdout.append(sys.stdout)
Expand Down
35 changes: 35 additions & 0 deletions unit_tests/test_capture_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
from optparse import OptionParser
from nose.config import Config
from nose.plugins.capture import Capture
from nose.pyversion import force_unicode

if sys.version_info[0] == 2:
py2 = True
else:
py2 = False

class TestCapturePlugin(unittest.TestCase):

Expand Down Expand Up @@ -62,6 +68,35 @@ def test_captures_nonascii_stdout(self):
c.end()
self.assertEqual(c.buffer, "test 日本\n")

def test_does_not_crash_with_mixed_unicode_and_nonascii_str(self):
class Dummy:
pass
d = Dummy()
c = Capture()
c.start()
printed_nonascii_str = force_unicode("test 日本").encode('utf-8')
printed_unicode = force_unicode("Hello")
print printed_nonascii_str
print printed_unicode
try:
raise Exception("boom")
except:
err = sys.exc_info()
formatted = c.formatError(d, err)
_, fev, _ = formatted

if py2:
for string in [force_unicode(printed_nonascii_str, encoding='utf-8'), printed_unicode]:
assert string not in fev, "Output unexpectedly found in error message"
assert d.capturedOutput == '', "capturedOutput unexpectedly non-empty"
assert "OUTPUT ERROR" in fev
assert "captured stdout exception traceback" in fev
assert "UnicodeDecodeError" in fev
else:
for string in [repr(printed_nonascii_str), printed_unicode]:
assert string in fev, "Output not found in error message"
assert string in d.capturedOutput, "Output not attached to test"

def test_format_error(self):
class Dummy:
pass
Expand Down

0 comments on commit 4501ab4

Please sign in to comment.