Skip to content

BUG: fix json segfaults #12802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ Other Enhancements
- ``pd.read_csv()`` now supports opening files using xz compression, via extension inference or when ``compression='xz'`` is explicitly specified; ``xz`` compression is also supported by ``DataFrame.to_csv`` in the same way (:issue:`11852`)
- ``pd.read_msgpack()`` now always gives writeable ndarrays even when compression is used (:issue:`12359`).
- ``pd.read_msgpack()`` now supports serializing and de-serializing categoricals with msgpack (:issue:`12573`)
- ``(DataFrame|Series).to_json()`` now supports `DataFrame`s that contain categorical and sparse data (:issue:`10778`)
- ``interpolate()`` now supports ``method='akima'`` (:issue:`7588`).
- ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`)
- Added ``weekday_name`` as a component to ``DatetimeIndex`` and ``.dt`` accessor. (:issue:`11128`)
Expand Down Expand Up @@ -391,6 +392,9 @@ Deprecations



- Potential segfault in ``DataFrame.to_json`` when serialising ``datetime.time`` (:issue:`11473`).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue for the datetime w/tz fixes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DatetimeTZBlock was covered by #10778 mentioned here. (#11473 is a separate issue related to datetime objects)

- Potential segfault in ``DataFrame.to_json`` when attempting to serialise a 0d array (:issue:`11299`).
- Segfault in ``to_json`` when attempting to serialise ``DataFrame`` or ``Series`` with non-ndarray values (:issue:`10778`).



Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,99 @@ def my_handler_raises(obj):
DataFrame({'a': [1, 2, object()]}).to_json,
default_handler=my_handler_raises)

def test_categorical(self):
# GH4377 df.to_json segfaults with non-ndarray blocks
df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
df["B"] = df["A"]
expected = df.to_json()

df["B"] = df["A"].astype('category')
self.assertEqual(expected, df.to_json())

s = df["A"]
sc = df["B"]
self.assertEqual(s.to_json(), sc.to_json())

def test_datetime_tz(self):
# GH4377 df.to_json segfaults with non-ndarray blocks
tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
tz_naive = tz_range.tz_convert('utc').tz_localize(None)

df = DataFrame({
'A': tz_range,
'B': pd.date_range('20130101', periods=3)})

df_naive = df.copy()
df_naive['A'] = tz_naive
expected = df_naive.to_json()
self.assertEqual(expected, df.to_json())

stz = Series(tz_range)
s_naive = Series(tz_naive)
self.assertEqual(stz.to_json(), s_naive.to_json())

def test_sparse(self):
    # GH4377 df.to_json segfaults with non-ndarray blocks
    # A sparse frame/series must serialise identically to its dense form.
    df = pd.DataFrame(np.random.randn(10, 4))
    # ``.ix`` is deprecated; ``.loc[:8]`` is the same label-based,
    # end-inclusive slice on the default integer index (rows 0..8).
    df.loc[:8] = np.nan

    sdf = df.to_sparse()
    expected = df.to_json()
    self.assertEqual(expected, sdf.to_json())

    s = pd.Series(np.random.randn(10))
    s.loc[:8] = np.nan
    ss = s.to_sparse()

    expected = s.to_json()
    self.assertEqual(expected, ss.to_json())

def test_tz_is_utc(self):
    # The same instant expressed as naive-UTC, pytz-aware, and
    # fixed-offset must all serialise to one identical UTC ISO string,
    # both as a Timestamp and as a plain datetime.
    exp = '"2013-01-10T05:00:00.000Z"'

    stamps = [
        Timestamp('2013-01-10 05:00:00Z'),
        Timestamp('2013-01-10 00:00:00', tz='US/Eastern'),
        Timestamp('2013-01-10 00:00:00-0500'),
    ]
    for ts in stamps:
        self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
        dt = ts.to_datetime()
        self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))

def test_tz_range_is_utc(self):
    # Equivalent ranges (naive-UTC, tz-aware, fixed-offset) must all
    # serialise to the same UTC ISO strings, whether dumped as a range,
    # a DatetimeIndex, or a DataFrame column.
    exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
    dfexp = ('{"DT":{'
             '"0":"2013-01-01T05:00:00.000Z",'
             '"1":"2013-01-02T05:00:00.000Z"}}')

    ranges = [
        pd.date_range('2013-01-01 05:00:00Z', periods=2),
        pd.date_range('2013-01-01 00:00:00', periods=2,
                      tz='US/Eastern'),
        pd.date_range('2013-01-01 00:00:00-0500', periods=2),
    ]
    for tz_range in ranges:
        self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
        dti = pd.DatetimeIndex(tz_range)
        self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
        df = DataFrame({'DT': dti})
        self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))


if __name__ == '__main__':
import nose
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import numpy as np
from numpy.testing import (assert_array_almost_equal_nulp,
assert_approx_equal)
import pytz
from pandas import DataFrame, Series, Index, NaT, DatetimeIndex
import pandas.util.testing as tm

Expand Down Expand Up @@ -365,15 +364,30 @@ def test_encodeTimeConversion(self):
datetime.time(),
datetime.time(1, 2, 3),
datetime.time(10, 12, 15, 343243),
datetime.time(10, 12, 15, 343243, pytz.utc),
# datetime.time(10, 12, 15, 343243, dateutil.tz.gettz('UTC')), #
# this segfaults! No idea why.
]
for test in tests:
output = ujson.encode(test)
expected = '"%s"' % test.isoformat()
self.assertEqual(expected, output)

def test_encodeTimeConversion_pytz(self):
    # GH11473 to_json segfaults with timezone-aware datetimes
    tm._skip_if_no_pytz()
    import pytz
    aware_time = datetime.time(10, 12, 15, 343243, pytz.utc)
    expected = '"%s"' % aware_time.isoformat()
    self.assertEqual(expected, ujson.encode(aware_time))

def test_encodeTimeConversion_dateutil(self):
    # GH11473 to_json segfaults with timezone-aware datetimes
    tm._skip_if_no_dateutil()
    # ``import dateutil`` alone does not load the ``tz`` submodule, so
    # ``dateutil.tz`` is only bound if something else imported it first.
    # Import the submodule explicitly to make the access reliable.
    import dateutil.tz
    test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
    output = ujson.encode(test)
    expected = '"%s"' % test.isoformat()
    self.assertEqual(expected, output)

def test_nat(self):
input = NaT
assert ujson.encode(input) == 'null', "Expected null"
Expand Down
Loading