Skip to content

Commit af006a4

Browse files
committed
BUG: fix json segfaults
1 parent f4de157 commit af006a4

File tree

4 files changed

+280
-38
lines changed

4 files changed

+280
-38
lines changed

doc/source/whatsnew/v0.18.1.txt

+4
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ Other Enhancements
148148
- ``pd.read_csv()`` now supports opening files using xz compression, via extension inference or explicit ``compression='xz'``; ``xz`` compression is also supported by ``DataFrame.to_csv`` in the same way (:issue:`11852`)
149149
- ``pd.read_msgpack()`` now always gives writeable ndarrays even when compression is used (:issue:`12359`).
150150
- ``pd.read_msgpack()`` now supports serializing and de-serializing categoricals with msgpack (:issue:`12573`)
151+
- ``(DataFrame|Series).to_json()`` now supports ``DataFrame`` objects that contain categorical and sparse data (:issue:`10778`)
151152
- ``interpolate()`` now supports ``method='akima'`` (:issue:`7588`).
152153
- ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`)
153154
- Added ``weekday_name`` as a component to ``DatetimeIndex`` and ``.dt`` accessor. (:issue:`11128`)
@@ -391,6 +392,9 @@ Deprecations
391392

392393

393394

395+
- Potential segfault in ``DataFrame.to_json`` when serialising ``datetime.time`` (:issue:`11473`).
396+
- Potential segfault in ``DataFrame.to_json`` when attempting to serialise a 0d array (:issue:`11299`).
397+
- Segfault in ``to_json`` when attempting to serialise a ``DataFrame`` or ``Series`` with non-ndarray values (:issue:`10778`).
394398

395399

396400

pandas/io/tests/test_json/test_pandas.py

+93
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,99 @@ def my_handler_raises(obj):
821821
DataFrame({'a': [1, 2, object()]}).to_json,
822822
default_handler=my_handler_raises)
823823

824+
def test_categorical(self):
825+
# GH4377 df.to_json segfaults with non-ndarray blocks
826+
df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
827+
df["B"] = df["A"]
828+
expected = df.to_json()
829+
830+
df["B"] = df["A"].astype('category')
831+
self.assertEqual(expected, df.to_json())
832+
833+
s = df["A"]
834+
sc = df["B"]
835+
self.assertEqual(s.to_json(), sc.to_json())
836+
837+
def test_datetime_tz(self):
838+
# GH4377 df.to_json segfaults with non-ndarray blocks
839+
tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
840+
tz_naive = tz_range.tz_convert('utc').tz_localize(None)
841+
842+
df = DataFrame({
843+
'A': tz_range,
844+
'B': pd.date_range('20130101', periods=3)})
845+
846+
df_naive = df.copy()
847+
df_naive['A'] = tz_naive
848+
expected = df_naive.to_json()
849+
self.assertEqual(expected, df.to_json())
850+
851+
stz = Series(tz_range)
852+
s_naive = Series(tz_naive)
853+
self.assertEqual(stz.to_json(), s_naive.to_json())
854+
855+
def test_sparse(self):
856+
# GH4377 df.to_json segfaults with non-ndarray blocks
857+
df = pd.DataFrame(np.random.randn(10, 4))
858+
df.ix[:8] = np.nan
859+
860+
sdf = df.to_sparse()
861+
expected = df.to_json()
862+
self.assertEqual(expected, sdf.to_json())
863+
864+
s = pd.Series(np.random.randn(10))
865+
s.ix[:8] = np.nan
866+
ss = s.to_sparse()
867+
868+
expected = s.to_json()
869+
self.assertEqual(expected, ss.to_json())
870+
871+
def test_tz_is_utc(self):
872+
exp = '"2013-01-10T05:00:00.000Z"'
873+
874+
ts = Timestamp('2013-01-10 05:00:00Z')
875+
self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
876+
dt = ts.to_datetime()
877+
self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))
878+
879+
ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern')
880+
self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
881+
dt = ts.to_datetime()
882+
self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))
883+
884+
ts = Timestamp('2013-01-10 00:00:00-0500')
885+
self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
886+
dt = ts.to_datetime()
887+
self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))
888+
889+
def test_tz_range_is_utc(self):
890+
exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
891+
dfexp = ('{"DT":{'
892+
'"0":"2013-01-01T05:00:00.000Z",'
893+
'"1":"2013-01-02T05:00:00.000Z"}}')
894+
895+
tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
896+
self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
897+
dti = pd.DatetimeIndex(tz_range)
898+
self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
899+
df = DataFrame({'DT': dti})
900+
self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))
901+
902+
tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
903+
tz='US/Eastern')
904+
self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
905+
dti = pd.DatetimeIndex(tz_range)
906+
self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
907+
df = DataFrame({'DT': dti})
908+
self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))
909+
910+
tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
911+
self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
912+
dti = pd.DatetimeIndex(tz_range)
913+
self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
914+
df = DataFrame({'DT': dti})
915+
self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))
916+
824917

825918
if __name__ == '__main__':
826919
import nose

pandas/io/tests/test_json/test_ujson.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import numpy as np
2424
from numpy.testing import (assert_array_almost_equal_nulp,
2525
assert_approx_equal)
26-
import pytz
2726
from pandas import DataFrame, Series, Index, NaT, DatetimeIndex
2827
import pandas.util.testing as tm
2928

@@ -365,15 +364,30 @@ def test_encodeTimeConversion(self):
365364
datetime.time(),
366365
datetime.time(1, 2, 3),
367366
datetime.time(10, 12, 15, 343243),
368-
datetime.time(10, 12, 15, 343243, pytz.utc),
369-
# datetime.time(10, 12, 15, 343243, dateutil.tz.gettz('UTC')), #
370-
# this segfaults! No idea why.
371367
]
372368
for test in tests:
373369
output = ujson.encode(test)
374370
expected = '"%s"' % test.isoformat()
375371
self.assertEqual(expected, output)
376372

373+
def test_encodeTimeConversion_pytz(self):
374+
# GH11473 to_json segfaults with timezone-aware datetimes
375+
tm._skip_if_no_pytz()
376+
import pytz
377+
test = datetime.time(10, 12, 15, 343243, pytz.utc)
378+
output = ujson.encode(test)
379+
expected = '"%s"' % test.isoformat()
380+
self.assertEqual(expected, output)
381+
382+
def test_encodeTimeConversion_dateutil(self):
383+
# GH11473 to_json segfaults with timezone-aware datetimes
384+
tm._skip_if_no_dateutil()
385+
import dateutil
386+
test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
387+
output = ujson.encode(test)
388+
expected = '"%s"' % test.isoformat()
389+
self.assertEqual(expected, output)
390+
377391
def test_nat(self):
378392
input = NaT
379393
assert ujson.encode(input) == 'null', "Expected null"

0 commit comments

Comments
 (0)