diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 8c582f2618882..d703fa7bb54a1 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -370,7 +370,7 @@ Bug Fixes
 - Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. (:issue:`10150`)
 - Bug in `pd.eval` using ``numexpr`` engine coerces 1 element numpy array to scalar (:issue:`10546`)
 - Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`)
-- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`)
+- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`, :issue:`10630`)
 - Bug in `pandas.read_csv` with kwargs ``index_col=False``, ``index_col=['a', 'b']`` or ``dtype`` (:issue:`10413`, :issue:`10467`, :issue:`10577`)
 - Bug in `Series.from_csv` with ``header`` kwarg not setting the ``Series.name`` or the ``Series.index.name`` (:issue:`10483`)
diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 847a7c4f90216..d7655e9e052c0 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -169,10 +169,16 @@ def read(fh):
     u('datetime64[us]'): np.dtype('M8[us]'),
     22: np.dtype('m8[ns]'),
     u('timedelta64[ns]'): np.dtype('m8[ns]'),
-    u('timedelta64[us]'): np.dtype('m8[us]')}
+    u('timedelta64[us]'): np.dtype('m8[us]'),
+
+    # this is platform int, which we need to remap to np.int64
+    # for compat on windows platforms
+    7: np.dtype('int64'),
+}
 
 
 def dtype_for(t):
+    """ return my dtype mapping, whether number or name """
     if t in dtype_dict:
         return dtype_dict[t]
     return np.typeDict[t]
@@ -266,7 +272,7 @@ def encode(obj):
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
                     'freq': getattr(obj, 'freqstr', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.asi8),
                     'compress': compressor}
         elif isinstance(obj, DatetimeIndex):
@@ -279,7 +285,7 @@ def encode(obj):
             return {'typ': 'datetime_index',
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.asi8),
                     'freq': getattr(obj, 'freqstr', None),
                     'tz': tz,
@@ -288,14 +294,14 @@ def encode(obj):
             return {'typ': 'multi_index',
                     'klass': obj.__class__.__name__,
                     'names': getattr(obj, 'names', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.values),
                     'compress': compressor}
         else:
             return {'typ': 'index',
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.values),
                     'compress': compressor}
     elif isinstance(obj, Series):
@@ -305,7 +311,7 @@ def encode(obj):
             )
             #d = {'typ': 'sparse_series',
             #     'klass': obj.__class__.__name__,
-            #     'dtype': obj.dtype.num,
+            #     'dtype': obj.dtype.name,
             #     'index': obj.index,
             #     'sp_index': obj.sp_index,
             #     'sp_values': convert(obj.sp_values),
@@ -318,7 +324,7 @@ def encode(obj):
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
                     'index': obj.index,
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.values),
                     'compress': compressor}
     elif issubclass(tobj, NDFrame):
@@ -360,7 +366,7 @@ def encode(obj):
                         'locs': b.mgr_locs.as_array,
                         'values': convert(b.values),
                         'shape': b.values.shape,
-                        'dtype': b.dtype.num,
+                        'dtype': b.dtype.name,
                         'klass': b.__class__.__name__,
                         'compress': compressor
                     } for b in data.blocks]}
@@ -413,7 +419,7 @@ def encode(obj):
         return {'typ': 'ndarray',
                 'shape': obj.shape,
                 'ndim': obj.ndim,
-                'dtype': obj.dtype.num,
+                'dtype': obj.dtype.name,
                 'data': convert(obj),
                 'compress': compressor}
     elif isinstance(obj, np.number):
@@ -449,11 +455,12 @@ def decode(obj):
         return Period(ordinal=obj['ordinal'], freq=obj['freq'])
     elif typ == 'index':
         dtype = dtype_for(obj['dtype'])
-        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
+        data = unconvert(obj['data'], dtype,
                          obj.get('compress'))
         return globals()[obj['klass']](data, dtype=dtype, name=obj['name'])
     elif typ == 'multi_index':
-        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
+        dtype = dtype_for(obj['dtype'])
+        data = unconvert(obj['data'], dtype,
                          obj.get('compress'))
         data = [tuple(x) for x in data]
         return globals()[obj['klass']].from_tuples(data, names=obj['names'])
diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack b/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack
new file mode 100644
index 0000000000000..1e128f42a37a6
Binary files /dev/null and b/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack differ
diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack b/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack
new file mode 100644
index 0000000000000..156905faece90
Binary files /dev/null and b/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack differ
diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack b/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack
new file mode 100644
index 0000000000000..6bf1b9b9afaaa
Binary files /dev/null and b/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle b/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle
new file mode 100644
index 0000000000000..a2a3ffa044013
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle b/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle
new file mode 100644
index 0000000000000..6b8fdaa21badc
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle differ
diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle b/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle
new file mode 100644
index 0000000000000..60101c2f1e95e
Binary files /dev/null and b/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle differ
diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py
index 93d55c654de90..ceb845073e2c3 100644
--- a/pandas/io/tests/test_cparser.py
+++ b/pandas/io/tests/test_cparser.py
@@ -186,6 +186,30 @@ def test_header_not_enough_lines(self):
                 '1,2,3\n'
                 '4,5,6')
 
+        reader = TextReader(StringIO(data), delimiter=',', header=2)
+        header = reader.header
+        expected = [['a', 'b', 'c']]
+        self.assertEqual(header, expected)
+
+        recs = reader.read()
+        expected = {0 : [1, 4], 1 : [2, 5], 2 : [3, 6]}
+        assert_array_dicts_equal(expected, recs)
+
+        # not enough rows
+        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
+                          delimiter=',', header=5, as_recarray=True)
+
+    def test_header_not_enough_lines_as_recarray(self):
+
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
+        data = ('skip this\n'
+                'skip this\n'
+                'a,b,c\n'
+                '1,2,3\n'
+                '4,5,6')
+
         reader = TextReader(StringIO(data), delimiter=',', header=2,
                             as_recarray=True)
         header = reader.header
@@ -246,6 +270,21 @@ def _make_reader(**kwds):
             self.assertTrue((result[0] == ex_values).all())
             self.assertEqual(result[1].dtype, 'S4')
 
+    def test_numpy_string_dtype_as_recarray(self):
+        data = """\
+a,1
+aa,2
+aaa,3
+aaaa,4
+aaaaa,5"""
+
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
+        def _make_reader(**kwds):
+            return TextReader(StringIO(data), delimiter=',', header=None,
+                              **kwds)
+
         reader = _make_reader(dtype='S4', as_recarray=True)
         result = reader.read()
         self.assertEqual(result['0'].dtype, 'S4')
diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py
index 303ecdbf0ec6e..cb99c1705c5eb 100644
--- a/pandas/io/tests/test_json/test_ujson.py
+++ b/pandas/io/tests/test_json/test_ujson.py
@@ -114,6 +114,9 @@ def test_decimalDecodeTestPrecise(self):
         self.assertEqual(sut, decoded)
 
     def test_encodeDoubleTinyExponential(self):
+        if compat.is_platform_windows() and not compat.PY3:
+            raise nose.SkipTest("buggy on win-64 for py2")
+
         num = 1e-40
         self.assertEqual(num, ujson.decode(ujson.encode(num)))
         num = 1e-100
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 62dbb0090aac5..724dcf1de14d2 100755
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -3204,6 +3204,9 @@ def read_table(self, *args, **kwds):
         return read_table(*args, **kwds)
 
     def test_compact_ints(self):
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
         data = ('0,1,0,0\n'
                 '1,1,0,0\n'
                 '0,1,0,1')
@@ -3515,6 +3518,25 @@ def test_compact_ints(self):
                 '1,1,0,0\n'
                 '0,1,0,1')
 
+        result = read_csv(StringIO(data), delimiter=',', header=None,
+                          compact_ints=True)
+        ex_dtype = np.dtype([(str(i), 'i1') for i in range(4)])
+        self.assertEqual(result.to_records(index=False).dtype, ex_dtype)
+
+        result = read_csv(StringIO(data), delimiter=',', header=None,
+                          compact_ints=True,
+                          use_unsigned=True)
+        ex_dtype = np.dtype([(str(i), 'u1') for i in range(4)])
+        self.assertEqual(result.to_records(index=False).dtype, ex_dtype)
+
+    def test_compact_ints_as_recarray(self):
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
+        data = ('0,1,0,0\n'
+                '1,1,0,0\n'
+                '0,1,0,1')
+
         result = read_csv(StringIO(data), delimiter=',', header=None,
                           compact_ints=True, as_recarray=True)
         ex_dtype = np.dtype([(str(i), 'i1') for i in range(4)])
@@ -3554,6 +3576,21 @@ def test_pass_dtype(self):
 3,4.5
 4,5.5"""
 
+        result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'})
+        self.assertEqual(result['one'].dtype, 'u1')
+        self.assertEqual(result['two'].dtype, 'object')
+
+    def test_pass_dtype_as_recarray(self):
+        data = """\
+one,two
+1,2.5
+2,3.5
+3,4.5
+4,5.5"""
+
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
         result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'},
                                as_recarray=True)
         self.assertEqual(result['one'].dtype, 'u1')
@@ -3623,6 +3660,7 @@ def test_usecols_dtypes(self):
 4,5,6
 7,8,9
 10,11,12"""
+
         result = self.read_csv(StringIO(data), usecols=(0, 1, 2),
                                names=('a', 'b', 'c'),
                                header=None,
diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py
index 440f4ffb46cb5..7f9e61571ebfc 100644
--- a/pandas/sparse/tests/test_libsparse.py
+++ b/pandas/sparse/tests/test_libsparse.py
@@ -8,7 +8,7 @@
 import pandas.util.testing as tm
 
 from pandas.core.sparse import SparseSeries
-from pandas import DataFrame
+from pandas import DataFrame, compat
 
 from pandas._sparse import IntIndex, BlockIndex
 import pandas._sparse as splib
@@ -230,6 +230,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
             _check_length_exc(xindex.to_int_index(),
                               longer_index.to_int_index())
 
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64 when all tests are run")
         check_cases(_check_case)
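
Why the ``dtype.num`` -> ``dtype.name`` change in ``packers.py`` above matters: NumPy's numeric
type codes are platform dependent (the "platform int" code refers to ``int64`` on 64-bit
Linux/OSX but to ``int32`` on 64-bit Windows), whereas dtype names are stable. A minimal
sketch of the idea, assuming only NumPy — ``legacy_codes`` and this simplified ``dtype_for``
are illustrative stand-ins, not the actual ``pandas.io.packers`` implementation::

    import numpy as np

    # dtype.num is a C-level type code and varies by platform: 'int64' is the
    # platform "long" on 64-bit Linux/OSX, but "long" is only 32 bits on
    # Windows, so the same numeric code decodes differently there.  The dtype
    # name, by contrast, round-trips unchanged on every platform.
    print(np.dtype('int64').num)    # platform dependent (e.g. 7 vs 9)
    print(np.dtype('int64').name)   # 'int64' everywhere

    # Accept both old payloads (numeric codes) and new payloads (names): pin
    # the ambiguous platform-int code to int64, pass names straight to np.dtype.
    legacy_codes = {7: np.dtype('int64')}   # illustrative; packers.py keeps a fuller map

    def dtype_for(t):
        # t is either a legacy numeric code or a dtype name string
        if t in legacy_codes:
            return legacy_codes[t]
        return np.dtype(t)

This is also why the patched ``dtype_for`` keeps accepting numeric codes ("whether number
or name"): msgpack files written by older pandas, such as the 0.16.2 legacy test files
added above, still carry ``dtype.num``.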