From 676a4e5cef1cf37704ef702699db1fd6c89028ea Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Mon, 20 Mar 2017 19:32:02 -0300 Subject: [PATCH 1/4] Test --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index a181412be2719..efd075dabcba9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ Release Notes ============= -The list of changes to pandas between each release can be found +The list of changes to Pandas between each release can be found [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). For full details, see the commit logs at http://github.com/pandas-dev/pandas. From cc4c15b5b2beae639d31b9b9916a5e0b3384d5e7 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Wed, 29 Mar 2017 05:36:56 -0300 Subject: [PATCH 2/4] Fixing coercion bug at SparseDataFrame construction --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/sparse/frame.py | 2 +- pandas/tests/sparse/test_frame.py | 22 ++++++++++++++-------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2e822729873ad..359a038d236b0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1045,7 +1045,7 @@ Bug Fixes - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 41f301f263374..83e662c5875a8 100644 --- a/pandas/sparse/frame.py +++ 
b/pandas/sparse/frame.py @@ -159,7 +159,7 @@ def _init_dict(self, data, index, columns, dtype=None): v = [v.get(i, nan) for i in index] v = sp_maker(v) - sdict[k] = v + sdict[k] = v.astype(np.dtype(dtype)) if dtype is not None else v # TODO: figure out how to handle this case, all nan's? # add in any other columns we want to have (completeness) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index c0c678c184ee8..1ed9c3e1632b9 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -28,7 +28,6 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): - klass = SparseDataFrame def setUp(self): @@ -758,7 +757,8 @@ def test_sparse_frame_fillna_limit(self): def test_rename(self): # just check this works renamed = self.frame.rename(index=str) # noqa - renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa + renamed = self.frame.rename( + columns=lambda x: '%s%d' % (x, len(x))) # noqa def test_corr(self): res = self.frame.corr() @@ -967,7 +967,6 @@ def _check(frame, orig): def test_shift(self): def _check(frame, orig): - shifted = frame.shift(0) exp = orig.shift(0) tm.assert_frame_equal(shifted.to_dense(), exp) @@ -1060,7 +1059,7 @@ def test_sparse_pow_issue(self): df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 - result = 1**df + result = 1 ** df r1 = result.take([0], 1)['A'] r2 = result['A'] @@ -1126,7 +1125,7 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) -@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 +@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('cd')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) @@ -1180,7 +1179,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_equal(sdf.to_coo().dtype, np.object_) 
-@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 def test_from_to_scipy_object(spmatrix, fill_value): # GH 4343 dtype = object @@ -1226,7 +1225,6 @@ def test_from_to_scipy_object(spmatrix, fill_value): class TestSparseDataFrameArithmetic(tm.TestCase): - def test_numeric_op_scalar(self): df = pd.DataFrame({'A': [nan, nan, 0, 1, ], 'B': [0, 1, 2, nan], @@ -1255,7 +1253,6 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): - def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], @@ -1299,3 +1296,12 @@ def test_numpy_func_call(self): 'std', 'min', 'max'] for func in funcs: getattr(np, func)(self.frame) + + def test_type_coercion_at_construction(self): + # GH 15682 + df = pd.SparseDataFrame( + {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8', + default_fill_value=0) + result = df.dtypes[0] + expected = np.dtype('uint8') + assert result == expected From 483bb2cd281c7f0089205461a7e3efb9efacffa7 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Wed, 29 Mar 2017 15:02:14 -0300 Subject: [PATCH 3/4] Doing adjustments as per @jreback requests --- pandas/sparse/frame.py | 4 ++-- pandas/tests/sparse/test_frame.py | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 83e662c5875a8..455d120cca640 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -142,7 +142,7 @@ def _init_dict(self, data, index, columns, dtype=None): sp_maker = lambda x: SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, - copy=True) + copy=True, dtype=dtype) sdict = DataFrame() for k, v in compat.iteritems(data): if isinstance(v, Series): @@ -159,7 +159,7 @@ def _init_dict(self, data, index, columns, dtype=None): v = [v.get(i, nan) for i in index] v = sp_maker(v) - 
sdict[k] = v.astype(np.dtype(dtype)) if dtype is not None else v + sdict[k] = v # TODO: figure out how to handle this case, all nan's? # add in any other columns we want to have (completeness) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 1ed9c3e1632b9..5edc9b843f47d 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -236,6 +236,18 @@ def test_constructor_nan_dataframe(self): dtype=float) tm.assert_sp_frame_equal(result, expected) + def test_type_coercion_at_construction(self): + # GH 15682 + result = pd.SparseDataFrame( + {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8', + default_fill_value=0) + expected = pd.SparseDataFrame( + {'a': pd.SparseSeries([1, 0, 0], dtype='uint8'), + 'b': pd.SparseSeries([0, 1, 0], dtype='uint8'), + 'c': pd.SparseSeries([0, 0, 1], dtype='uint8')}, + default_fill_value=0) + tm.assert_sp_frame_equal(result, expected) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan @@ -756,8 +768,8 @@ def test_sparse_frame_fillna_limit(self): def test_rename(self): # just check this works - renamed = self.frame.rename(index=str) # noqa - renamed = self.frame.rename( + rename = self.frame.rename(index=str) # noqa + rename = self.frame.rename( columns=lambda x: '%s%d' % (x, len(x))) # noqa def test_corr(self): @@ -1225,6 +1237,7 @@ def test_from_to_scipy_object(spmatrix, fill_value): class TestSparseDataFrameArithmetic(tm.TestCase): + def test_numeric_op_scalar(self): df = pd.DataFrame({'A': [nan, nan, 0, 1, ], 'B': [0, 1, 2, nan], @@ -1297,11 +1310,3 @@ def test_numpy_func_call(self): for func in funcs: getattr(np, func)(self.frame) - def test_type_coercion_at_construction(self): - # GH 15682 - df = pd.SparseDataFrame( - {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8', - default_fill_value=0) - result = df.dtypes[0] - expected = np.dtype('uint8') - assert result == expected From 
04fba8de51b0e3814b059e797b4bcb1bb0b7cca4 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Wed, 29 Mar 2017 22:03:22 -0300 Subject: [PATCH 4/4] Adding test_rename test cases (were missing) --- pandas/tests/sparse/test_frame.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 5edc9b843f47d..ae1a1e35f1859 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -767,10 +767,18 @@ def test_sparse_frame_fillna_limit(self): tm.assert_frame_equal(result, expected) def test_rename(self): - # just check this works - rename = self.frame.rename(index=str) # noqa - rename = self.frame.rename( - columns=lambda x: '%s%d' % (x, len(x))) # noqa + result = self.frame.rename(index=str) + expected = SparseDataFrame(self.data, index=self.dates.strftime( + "%Y-%m-%d %H:%M:%S")) + tm.assert_sp_frame_equal(result, expected) + + result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C1': np.arange(10, dtype=np.float64), + 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + expected = SparseDataFrame(data, index=self.dates) + tm.assert_sp_frame_equal(result, expected) def test_corr(self): res = self.frame.corr() @@ -1309,4 +1317,3 @@ def test_numpy_func_call(self): 'std', 'min', 'max'] for func in funcs: getattr(np, func)(self.frame) -