From 7f88d102d9485173a60bc03b7714750283ee6c2b Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 01:02:19 +0000 Subject: [PATCH 1/6] add temp tests to explore different combinations --- pandas/tests/frame/test_dtypes.py | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 68844aeeb081e..1a0f8655f941e 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -815,6 +815,42 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype): expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) tm.assert_frame_equal(result, expected) + def test_temp_32(self): + result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + expected = DataFrame( + np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int32 + ) + tm.assert_frame_equal(result, expected) + + def test_temp_64(self): + result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + expected = DataFrame( + np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int64 + ) + tm.assert_frame_equal(result, expected) + + def test_temp_32_mask(self): + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, True], [False, True, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int32 + ) + + tm.assert_frame_equal(result, expected) + + def test_temp_64_mask(self): + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, True], [False, True, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 + ) + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] ) From e0f72152969bba0488cc958a4f73db1ebecb0b4b Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 01:05:57 +0000 Subject: [PATCH 2/6] put tests at the end --- pandas/tests/frame/test_dtypes.py | 72 +++++++++++++++---------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 1a0f8655f941e..9dc5be50eb3a3 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -815,42 +815,6 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype): expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) tm.assert_frame_equal(result, expected) - def test_temp_32(self): - result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - expected = DataFrame( - np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int32 - ) - tm.assert_frame_equal(result, expected) - - def test_temp_64(self): - result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - expected = DataFrame( - np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int64 - ) - tm.assert_frame_equal(result, expected) - - def test_temp_32_mask(self): - df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - mask = np.array([[True, False, True], [False, True, True]]) - - result = df.where(mask) - expected = DataFrame( - [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int32 - ) - - tm.assert_frame_equal(result, expected) - - def test_temp_64_mask(self): - df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - mask = np.array([[True, False, True], [False, True, True]]) - - result = df.where(mask) - expected = DataFrame( - [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 - ) - - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] ) @@ -1235,3 +1199,39 @@ def test_astype_str(self, timezone_frame): assert ( "2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00" ) in result + + def test_temp_32(self): + result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + expected = DataFrame( + np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int32 + ) + tm.assert_frame_equal(result, expected) + + def test_temp_64(self): + result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + expected = DataFrame( + np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int64 + ) + tm.assert_frame_equal(result, expected) + + def test_temp_32_mask(self): + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, True], [False, True, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int32 + ) + + tm.assert_frame_equal(result, expected) + + def test_temp_64_mask(self): + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, True], [False, True, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 + ) + + tm.assert_frame_equal(result, expected) From e4f42a9f7618bc41097ce424d88eeaddde109d21 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 13:40:26 +0000 Subject: [PATCH 3/6] add 2 temp test to explore df with nans dtypes --- pandas/tests/frame/test_dtypes.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 9dc5be50eb3a3..8394fb9786d74 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -1214,6 +1214,20 @@ def test_temp_64(self): ) tm.assert_frame_equal(result, expected) + def test_temp_32_nan(self): + result = DataFrame([[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC")) + expected = DataFrame( + [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int32 + ) + tm.assert_frame_equal(result, expected) + + def test_temp_64_nan(self): + result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + expected = DataFrame( + [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 + ) + tm.assert_frame_equal(result, expected) + def test_temp_32_mask(self): df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) mask = np.array([[True, False, True], [False, True, True]]) From dd599647451f8bdb130a859b41d91571e6ebe5b8 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 14:31:45 +0000 Subject: [PATCH 4/6] change input to test with nan --- pandas/tests/frame/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 8394fb9786d74..533e7c78b1ae7 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -1222,7 +1222,7 @@ def test_temp_32_nan(self): tm.assert_frame_equal(result, expected) def test_temp_64_nan(self): - result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + result = DataFrame([[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC")) expected = DataFrame( [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 ) From e111ca2082bc8ed417ddb7a8f21b4a5980c728b5 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 18:41:14 +0000 Subject: [PATCH 5/6] change test to be more relevant to the issue --- pandas/tests/frame/test_dtypes.py | 75 +++++++++++-------------------- 1 file changed, 25 insertions(+), 50 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 533e7c78b1ae7..51120ebedd664 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -815,6 +815,31 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype): expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) tm.assert_frame_equal(result, expected) + def test_df_where_change_dtype(self): + # GH 16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, False], [False, False, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC") + ) + + tm.assert_frame_equal(result, expected) + + # change type to category + df.A = df.A.astype("category") + df.B = df.B.astype("category") + df.C = df.C.astype("category") + + result = df.where(mask) + A = pd.Categorical([0, np.nan], categories=[0, 3]) + B = pd.Categorical([np.nan, np.nan], categories=[1, 4]) + C = pd.Categorical([np.nan, 5], categories=[2, 5]) + expected = DataFrame({"A": A, "B": B, "C": C}) + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] ) @@ -1199,53 +1224,3 @@ def test_astype_str(self, timezone_frame): assert ( "2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00" ) in result - - def test_temp_32(self): - result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - expected = DataFrame( - np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int32 - ) - tm.assert_frame_equal(result, expected) - - def test_temp_64(self): - result = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - expected = DataFrame( - np.arange(2 * 3).reshape(2, 3), columns=list("ABC"), dtype=np.int64 - ) - tm.assert_frame_equal(result, expected) - - def test_temp_32_nan(self): - result = DataFrame([[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC")) - expected = DataFrame( - [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int32 - ) - tm.assert_frame_equal(result, expected) - - def test_temp_64_nan(self): - result = DataFrame([[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC")) - expected = DataFrame( - [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 - ) - tm.assert_frame_equal(result, expected) - - def test_temp_32_mask(self): - df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - mask = np.array([[True, False, True], [False, True, True]]) - - result = df.where(mask) - expected = DataFrame( - [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int32 - ) - - tm.assert_frame_equal(result, expected) - - def test_temp_64_mask(self): - df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) - mask = np.array([[True, False, True], [False, True, True]]) - - result = df.where(mask) - expected = DataFrame( - [[0, np.nan, 2], [np.nan, 4, 5]], columns=list("ABC"), dtype=np.int64 - ) - - tm.assert_frame_equal(result, expected) From a5971c8bc36543fc32880b5f38d93a12fc12de39 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sat, 9 Nov 2019 18:46:58 +0000 Subject: [PATCH 6/6] move test --- pandas/tests/frame/test_dtypes.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 51120ebedd664..93db7094d7e75 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -815,6 +815,16 @@ def test_astype_extension_dtypes_duplicate_col(self, dtype): expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] + ) + def test_astype_column_metadata(self, dtype): + # GH 19920 + columns = pd.UInt64Index([100, 200, 300], name="foo") + df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) + df = df.astype(dtype) + tm.assert_index_equal(df.columns, columns) + def test_df_where_change_dtype(self): # GH 16979 df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) @@ -840,16 +850,6 @@ def test_df_where_change_dtype(self): tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] - ) - def test_astype_column_metadata(self, dtype): - # GH 19920 - columns = pd.UInt64Index([100, 200, 300], name="foo") - df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) - df = df.astype(dtype) - tm.assert_index_equal(df.columns, columns) - @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_from_datetimelike_to_objectt(self, dtype, unit):