From c5adb6af1fdfce5b740765b5e55f1a4d6d117482 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 29 Dec 2019 22:58:16 -0800 Subject: [PATCH 1/3] TST: Regression testing for fixed issues --- pandas/tests/groupby/test_groupby.py | 14 ++++++++++ pandas/tests/groupby/test_transform.py | 37 ++++++++++++++++++++++++++ pandas/tests/indexes/test_numeric.py | 6 +++++ pandas/tests/indexing/test_loc.py | 14 ++++++++++ pandas/tests/io/formats/test_to_csv.py | 10 +++++++ pandas/tests/io/parser/test_common.py | 10 +++++++ 6 files changed, 91 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8f88f68c69f2b..795f76d94cc25 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -588,6 +588,20 @@ def test_groupby_multiple_columns(df, op): tm.assert_series_equal(result, expected) +def test_as_index_select_column(): + # GH 5764 + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + result = df.groupby("A", as_index=False)["B"].get_group(1) + expected = pd.Series([2, 4], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum()) + expected = pd.Series( + [2, 6, 6], name="B", index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)]) + ) + tm.assert_series_equal(result, expected) + + def test_groupby_as_index_agg(df): grouped = df.groupby("A", as_index=False) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index c46180c1d11cd..1cdb08224082a 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1138,3 +1138,40 @@ def func(grp): expected = pd.DataFrame([2, -2, 2, 4], columns=["B"]) tm.assert_frame_equal(result, expected) + + +def test_transform_lambda_indexing(): + # GH 7883 + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"], + "B": ["one", "one", "two", "three", "two", "six", "five", "three"], + "C": range(8), + "D": range(8), + "E": range(8), + } + ) + df = df.set_index(["A", "B"]) + df = df.sort_index() + result = df.groupby(level="A").transform(lambda x: x.iloc[-1]) + expected = DataFrame( + { + "C": [3, 3, 7, 7, 4, 4, 4, 4], + "D": [3, 3, 7, 7, 4, 4, 4, 4], + "E": [3, 3, 7, 7, 4, 4, 4, 4], + }, + index=MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "three"), + ("flux", "six"), + ("flux", "three"), + ("foo", "five"), + ("foo", "one"), + ("foo", "two"), + ("foo", "two"), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 37976d89ecba4..6a3131c6cd7f6 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -736,6 +736,12 @@ def test_get_indexer(self): expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) + def test_get_indexer_nan(self): + # GH 7820 + result = Index([1, 2, np.nan]).get_indexer([np.nan]) + expected = np.array([2]) + tm.assert_numpy_array_equal(result, expected) + def test_intersection(self): index = self.create_index() other = Index([1, 2, 3, 4, 5]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8b3620e8cd843..9119ca0a4511b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -966,3 +966,17 @@ def test_loc_getitem_label_list_integer_labels( expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] tm.assert_frame_equal(result, expected, check_column_type=check_column_type) + + +def test_loc_setitem_float_intindex(): + # GH 8720 + rand_data = np.random.randn(8, 4) + result = pd.DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) + expected = pd.DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) + tm.assert_frame_equal(result, expected) + + result = pd.DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 24233a0ec84b1..f2f6d2a1e1e02 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,3 +1,4 @@ +import io import os import sys @@ -563,3 +564,12 @@ def test_to_csv_na_rep_long_string(self, df_new_type): result = df.to_csv(index=False, na_rep="mynull", encoding="ascii") assert expected == result + + def test_to_csv_timedelta_precision(self): + # GH 6783 + s = pd.Series([1, 1]).astype("timedelta64[ns]") + buf = io.StringIO() + s.to_csv(buf) + result = buf.getvalue() + expected = ",0\n0,0 days 00:00:00.000000001\n1,0 days 00:00:00.000000001\n" + assert result == expected diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 42a4a55988b0f..007a068125d43 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2207,3 +2207,13 @@ def test_first_row_bom(all_parsers): result = parser.read_csv(StringIO(data), delimiter="\t") expected = DataFrame(columns=["Head1", "Head2", "Head3"]) tm.assert_frame_equal(result, expected) + + +def test_integer_precision(all_parsers): + # Gh 7072 + s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765 +5,1;0;0;0;1;1;843;843;843;1;1;1;1;1;1;0;0;1;1;0;0,64.0,;,4321113141090630389""" + parser = all_parsers + result = parser.read_csv(StringIO(s), header=None)[4] + expected = Series([4321583677327450765, 4321113141090630389], name=4) + tm.assert_series_equal(result, expected) From 437e814141c375b14eb24633ffa9477d19d937f1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 30 Dec 2019 14:14:08 -0600 Subject: [PATCH 2/3] 32-bit compat --- pandas/tests/indexes/test_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 6a3131c6cd7f6..f3c23197c15b3 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -739,7 +739,7 @@ def test_get_indexer(self): def test_get_indexer_nan(self): # GH 7820 result = Index([1, 2, np.nan]).get_indexer([np.nan]) - expected = np.array([2]) + expected = np.array([2], dtype="int64") tm.assert_numpy_array_equal(result, expected) def test_intersection(self): From 4d73d26ca8b20af15459174083eba34b939d9d9f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 31 Dec 2019 06:49:27 -0600 Subject: [PATCH 3/3] fixups --- pandas/tests/indexes/test_numeric.py | 2 +- pandas/tests/io/formats/test_to_csv.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f3c23197c15b3..7187733fc91c3 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -739,7 +739,7 @@ def test_get_indexer(self): def test_get_indexer_nan(self): # GH 7820 result = Index([1, 2, np.nan]).get_indexer([np.nan]) - expected = np.array([2], dtype="int64") + expected = np.array([2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) def test_intersection(self): diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index f2f6d2a1e1e02..f86d5480ddafa 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -571,5 +571,10 @@ def test_to_csv_timedelta_precision(self): buf = io.StringIO() s.to_csv(buf) result = buf.getvalue() - expected = ",0\n0,0 days 00:00:00.000000001\n1,0 days 00:00:00.000000001\n" + expected_rows = [ + ",0", + "0,0 days 00:00:00.000000001", + "1,0 days 00:00:00.000000001", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected