From c35c8c5cebbdffcb5677c0b08212a7241b15dec1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 20:34:28 -0700 Subject: [PATCH 1/7] Add tests for GH 11747 and 11726 --- pandas/tests/arrays/sparse/test_array.py | 8 ++++++++ pandas/tests/series/indexing/test_getitem.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index a96e5b07b7f7e..b29855caf6c1d 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1313,6 +1313,14 @@ def test_dropna(fill_value): tm.assert_equal(df.dropna(), expected_df) +def test_drop_duplicates_fill_value(): + # GH 11726 + df = pd.DataFrame(np.zeros((5, 5))).apply(lambda x: SparseArray(x, fill_value=0)) + result = df.drop_duplicates() + expected = pd.DataFrame({i: SparseArray([0.0], fill_value=0) for i in range(5)}) + tm.assert_frame_equal(result, expected) + + class TestMinMax: plain_data = np.arange(5).astype(float) data_neg = plain_data * (-1) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 0e43e351bc082..8793026ee74ab 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -662,3 +662,11 @@ def test_getitem_categorical_str(): def test_slice_can_reorder_not_uniquely_indexed(): ser = Series(1, index=["a", "a", "b", "b", "c"]) ser[::-1] # it works! + + +@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"]) +def test_duplicated_index_getitem_positional_indexer(index_vals): + # GH 11747 + s = Series(range(5), index=list(index_vals)) + result = s[3] + assert result == 3 From 6853e6d805250db5134c0749e6c12f73bea405d7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 20:51:40 -0700 Subject: [PATCH 2/7] Add test for GH 12652 --- pandas/tests/groupby/test_apply_mutate.py | 60 +++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index 529f76bf692ce..05c1f5b716f40 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -68,3 +68,63 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) + + +def test_apply_mutate_columns_multiindex(): + # GH 12652 + df = pd.DataFrame( + { + ("C", "julian"): [1, 2, 3], + ("B", "geoffrey"): [1, 2, 3], + ("A", "julian"): [1, 2, 3], + ("B", "julian"): [1, 2, 3], + ("A", "geoffrey"): [1, 2, 3], + ("C", "geoffrey"): [1, 2, 3], + }, + columns=pd.MultiIndex.from_tuples( + [ + ("A", "julian"), + ("A", "geoffrey"), + ("B", "julian"), + ("B", "geoffrey"), + ("C", "julian"), + ("C", "geoffrey"), + ] + ), + ) + + def add_column(grouped): + name = grouped.columns[0][1] + grouped["sum", name] = grouped.sum(axis=1) + return grouped + + result = df.groupby(level=1, axis=1).apply(add_column) + expected = pd.DataFrame( + [ + [1, 1, 1, 3, 1, 1, 1, 3], + [2, 2, 2, 6, 2, 2, 2, 6], + [ + 3, + 3, + 3, + 9, + 3, + 3, + 3, + 9, + ], + ], + columns=pd.MultiIndex.from_tuples( + [ + ("geoffrey", "A", "geoffrey"), + ("geoffrey", "B", "geoffrey"), + ("geoffrey", "C", "geoffrey"), + ("geoffrey", "sum", "geoffrey"), + ("julian", "A", "julian"), + ("julian", "B", "julian"), + ("julian", "C", "julian"), + ("julian", "sum", "julian"), + ] + ), + ) + tm.assert_frame_equal(result, expected) From 04acf6cb72cb6b9ec2dabe80dc9d51fd4b33d58b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 20:57:15 -0700 Subject: [PATCH 3/7] Add test for GH 12859 --- pandas/tests/frame/methods/test_to_dict.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 6d0d4e045e491..022b0f273493b 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -304,3 +304,10 @@ def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype): d = df.to_dict(orient="records") result = type(d[0]["a"]) assert result is expected_dtype + + def test_to_dict_mixed_numeric_frame(self): + # GH 12859 + df = DataFrame({"a": [1.0], "b": [9.0]}) + result = df.reset_index().to_dict("records") + expected = [{"index": 0, "a": 1.0, "b": 9.0}] + assert result == expected From 443ecd4bcef25d1916bdd18b5b0cccc99d4320f9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 21:01:49 -0700 Subject: [PATCH 4/7] Add test for GH 13034 --- pandas/tests/series/indexing/test_setitem.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 675120e03d821..3f850dfbc6a39 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -286,6 +286,13 @@ def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(self): expected = Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") tm.assert_series_equal(result, expected) + def test_setitem_nan_with_bool(self): + # GH 13034 + result = Series([True, False, True]) + result[0] = np.nan + expected = Series([np.nan, False, True], dtype=object) + tm.assert_series_equal(result, expected) + class TestSetitemViewCopySemantics: def test_setitem_invalidates_datetime_index_freq(self): From c48bb8572956e0a6f3da153a693e31fde0bd9420 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 21:21:41 -0700 Subject: [PATCH 5/7] Add test for GH 13120, add missing __init__, fix local NameError --- pandas/tests/indexes/object/__init__.py | 0 pandas/tests/indexes/test_base.py | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/indexes/object/__init__.py diff --git a/pandas/tests/indexes/object/__init__.py b/pandas/tests/indexes/object/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b5822b768fdde..47657fff56ceb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1366,7 +1366,7 @@ async def test_tab_complete_warning(self, ip): pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter - code = "import pandas as pd; idx = Index([1, 2])" + code = "import pandas as pd; idx = pd.Index([1, 2])" await ip.run_code(code) # GH 31324 newer jedi version raises Deprecation warning; @@ -1720,3 +1720,21 @@ def test_validate_1d_input(): ser = Series(0, range(4)) with pytest.raises(ValueError, match=msg): ser.index = np.array([[2, 3]] * 4) + + +@pytest.mark.parametrize( + "klass, extra_kwargs", + [ + [Index, {}], + [Int64Index, {}], + [Float64Index, {}], + [DatetimeIndex, {}], + [TimedeltaIndex, {}], + [PeriodIndex, {"freq": "Y"}], + ], +) +def test_construct_from_memoryview(klass, extra_kwargs): + # GH 13120 + result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs) + expected = klass(range(2000, 2005), **extra_kwargs) + tm.assert_index_equal(result, expected) From 38a00ce2c3fbf5e771d08642264cfb0de6d0f8f8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 21:30:30 -0700 Subject: [PATCH 6/7] Add test for GH 13217 --- pandas/tests/groupby/test_apply.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 117612696df11..2f87f4a19b93f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1121,3 +1121,27 @@ def test_apply_dropna_with_indexed_same(): ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "as_index, expected", + [ + [ + False, + DataFrame( + [[1, 1, 1], [2, 2, 1]], columns=Index(["a", "b", None], dtype=object) + ), + ], + [ + True, + Series( + [1, 1], index=MultiIndex.from_tuples([(1, 1), (2, 2)], names=["a", "b"]) + ), + ], + ], +) +def test_apply_as_index_constant_lambda(as_index, expected): + # GH 13217 + df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]}) + result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1) + tm.assert_equal(result, expected) From f02601f70bf9973a8ffb1065abbdc2a407b9fa94 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 11 May 2021 21:40:26 -0700 Subject: [PATCH 7/7] Add test for GH 13353 --- pandas/tests/reshape/merge/test_merge.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 1495a34274a94..edd100219143c 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2446,3 +2446,14 @@ def test_merge_duplicate_columns_with_suffix_causing_another_duplicate(): result = merge(left, right, on="a") expected = DataFrame([[1, 1, 1, 1, 2]], columns=["a", "b_x", "b_x", "b_x", "b_y"]) tm.assert_frame_equal(result, expected) + + +def test_merge_string_float_column_result(): + # GH 13353 + df1 = DataFrame([[1, 2], [3, 4]], columns=pd.Index(["a", 114.0])) + df2 = DataFrame([[9, 10], [11, 12]], columns=["x", "y"]) + result = merge(df2, df1, how="inner", left_index=True, right_index=True) + expected = DataFrame( + [[9, 10, 1, 2], [11, 12, 3, 4]], columns=pd.Index(["x", "y", "a", 114.0]) + ) + tm.assert_frame_equal(result, expected)