pandas-dev · jreback · Feb 16, 2021 · Feb 16, 2021 · Feb 16, 2021 · Feb 16, 2021
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1146,7 +1146,8 @@ def test_setitem_frame_mixed(self, float_string_frame):
         f.loc[key] = piece
         tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values)
 
-        # rows unaligned
+    def test_setitem_frame_mixed_rows_unaligned(self, float_string_frame):
+        # GH#3216 rows unaligned
         f = float_string_frame.copy()
         piece = DataFrame(
             [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
@@ -1159,7 +1160,8 @@ def test_setitem_frame_mixed(self, float_string_frame):
             f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2]
         )
 
-        # key is unaligned with values
+    def test_setitem_frame_mixed_key_unaligned(self, float_string_frame):
+        # GH#3216 key is unaligned with values
         f = float_string_frame.copy()
         piece = f.loc[f.index[:2], ["A"]]
         piece.index = f.index[-2:]
@@ -1168,7 +1170,8 @@ def test_setitem_frame_mixed(self, float_string_frame):
         piece["B"] = np.nan
         tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
 
-        # ndarray
+    def test_setitem_frame_mixed_ndarray(self, float_string_frame):
+        # GH#3216 ndarray
         f = float_string_frame.copy()
         piece = float_string_frame.loc[f.index[:2], ["A", "B"]]
         key = (f.index[slice(-2, None)], ["A", "B"])
@@ -1471,28 +1474,14 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
         result.loc[:, idxer] = expected
         tm.assert_frame_equal(result, expected)
 
-    def test_at_time_between_time_datetimeindex(self):
+    def test_loc_setitem_time_key(self):
         index = date_range("2012-01-01", "2012-01-05", freq="30min")
         df = DataFrame(np.random.randn(len(index), 5), index=index)
         akey = time(12, 0, 0)
         bkey = slice(time(13, 0, 0), time(14, 0, 0))
         ainds = [24, 72, 120, 168]
         binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
 
-        result = df.at_time(akey)
-        expected = df.loc[akey]
-        expected2 = df.iloc[ainds]
-        tm.assert_frame_equal(result, expected)
-        tm.assert_frame_equal(result, expected2)
-        assert len(result) == 4
-
-        result = df.between_time(bkey.start, bkey.stop)
-        expected = df.loc[bkey]
-        expected2 = df.iloc[binds]
-        tm.assert_frame_equal(result, expected)
-        tm.assert_frame_equal(result, expected2)
-        assert len(result) == 12
-
         result = df.copy()
         result.loc[akey] = 0
         result = result.loc[akey]
@@ -1529,26 +1518,11 @@ def test_loc_getitem_index_namedtuple(self):
         result = df.loc[IndexType("foo", "bar")]["A"]
         assert result == 1
 
-    @pytest.mark.parametrize(
-        "tpl",
-        [
-            (1,),
-            (
-                1,
-                2,
-            ),
-        ],
-    )
+    @pytest.mark.parametrize("tpl", [(1,), (1, 2)])
     def test_loc_getitem_index_single_double_tuples(self, tpl):
         # GH 20991
         idx = Index(
-            [
-                (1,),
-                (
-                    1,
-                    2,
-                ),
-            ],
+            [(1,), (1, 2)],
             name="A",
             tupleize_cols=False,
         )

diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py
@@ -113,3 +113,16 @@ def test_at_time_axis(self, axis):
         result.index = result.index._with_freq(None)
         expected.index = expected.index._with_freq(None)
         tm.assert_frame_equal(result, expected)
+
+    def test_at_time_datetimeindex(self):
+        index = date_range("2012-01-01", "2012-01-05", freq="30min")
+        df = DataFrame(np.random.randn(len(index), 5), index=index)
+        akey = time(12, 0, 0)
+        ainds = [24, 72, 120, 168]  # 12:00 rows: 48 rows per day, offset 24
+
+        result = df.at_time(akey)  # must match .loc[time] and .iloc[ainds]
+        expected = df.loc[akey]
+        expected2 = df.iloc[ainds]
+        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected2)
+        assert len(result) == 4
diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py
@@ -194,3 +194,16 @@ def test_between_time_axis_raises(self, axis):
             ts.columns = mask
             with pytest.raises(TypeError, match=msg):
                 ts.between_time(stime, etime, axis=1)
+
+    def test_between_time_datetimeindex(self):
+        index = date_range("2012-01-01", "2012-01-05", freq="30min")
+        df = DataFrame(np.random.randn(len(index), 5), index=index)
+        bkey = slice(time(13, 0, 0), time(14, 0, 0))
+        binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]  # 13:00-14:00
+
+        result = df.between_time(bkey.start, bkey.stop)  # both endpoints included
+        expected = df.loc[bkey]
+        expected2 = df.iloc[binds]
+        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected2)
+        assert len(result) == 12
diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
@@ -391,3 +391,14 @@ def test_describe_when_include_all_exclude_not_allowed(self, exclude):
         msg = "exclude must be None when include is 'all'"
         with pytest.raises(ValueError, match=msg):
             df.describe(include="all", exclude=exclude)
+
+    def test_describe_with_duplicate_columns(self):
+        df = DataFrame(
+            [[1, 1, 1], [2, 2, 2], [3, 3, 3]],
+            columns=["bar", "a", "a"],
+            dtype="float64",
+        )
+        result = df.describe()
+        ser = df.iloc[:, 0].describe()  # every column holds the same values
+        expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
@@ -457,3 +457,13 @@ def test_drop_with_non_unique_multiindex(self):
         result = df.drop(index="x")
         expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]]))
         tm.assert_frame_equal(result, expected)
+
+    def test_drop_with_duplicate_columns(self):
+        df = DataFrame(
+            [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"]
+        )
+        result = df.drop(["a"], axis=1)  # list label: both "a" columns are dropped
+        expected = DataFrame([[1], [1], [1]], columns=["bar"])
+        tm.assert_frame_equal(result, expected)
+        result = df.drop("a", axis=1)  # scalar label must behave the same way
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
@@ -644,6 +644,18 @@ def test_reindex_dups(self):
         with pytest.raises(ValueError, match=msg):
             df.reindex(index=list(range(len(df))))
 
+    def test_reindex_with_duplicate_columns(self):
+        # the column axis holds the duplicate label "a", so reindexing it must
+        # raise ValueError even when the requested labels are unique
+        df = DataFrame(
+            [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"]
+        )
+        msg = "cannot reindex from a duplicate axis"
+        with pytest.raises(ValueError, match=msg):
+            df.reindex(columns=["bar"])
+        with pytest.raises(ValueError, match=msg):
+            df.reindex(columns=["bar", "foo"])
+
     def test_reindex_axis_style(self):
         # https://github.com/pandas-dev/pandas/issues/12392
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py
@@ -4,11 +4,14 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     Index,
     MultiIndex,
     Series,
+    merge,
 )
 import pandas._testing as tm
 
@@ -357,3 +360,45 @@ def test_rename_mapper_and_positional_arguments_raises(self):
 
         with pytest.raises(TypeError, match=msg):
             df.rename({}, columns={}, index={})
+
+    @td.skip_array_manager_not_yet_implemented
+    def test_rename_with_duplicate_columns(self):
+        # GH#4403 rename after merge; STK_ID/RPT_Date are both index levels and columns
+        df4 = DataFrame(
+            {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]},
+            index=MultiIndex.from_tuples(
+                [(600809, 20130331)], names=["STK_ID", "RPT_Date"]
+            ),
+        )
+
+        df5 = DataFrame(
+            {
+                "RPT_Date": [20120930, 20121231, 20130331],
+                "STK_ID": [600809] * 3,
+                "STK_Name": ["饡驦", "饡驦", "饡驦"],
+                "TClose": [38.05, 41.66, 30.01],
+            },
+            index=MultiIndex.from_tuples(
+                [(600809, 20120930), (600809, 20121231), (600809, 20130331)],
+                names=["STK_ID", "RPT_Date"],
+            ),
+        )
+        # TODO: can we construct this without merge?
+        k = merge(df4, df5, how="inner", left_index=True, right_index=True)
+        result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"})
+        str(result)  # smoke check: building the repr must not raise
+        result.dtypes  # smoke check: dtype lookup must not raise
+
+        expected = DataFrame(
+            [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]],
+            columns=[
+                "RT",
+                "TClose",
+                "TExg",
+                "RPT_Date",
+                "STK_ID",
+                "STK_Name",
+                "QT_Close",
+            ],
+        ).set_index(["STK_ID", "RPT_Date"], drop=False)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py
@@ -55,6 +55,12 @@ def test_values_duplicates(self):
 
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_values_with_duplicate_columns(self):
+        df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"])
+        result = df.values  # both "x" columns must come back as one 2D array
+        expected = np.array([[1, 2.5], [3, 4.5]])  # NumPy upcasts the ints to float64
+        tm.assert_numpy_array_equal(result, expected)  # also checks shape and dtype
+
     @pytest.mark.parametrize("constructor", [date_range, period_range])
     def test_values_casts_datetimelike_to_object(self, constructor):
         series = Series(constructor("2000-01-01", periods=10, freq="D"))

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -57,6 +57,21 @@ def any(self, axis=None):
 class TestFrameComparisons:
     # Specifically _not_ flex-comparisons
 
+    def test_comparison_with_categorical_dtype(self):
+        # GH#12564 scalar == gives the same mask before and after the category cast
+
+        df = DataFrame({"A": ["foo", "bar", "baz"]})
+        exp = DataFrame({"A": [True, False, False]})
+
+        res = df == "foo"
+        tm.assert_frame_equal(res, exp)
+
+        # casting to categorical shouldn't affect the result
+        df["A"] = df["A"].astype("category")
+
+        res = df == "foo"
+        tm.assert_frame_equal(res, exp)
+
     def test_frame_in_list(self):
         # GH#12689 this should raise at the DataFrame level, not blocks
         df = DataFrame(np.random.randn(6, 4), columns=list("ABCD"))
@@ -597,6 +612,26 @@ def test_flex_add_scalar_fill_value(self):
         res = df.add(2, fill_value=0)
         tm.assert_frame_equal(res, exp)
 
+    def test_sub_alignment_with_duplicate_index(self):
+        # GH#5185 aligning on a duplicated index should work; labels come back sorted
+        df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])
+        df2 = DataFrame([1, 2, 3], index=[1, 2, 3])
+        expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])
+        result = df1.sub(df2)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"])
+    def test_arithmetic_with_duplicate_columns(self, op):
+        # frame-with-itself ops must give the same values once labels are duplicated
+        df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)})
+        expected = getattr(df, op)(df)
+        expected.columns = ["A", "A"]
+        df.columns = ["A", "A"]
+        result = getattr(df, op)(df)
+        tm.assert_frame_equal(result, expected)
+        str(result)  # smoke check: building the repr must not raise
+        result.dtypes  # smoke check: dtype lookup must not raise
+
 
 class TestFrameArithmetic:
     def test_td64_op_nat_casting(self):