pandas-dev · jreback · Feb 15, 2021 · Feb 13, 2021 · Feb 13, 2021 · jreback
diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py
@@ -10,6 +10,7 @@
     MultiIndex,
     Series,
     Timestamp,
+    concat,
     get_dummies,
     period_range,
 )
@@ -176,6 +177,87 @@ def test_getitem_bool_mask_categorical_index(self):
         with pytest.raises(TypeError, match=msg):
             df4[df4.index > 1]
 
+    @pytest.mark.parametrize(
+        "data1,data2,expected_data",
+        (
+            (
+                [[1, 2], [3, 4]],
+                [[0.5, 6], [7, 8]],
+                [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
+            ),
+            (
+                [[1, 2], [3, 4]],
+                [[5, 6], [7, 8]],
+                [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
+            ),
+        ),
+    )
+    def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
+        self,
+        data1,
+        data2,
+        expected_data,
+    ):
+        # GH#31954
+
+        df1 = DataFrame(np.array(data1))
+        df2 = DataFrame(np.array(data2))
+        df = concat([df1, df2], axis=1)
+
+        result = df[df > 2]
+
+        exdict = {i: np.array(col) for i, col in enumerate(expected_data)}
+        expected = DataFrame(exdict).rename(columns={2: 0, 3: 1})
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.fixture
+    def df_dup_cols(self):
+        dups = ["A", "A", "C", "D"]
+        df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
+        return df
+
+    def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
+        # `df.A > 6` is a DataFrame with a different shape from df
+
+        # indexing with the boolean frame from the duplicated label raises
+        df = df_dup_cols
+        msg = "cannot reindex from a duplicate axis"
+        with pytest.raises(ValueError, match=msg):
+            df[df.A > 6]
+
+    def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
+        # boolean indexing
+        # GH#4879
+        df = DataFrame(
+            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
+        )
+        expected = df[df.C > 6]
+        expected.columns = df_dup_cols.columns
+
+        df = df_dup_cols
+        result = df[df.C > 6]
+
+        tm.assert_frame_equal(result, expected)
+        result.dtypes
+        str(result)
+
+    def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
+
+        # where
+        df = DataFrame(
+            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
+        )
+        # `df > 6` is a DataFrame with the same shape+alignment as df
+        expected = df[df > 6]
+        expected.columns = df_dup_cols.columns
+
+        df = df_dup_cols
+        result = df[df > 6]
+
+        tm.assert_frame_equal(result, expected)
+        result.dtypes
+        str(result)
+
 
 class TestGetitemSlice:
     def test_getitem_slice_float64(self, frame_or_series):

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -2,18 +2,26 @@
 import pytest
 
 from pandas.core.dtypes.base import registry as ea_registry
+from pandas.core.dtypes.common import (
+    is_categorical_dtype,
+    is_interval_dtype,
+    is_object_dtype,
+)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
 
 from pandas import (
     Categorical,
     DataFrame,
+    DatetimeIndex,
     Index,
     Interval,
+    IntervalIndex,
     NaT,
     Period,
     PeriodIndex,
     Series,
     Timestamp,
+    cut,
     date_range,
     notna,
     period_range,
@@ -395,6 +403,90 @@ def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
         with pytest.raises(ValueError, match=msg):
             df[["a", "b"]] = rhs
 
+    def test_setitem_intervals(self):
+
+        df = DataFrame({"A": range(10)})
+        ser = cut(df["A"], 5)
+        assert isinstance(ser.cat.categories, IntervalIndex)
+
+        # B & D end up as Categoricals
+        # the remainder are converted to in-line objects
+        # containing an IntervalIndex.values
+        df["B"] = ser
+        df["C"] = np.array(ser)
+        df["D"] = ser.values
+        df["E"] = np.array(ser.values)
+
+        assert is_categorical_dtype(df["B"].dtype)
+        assert is_interval_dtype(df["B"].cat.categories)
+        assert is_categorical_dtype(df["D"].dtype)
+        assert is_interval_dtype(df["D"].cat.categories)
+
+        assert is_object_dtype(df["C"])
+        assert is_object_dtype(df["E"])
+
+        # they compare equal as Index
+        # when converted to numpy objects
+        c = lambda x: Index(np.array(x))
+        tm.assert_index_equal(c(df.B), c(df.B))
+        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
+        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
+        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)
+
+        # B & D are the same Series
+        tm.assert_series_equal(df["B"], df["B"])
+        tm.assert_series_equal(df["B"], df["D"], check_names=False)
+
+        # C & E are the same Series
+        tm.assert_series_equal(df["C"], df["C"])
+        tm.assert_series_equal(df["C"], df["E"], check_names=False)
+
+
+class TestSetitemTZAwareValues:
+    @pytest.fixture
+    def idx(self):
+        naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
+        idx = naive.tz_localize("US/Pacific")
+        return idx
+
+    @pytest.fixture
+    def expected(self, idx):
+        expected = Series(np.array(idx.tolist(), dtype="object"), name="B")
+        assert expected.dtype == idx.dtype
+        return expected
+
+    def test_setitem_dt64series(self, idx, expected):
+        # convert to utc
+        df = DataFrame(np.random.randn(2, 1), columns=["A"])
+        df["B"] = idx
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            df["B"] = idx.to_series(keep_tz=False, index=[0, 1])
+        msg = "do 'idx.tz_convert(None)' before calling"
+        assert msg in str(m[0].message)
+
+        result = df["B"]
+        comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B")
+        tm.assert_series_equal(result, comp)
+
+    def test_setitem_datetimeindex(self, idx, expected):
+        # setting a DataFrame column with a tzaware DTI retains the dtype
+        df = DataFrame(np.random.randn(2, 1), columns=["A"])
+
+        # assign to frame
+        df["B"] = idx
+        result = df["B"]
+        tm.assert_series_equal(result, expected)
+
+    def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):
+        # setting a column with an object array of tzaware datetimes
+        df = DataFrame(np.random.randn(2, 1), columns=["A"])
+
+        # object array of datetimes with a tz
+        df["B"] = idx.to_pydatetime()
+        result = df["B"]
+        tm.assert_series_equal(result, expected)
+
 
 class TestDataFrameSetItemWithExpansion:
     def test_setitem_listlike_views(self):

diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
@@ -21,6 +21,43 @@
 import pandas.core.common as com
 
 
+class TestReindexSetIndex:
+    # Tests that check both reindex and set_index
+
+    def test_dti_set_index_reindex_datetimeindex(self):
+        # GH#6631
+        df = DataFrame(np.random.random(6))
+        idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern")
+        idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo")
+
+        df = df.set_index(idx1)
+        tm.assert_index_equal(df.index, idx1)
+        df = df.reindex(idx2)
+        tm.assert_index_equal(df.index, idx2)
+
+    def test_dti_set_index_reindex_freq_with_tz(self):
+        # GH#11314 with tz
+        index = date_range(
+            datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern"
+        )
+        df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index)
+        new_index = date_range(
+            datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern"
+        )
+
+        result = df.set_index(new_index)
+        assert result.index.freq == index.freq
+
+    def test_set_reset_index_intervalindex(self):
+
+        df = DataFrame({"A": range(10)})
+        ser = pd.cut(df.A, 5)
+        df["B"] = ser
+        df = df.set_index("B")
+
+        df = df.reset_index()
+
+
 class TestDataFrameSelectReindex:
     # These are specific reindex-based tests; other indexing tests should go in
     # test_indexing

diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py
@@ -1,3 +1,7 @@
+"""
+See also: test_reindex.py:TestReindexSetIndex
+"""
+
 from datetime import datetime, timedelta
 
 import numpy as np