diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 13a93e3efc48c..6d4ce3fa0dd4e 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -1,4 +1,7 @@ -from pandas import DataFrame, Series +import numpy as np +import pytest + +from pandas import DataFrame, Index, Series import pandas._testing as tm @@ -34,3 +37,85 @@ def test_count_objects(self, float_string_frame): tm.assert_series_equal(dm.count(), df.count()) tm.assert_series_equal(dm.count(1), df.count(1)) + + def test_count_level_corner(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + ser = frame["A"][:0] + result = ser.count(level=0) + expected = Series(0, index=ser.index.levels[0], name="A") + tm.assert_series_equal(result, expected) + + df = frame[:0] + result = df.count(level=0) + expected = ( + DataFrame( + index=ser.index.levels[0].set_names(["first"]), columns=df.columns + ) + .fillna(0) + .astype(np.int64) + ) + tm.assert_frame_equal(result, expected) + + def test_count_index_with_nan(self): + # https://github.com/pandas-dev/pandas/issues/21824 + df = DataFrame( + { + "Person": ["John", "Myla", None, "John", "Myla"], + "Age": [24.0, 5, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + + # count on row labels + res = df.set_index(["Person", "Single"]).count(level="Person") + expected = DataFrame( + index=Index(["John", "Myla"], name="Person"), + columns=Index(["Age"]), + data=[2, 2], + ) + tm.assert_frame_equal(res, expected) + + # count on column labels + res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) + expected = DataFrame( + columns=Index(["John", "Myla"], name="Person"), + index=Index(["Age"]), + data=[[2, 2]], + ) + tm.assert_frame_equal(res, expected) + + def test_count_level( + self, + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + def _check_counts(frame, axis=0): + index = frame._get_axis(axis) + for i in range(index.nlevels): + result = frame.count(axis=axis, level=i) + expected = frame.groupby(axis=axis, level=i).count() + expected = expected.reindex_like(result).astype("i8") + tm.assert_frame_equal(result, expected) + + frame.iloc[1, [1, 2]] = np.nan + frame.iloc[7, [0, 1]] = np.nan + ymd.iloc[1, [1, 2]] = np.nan + ymd.iloc[7, [0, 1]] = np.nan + + _check_counts(frame) + _check_counts(ymd) + _check_counts(frame.T, axis=1) + _check_counts(ymd.T, axis=1) + + # can't call with level on regular DataFrame + df = tm.makeTimeDataFrame() + with pytest.raises(TypeError, match="hierarchical"): + df.count(level=0) + + frame["D"] = "foo" + result = frame.count(level=0, numeric_only=True) + tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py index fccb3f10dde45..2926e29e61d56 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -1,4 +1,6 @@ -from pandas import DataFrame, Series +import numpy as np + +from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm @@ -38,3 +40,28 @@ def test_pop_non_unique_cols(self): assert "b" in df.columns assert "a" not in df.columns assert len(df.index) == 2 + + def test_mixed_depth_pop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + df1 = df.copy() + df2 = df.copy() + result = df1.pop("a") + expected = df2.pop(("a", "", "")) + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) + assert result.name == "a" + + expected = df1["top"] + df1 = df1.drop(["top"], axis=1) + result = df2.pop("top") + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index ebe7eabd53b46..8927ab7c5ef79 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -3,7 +3,16 @@ import numpy as np import pytest -from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + date_range, + period_range, + to_datetime, +) import pandas._testing as tm @@ -352,6 +361,112 @@ def test_construction_with_categorical_index(self): idf = idf.reset_index().set_index("B") tm.assert_index_equal(idf.index, ci) + def test_set_index_datetime(self): + # GH#3950 + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + "value": range(6), + } + ) + df.index = to_datetime(df.pop("datetime"), utc=True) + df.index = df.index.tz_convert("US/Pacific") + + expected = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + name="datetime", + ) + expected = expected.tz_localize("UTC").tz_convert("US/Pacific") + + df = df.set_index("label", append=True) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) + assert df.index.names == ["datetime", "label"] + + df = df.swaplevel(0, 1) + tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) + tm.assert_index_equal(df.index.levels[1], expected) + assert df.index.names == ["label", "datetime"] + + df = DataFrame(np.random.random(6)) + idx1 = DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Eastern", + ) + idx2 = DatetimeIndex( + [ + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + ], + tz="US/Eastern", + ) + idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") + idx3 = idx3._with_freq(None) + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="US/Eastern", + ) + expected2 = DatetimeIndex( + ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern" + ) + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + # GH#7092 + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + def test_set_index_period(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range("2011-01-01", periods=3, freq="M") + idx1 = idx1.append(idx1) + idx2 = period_range("2013-01-01 09:00", periods=2, freq="H") + idx2 = idx2.append(idx2).append(idx2) + idx3 = period_range("2005", periods=6, freq="A") + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = period_range("2011-01-01", periods=3, freq="M") + expected2 = period_range("2013-01-01 09:00", periods=2, freq="H") + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + class TestSetIndexInvalid: def test_set_index_verify_integrity(self, frame_of_index_cols): diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index a106702aff807..da2f90c1c4e32 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -662,6 +662,12 @@ def test_sort_index_level_mixed(self): sorted_after.drop([("foo", "three")], axis=1), ) + def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + result = frame.sort_index() + assert result.index.names == frame.index.names + class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 9cf5afc09e800..54e0e8d0ec8d7 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1195,6 +1195,40 @@ def test_preserve_timezone(self, initial: str, method): result = getattr(df, method)(axis=1) tm.assert_series_equal(result, expected) + def test_frame_any_all_with_level(self): + df = DataFrame( + {"data": [False, False, True, False, True, False, True]}, + index=[ + ["one", "one", "two", "one", "two", "two", "two"], + [0, 1, 0, 2, 1, 2, 3], + ], + ) + + result = df.any(level=0) + ex = DataFrame({"data": [False, True]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + result = df.all(level=0) + ex = DataFrame({"data": [False, False]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + def test_frame_any_with_timedelta(self): + # GH#17667 + df = DataFrame( + { + "a": Series([0, 0]), + "t": Series([pd.to_timedelta(0, "s"), pd.to_timedelta(1, "ms")]), + } + ) + + result = df.any(axis=0) + expected = Series(data=[False, True], index=["a", "t"]) + tm.assert_series_equal(result, expected) + + result = df.any(axis=1) + expected = Series(data=[False, True]) + tm.assert_series_equal(result, expected) + def test_mixed_frame_with_integer_sum(): # https://github.com/pandas-dev/pandas/issues/34520 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 593d1c78a19e2..229d9de5eaad2 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1,6 +1,7 @@ import numpy as np +import pytest -from pandas import MultiIndex, NaT, Series, date_range +from pandas import MultiIndex, NaT, Series, date_range, period_range import pandas.testing as tm @@ -26,3 +27,15 @@ def test_setitem_multiindex_empty_slice(self): expected = result.copy() result.loc[[]] = 0 tm.assert_series_equal(result, expected) + + +class TestSetitemPeriodDtype: + @pytest.mark.parametrize("na_val", [None, np.nan]) + def test_setitem_na_period_dtype_casts_to_nat(self, na_val): + ser = Series(period_range("2000-01-01", periods=10, freq="D")) + + ser[3] = na_val + assert ser[3] is NaT + + ser[3:5] = na_val + assert ser[4] is NaT diff --git a/pandas/tests/series/methods/test_repeat.py b/pandas/tests/series/methods/test_repeat.py index b8e01e79ea02f..32f7384d34ebd 100644 --- a/pandas/tests/series/methods/test_repeat.py +++ b/pandas/tests/series/methods/test_repeat.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import Series +from pandas import MultiIndex, Series import pandas._testing as tm @@ -28,3 +28,10 @@ def test_numpy_repeat(self): msg = "the 'axis' parameter is not supported" with pytest.raises(ValueError, match=msg): np.repeat(ser, 2, axis=0) + + def test_repeat_with_multiindex(self): + # GH#9361, fixed by GH#7891 + m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) + data = ["a", "b", "c", "d"] + m_df = Series(data, index=m_idx) + assert m_df.repeat(3).shape == (3 * len(data),) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 52a398a00dfe5..2c32342f8802a 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -49,21 +49,6 @@ def test_NaT_cast(self): expected = Series([pd.NaT], dtype="period[D]") tm.assert_series_equal(result, expected) - def test_set_none(self): - self.series[3] = None - assert self.series[3] is pd.NaT - - self.series[3:5] = None - assert self.series[4] is pd.NaT - - def test_set_nan(self): - # Do we want to allow this? - self.series[5] = np.nan - assert self.series[5] is pd.NaT - - self.series[5:7] = np.nan - assert self.series[6] is pd.NaT - def test_intercept_astype_object(self): expected = self.series.astype("object") diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 129dc275c4d5a..03198ec3289dd 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas._libs import join as _join +from pandas._libs import join as libjoin from pandas import Categorical, DataFrame, Index, merge import pandas._testing as tm @@ -12,7 +12,7 @@ class TestIndexer: "dtype", ["int32", "int64", "float32", "float64", "object"] ) def test_outer_join_indexer(self, dtype): - indexer = _join.outer_join_indexer + indexer = libjoin.outer_join_indexer left = np.arange(3, dtype=dtype) right = np.arange(2, 5, dtype=dtype) @@ -47,7 +47,7 @@ def test_left_join_indexer_unique(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([2, 2, 3, 4, 4], dtype=np.int64) - result = _join.left_join_indexer_unique(b, a) + result = libjoin.left_join_indexer_unique(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) @@ -162,7 +162,7 @@ def test_left_outer_join_bug(): right = np.array([3, 1], dtype=np.int64) max_groups = 4 - lidx, ridx = _join.left_outer_join(left, right, max_groups, sort=False) + lidx, ridx = libjoin.left_outer_join(left, right, max_groups, sort=False) exp_lidx = np.arange(len(left), dtype=np.int64) exp_ridx = -np.ones(len(left), dtype=np.int64) @@ -178,7 +178,7 @@ def test_inner_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = _join.inner_join_indexer(a, b) + index, ares, bres = libjoin.inner_join_indexer(a, b) index_exp = np.array([3, 5], dtype=np.int64) tm.assert_almost_equal(index, index_exp) @@ -191,7 +191,7 @@ def test_inner_join_indexer(): a = np.array([5], dtype=np.int64) b = np.array([5], dtype=np.int64) - index, ares, bres = _join.inner_join_indexer(a, b) + index, ares, bres = libjoin.inner_join_indexer(a, b) tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) @@ -201,7 +201,7 @@ def test_outer_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = _join.outer_join_indexer(a, b) + index, ares, bres = libjoin.outer_join_indexer(a, b) index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) tm.assert_almost_equal(index, index_exp) @@ -214,7 +214,7 @@ def test_outer_join_indexer(): a = np.array([5], dtype=np.int64) b = np.array([5], dtype=np.int64) - index, ares, bres = _join.outer_join_indexer(a, b) + index, ares, bres = libjoin.outer_join_indexer(a, b) tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) @@ -224,7 +224,7 @@ def test_left_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = _join.left_join_indexer(a, b) + index, ares, bres = libjoin.left_join_indexer(a, b) tm.assert_almost_equal(index, a) @@ -236,7 +236,7 @@ def test_left_join_indexer(): a = np.array([5], dtype=np.int64) b = np.array([5], dtype=np.int64) - index, ares, bres = _join.left_join_indexer(a, b) + index, ares, bres = libjoin.left_join_indexer(a, b) tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) @@ -246,7 +246,7 @@ def test_left_join_indexer2(): idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = _join.left_join_indexer(idx2.values, idx.values) + res, lidx, ridx = libjoin.left_join_indexer(idx2.values, idx.values) exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) tm.assert_almost_equal(res, exp_res) @@ -262,7 +262,7 @@ def test_outer_join_indexer2(): idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = _join.outer_join_indexer(idx2.values, idx.values) + res, lidx, ridx = libjoin.outer_join_indexer(idx2.values, idx.values) exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) tm.assert_almost_equal(res, exp_res) @@ -278,7 +278,7 @@ def test_inner_join_indexer2(): idx = Index([1, 1, 2, 5]) idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = _join.inner_join_indexer(idx2.values, idx.values) + res, lidx, ridx = libjoin.inner_join_indexer(idx2.values, idx.values) exp_res = np.array([1, 1, 2, 5], dtype=np.int64) tm.assert_almost_equal(res, exp_res) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9c29d3a062dfa..c5d678f412601 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -224,69 +224,6 @@ def test_reset_index_with_drop( assert isinstance(deleveled, Series) assert deleveled.index.name == self.series.index.name - def test_count_level( - self, - multiindex_year_month_day_dataframe_random_data, - multiindex_dataframe_random_data, - ): - ymd = multiindex_year_month_day_dataframe_random_data - frame = multiindex_dataframe_random_data - - def _check_counts(frame, axis=0): - index = frame._get_axis(axis) - for i in range(index.nlevels): - result = frame.count(axis=axis, level=i) - expected = frame.groupby(axis=axis, level=i).count() - expected = expected.reindex_like(result).astype("i8") - tm.assert_frame_equal(result, expected) - - frame.iloc[1, [1, 2]] = np.nan - frame.iloc[7, [0, 1]] = np.nan - ymd.iloc[1, [1, 2]] = np.nan - ymd.iloc[7, [0, 1]] = np.nan - - _check_counts(frame) - _check_counts(ymd) - _check_counts(frame.T, axis=1) - _check_counts(ymd.T, axis=1) - - # can't call with level on regular DataFrame - df = tm.makeTimeDataFrame() - with pytest.raises(TypeError, match="hierarchical"): - df.count(level=0) - - frame["D"] = "foo" - result = frame.count(level=0, numeric_only=True) - tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) - - def test_count_index_with_nan(self): - # https://github.com/pandas-dev/pandas/issues/21824 - df = DataFrame( - { - "Person": ["John", "Myla", None, "John", "Myla"], - "Age": [24.0, 5, 21.0, 33, 26], - "Single": [False, True, True, True, False], - } - ) - - # count on row labels - res = df.set_index(["Person", "Single"]).count(level="Person") - expected = DataFrame( - index=Index(["John", "Myla"], name="Person"), - columns=Index(["Age"]), - data=[2, 2], - ) - tm.assert_frame_equal(res, expected) - - # count on column labels - res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) - expected = DataFrame( - columns=Index(["John", "Myla"], name="Person"), - index=Index(["Age"]), - data=[[2, 2]], - ) - tm.assert_frame_equal(res, expected) - def test_count_level_series(self): index = MultiIndex( levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]], @@ -307,23 +244,6 @@ def test_count_level_series(self): result.astype("f8"), expected.reindex(result.index).fillna(0) ) - def test_count_level_corner(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - s = frame["A"][:0] - result = s.count(level=0) - expected = Series(0, index=s.index.levels[0], name="A") - tm.assert_series_equal(result, expected) - - df = frame[:0] - result = df.count(level=0) - expected = ( - DataFrame(index=s.index.levels[0].set_names(["first"]), columns=df.columns) - .fillna(0) - .astype(np.int64) - ) - tm.assert_frame_equal(result, expected) - def test_get_level_number_out_of_bounds(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -1138,40 +1058,6 @@ def test_stat_op_corner(self): expected = Series([10.0], index=[2]) tm.assert_series_equal(result, expected) - def test_frame_any_all_group(self): - df = DataFrame( - {"data": [False, False, True, False, True, False, True]}, - index=[ - ["one", "one", "two", "one", "two", "two", "two"], - [0, 1, 0, 2, 1, 2, 3], - ], - ) - - result = df.any(level=0) - ex = DataFrame({"data": [False, True]}, index=["one", "two"]) - tm.assert_frame_equal(result, ex) - - result = df.all(level=0) - ex = DataFrame({"data": [False, False]}, index=["one", "two"]) - tm.assert_frame_equal(result, ex) - - def test_series_any_timedelta(self): - # GH 17667 - df = DataFrame( - { - "a": Series([0, 0]), - "t": Series([pd.to_timedelta(0, "s"), pd.to_timedelta(1, "ms")]), - } - ) - - result = df.any(axis=0) - expected = Series(data=[False, True], index=["a", "t"]) - tm.assert_series_equal(result, expected) - - result = df.any(axis=1) - expected = Series(data=[False, True]) - tm.assert_series_equal(result, expected) - def test_std_var_pass_ddof(self): index = MultiIndex.from_arrays( [np.arange(5).repeat(10), np.tile(np.arange(10), 5)] @@ -1349,31 +1235,6 @@ def test_level_with_tuples(self): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - def test_mixed_depth_pop(self): - arrays = [ - ["a", "top", "top", "routine1", "routine1", "routine2"], - ["", "OD", "OD", "result1", "result2", "result1"], - ["", "wx", "wy", "", "", ""], - ] - - tuples = sorted(zip(*arrays)) - index = MultiIndex.from_tuples(tuples) - df = DataFrame(randn(4, 6), columns=index) - - df1 = df.copy() - df2 = df.copy() - result = df1.pop("a") - expected = df2.pop(("a", "", "")) - tm.assert_series_equal(expected, result, check_names=False) - tm.assert_frame_equal(df1, df2) - assert result.name == "a" - - expected = df1["top"] - df1 = df1.drop(["top"], axis=1) - result = df2.pop("top") - tm.assert_frame_equal(expected, result) - tm.assert_frame_equal(df1, df2) - def test_reindex_level_partial_selection(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -1517,180 +1378,98 @@ def test_multiindex_set_index(self): # it works! df.set_index(index) - def test_set_index_datetime(self): + def test_reset_index_datetime(self, tz_naive_fixture): # GH 3950 + tz = tz_naive_fixture + idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) df = DataFrame( - { - "label": ["a", "a", "a", "b", "b", "b"], - "datetime": [ - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - ], - "value": range(6), - } + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, ) - df.index = pd.to_datetime(df.pop("datetime"), utc=True) - df.index = df.index.tz_convert("US/Pacific") - expected = pd.DatetimeIndex( - ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], - name="datetime", + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], ) - expected = expected.tz_localize("UTC").tz_convert("US/Pacific") - - df = df.set_index("label", append=True) - tm.assert_index_equal(df.index.levels[0], expected) - tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) - assert df.index.names == ["datetime", "label"] + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - df = df.swaplevel(0, 1) - tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) - tm.assert_index_equal(df.index.levels[1], expected) - assert df.index.names == ["label", "datetime"] + tm.assert_frame_equal(df.reset_index(), expected) - df = DataFrame(np.random.random(6)) - idx1 = pd.DatetimeIndex( - [ - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - "2011-07-19 07:00:00", - "2011-07-19 08:00:00", - "2011-07-19 09:00:00", - ], - tz="US/Eastern", + idx3 = pd.date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" ) - idx2 = pd.DatetimeIndex( - [ - "2012-04-01 09:00", - "2012-04-01 09:00", - "2012-04-01 09:00", - "2012-04-02 09:00", - "2012-04-02 09:00", - "2012-04-02 09:00", - ], - tz="US/Eastern", + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, ) - idx3 = pd.date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") - idx3 = idx3._with_freq(None) - - df = df.set_index(idx1) - df = df.set_index(idx2, append=True) - df = df.set_index(idx3, append=True) - expected1 = pd.DatetimeIndex( - ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], - tz="US/Eastern", + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], ) - expected2 = pd.DatetimeIndex( - ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern" + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") ) + tm.assert_frame_equal(df.reset_index(), expected) - tm.assert_index_equal(df.index.levels[0], expected1) - tm.assert_index_equal(df.index.levels[1], expected2) - tm.assert_index_equal(df.index.levels[2], idx3) - - # GH 7092 - tm.assert_index_equal(df.index.get_level_values(0), idx1) - tm.assert_index_equal(df.index.get_level_values(1), idx2) - tm.assert_index_equal(df.index.get_level_values(2), idx3) - - def test_reset_index_datetime(self): - # GH 3950 - for tz in ["UTC", "Asia/Tokyo", "US/Eastern"]: - idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") - idx2 = Index(range(5), name="idx2", dtype="int64") - idx = MultiIndex.from_arrays([idx1, idx2]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) - - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - - tm.assert_frame_equal(df.reset_index(), expected) - - idx3 = pd.date_range( - "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" - ) - idx = MultiIndex.from_arrays([idx1, idx2, idx3]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) - - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "idx3": [ - datetime.datetime(2012, 1, 1), - datetime.datetime(2012, 2, 1), - datetime.datetime(2012, 3, 1), - datetime.datetime(2012, 4, 1), - datetime.datetime(2012, 5, 1), - ], - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "idx3", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - expected["idx3"] = expected["idx3"].apply( - lambda d: Timestamp(d, tz="Europe/Paris") - ) - tm.assert_frame_equal(df.reset_index(), expected) - - # GH 7793 - idx = MultiIndex.from_product( - [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] - ) - df = DataFrame( - np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx - ) + # GH 7793 + idx = MultiIndex.from_product( + [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) - expected = DataFrame( - { - "level_0": "a a a b b b".split(), - "level_1": [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3), - ] - * 2, - "a": np.arange(6, dtype="int64"), - }, - columns=["level_0", "level_1", "a"], - ) - expected["level_1"] = expected["level_1"].apply( - lambda d: Timestamp(d, freq="D", tz=tz) - ) - tm.assert_frame_equal(df.reset_index(), expected) + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply( + lambda d: Timestamp(d, freq="D", tz=tz) + ) + tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 @@ -1768,38 +1547,6 @@ def test_reset_index_multiindex_columns(self): result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") tm.assert_frame_equal(result, expected) - def test_set_index_period(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = pd.period_range("2011-01-01", periods=3, freq="M") - idx1 = idx1.append(idx1) - idx2 = pd.period_range("2013-01-01 09:00", periods=2, freq="H") - idx2 = idx2.append(idx2).append(idx2) - idx3 = pd.period_range("2005", periods=6, freq="A") - - df = df.set_index(idx1) - df = df.set_index(idx2, append=True) - df = df.set_index(idx3, append=True) - - expected1 = pd.period_range("2011-01-01", periods=3, freq="M") - expected2 = pd.period_range("2013-01-01 09:00", periods=2, freq="H") - - tm.assert_index_equal(df.index.levels[0], expected1) - tm.assert_index_equal(df.index.levels[1], expected2) - tm.assert_index_equal(df.index.levels[2], idx3) - - tm.assert_index_equal(df.index.get_level_values(0), idx1) - tm.assert_index_equal(df.index.get_level_values(1), idx2) - tm.assert_index_equal(df.index.get_level_values(2), idx3) - - def test_repeat(self): - # GH 9361 - # fixed by # GH 7891 - m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) - data = ["a", "b", "c", "d"] - m_df = Series(data, index=m_idx) - assert m_df.repeat(3).shape == (3 * len(data),) - def test_subsets_multiindex_dtype(self): # GH 20757 data = [["x", 1]] @@ -1813,18 +1560,9 @@ def test_subsets_multiindex_dtype(self): class TestSorted(Base): """ everything you wanted to test about sorting """ - def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - result = frame.sort_index() - assert result.index.names == frame.index.names - - def test_sorting_repr_8017(self): - - np.random.seed(0) - data = np.random.randn(3, 4) - - for gen, extra in [ + @pytest.mark.parametrize( + "gen,extra", + [ ([1.0, 3.0, 2.0, 5.0], 4.0), ([1, 3, 2, 5], 4), ( @@ -1837,44 +1575,50 @@ def test_sorting_repr_8017(self): Timestamp("20130104"), ), (["1one", "3one", "2one", "5one"], "4one"), - ]: - columns = MultiIndex.from_tuples([("red", i) for i in gen]) - df = DataFrame(data, index=list("def"), columns=columns) - df2 = pd.concat( - [ - df, - DataFrame( - "world", - index=list("def"), - columns=MultiIndex.from_tuples([("red", extra)]), - ), - ], - axis=1, - ) + ], + ) + def test_sorting_repr_8017(self, gen, extra): + + np.random.seed(0) + data = np.random.randn(3, 4) + + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. only 1 header of read - assert str(df2).splitlines()[0].split() == ["red"] + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. only 1 header of read + assert str(df2).splitlines()[0].split() == ["red"] - # GH 8017 - # sorting fails after columns added + # GH 8017 + # sorting fails after columns added - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - tm.assert_frame_equal(result, expected) + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - tm.assert_frame_equal(result, expected) + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) - # setitem then sort - result = df.copy() - result[("red", extra)] = "world" + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" - result = result.sort_index(axis=1) - tm.assert_frame_equal(result, expected) + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) def test_sort_non_lexsorted(self): # degenerate case where we sort but don't