From 1e9b5f358f216bdbe70597d5bd99cdeada7d6540 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 10:09:24 -0800 Subject: [PATCH 01/12] misplaced MultiIndex tests --- .../tests/indexes/multi/test_constructors.py | 22 +++ pandas/tests/indexes/multi/test_lexsort.py | 46 ++++++ pandas/tests/test_multilevel.py | 145 +++++------------- 3 files changed, 109 insertions(+), 104 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_lexsort.py diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 2c4b3ce04f96d..56836b9314b96 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -723,3 +723,25 @@ def test_index_equal_empty_iterable(): a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) tm.assert_index_equal(a, b) + + +def test_raise_invalid_sortorder(): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2, + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, + ) diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py new file mode 100644 index 0000000000000..a07a65bc4be19 --- /dev/null +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -0,0 +1,46 @@ +from pandas import MultiIndex + + +class TestIsLexsorted: + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + ) + assert index.is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] + ) + assert not index.is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] + ) + assert not index.is_lexsorted() + assert index.lexsort_depth == 0 + + +class TestLexsortDepth: + def test_lexsort_depth(self): + # Test that lexsort_depth return the correct sortorder + # when it was given to the MultiIndex const. + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + assert index.lexsort_depth == 2 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 + ) + assert index.lexsort_depth == 1 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 + ) + assert index.lexsort_depth == 0 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index efaedfad1e093..fdb25044055d9 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2059,12 +2059,9 @@ def test_sort_index_preserve_levels(self): result = self.frame.sort_index() assert result.index.names == self.frame.index.names - def test_sorting_repr_8017(self): - - np.random.seed(0) - data = np.random.randn(3, 4) - - for gen, extra in [ + @pytest.mark.parametrize( + "gen,extra", + [ ([1.0, 3.0, 2.0, 5.0], 4.0), ([1, 3, 2, 5], 4), ( @@ -2077,44 +2074,50 @@ def test_sorting_repr_8017(self): Timestamp("20130104"), ), (["1one", "3one", "2one", "5one"], "4one"), - ]: - columns = MultiIndex.from_tuples([("red", i) for i in gen]) - df = DataFrame(data, index=list("def"), columns=columns) - df2 = pd.concat( - [ - df, - DataFrame( - "world", - index=list("def"), - columns=MultiIndex.from_tuples([("red", extra)]), - ), - ], - axis=1, - ) + ], + ) + def test_sorting_repr_8017(self, gen, extra): + + np.random.seed(0) + data = np.random.randn(3, 4) - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. only 1 header of read - assert str(df2).splitlines()[0].split() == ["red"] + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) - # GH 8017 - # sorting fails after columns added + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. only 1 header of read + assert str(df2).splitlines()[0].split() == ["red"] - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - tm.assert_frame_equal(result, expected) + # GH 8017 + # sorting fails after columns added - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - tm.assert_frame_equal(result, expected) + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) - # setitem then sort - result = df.copy() - result[("red", extra)] = "world" + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) - result = result.sort_index(axis=1) - tm.assert_frame_equal(result, expected) + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) def test_sort_index_level(self): df = self.frame.copy() @@ -2210,72 +2213,6 @@ def test_sort_index_categorical_multiindex(self): ) tm.assert_frame_equal(result, expected) - def test_is_lexsorted(self): - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - ) - assert index.is_lexsorted() - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] - ) - assert not index.is_lexsorted() - - index = MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] - ) - assert not index.is_lexsorted() - assert index.lexsort_depth == 0 - - def test_raise_invalid_sortorder(self): - # Test that the MultiIndex constructor raise when a incorrect sortorder is given - # Issue #28518 - - levels = [[0, 1], [0, 1, 2]] - - # Correct sortorder - MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - - with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): - MultiIndex( - levels=levels, - codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], - sortorder=2, - ) - - with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): - MultiIndex( - levels=levels, - codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], - sortorder=1, - ) - - def test_lexsort_depth(self): - # Test that lexsort_depth return the correct sortorder - # when it was given to the MultiIndex const. - # Issue #28518 - - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - assert index.lexsort_depth == 2 - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 - ) - assert index.lexsort_depth == 1 - - index = MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 - ) - assert index.lexsort_depth == 0 - def test_sort_index_and_reconstruction(self): # 15622 From 62d71d15787eb84489bb8aa7821081b7232d5064 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 10:30:07 -0800 Subject: [PATCH 02/12] misplaced constructor tests --- .../tests/indexes/multi/test_constructors.py | 51 ++++ pandas/tests/test_multilevel.py | 217 +++++++----------- 2 files changed, 136 insertions(+), 132 deletions(-) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 56836b9314b96..99d86e82e03b2 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -1,3 +1,6 @@ +from datetime import date, datetime +import itertools + import numpy as np import pytest @@ -745,3 +748,51 @@ def test_raise_invalid_sortorder(): MultiIndex( levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, ) + + +def test_from_datetimeindex(): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo", + ) + idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = date.today() + date2 = datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + +# TODO: overlap with test_from_datetimeindex? +def test_from_datetimeindexes_with_tz(): + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([pd.Series(index), pd.Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index fdb25044055d9..da2a545fba712 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1713,53 +1713,6 @@ def test_multiindex_set_index(self): # it works! df.set_index(index) - def test_datetimeindex(self): - idx1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, - tz="Asia/Tokyo", - ) - idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") - idx = MultiIndex.from_arrays([idx1, idx2]) - - expected1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" - ) - - tm.assert_index_equal(idx.levels[0], expected1) - tm.assert_index_equal(idx.levels[1], idx2) - - # from datetime combos - # GH 7888 - date1 = datetime.date.today() - date2 = datetime.datetime.today() - date3 = Timestamp.today() - - for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): - index = MultiIndex.from_product([[d1], [d2]]) - assert isinstance(index.levels[0], pd.DatetimeIndex) - assert isinstance(index.levels[1], pd.DatetimeIndex) - - def test_constructor_with_tz(self): - - index = pd.DatetimeIndex( - ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" - ) - columns = pd.DatetimeIndex( - ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" - ) - - result = MultiIndex.from_arrays([index, columns]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - - result = MultiIndex.from_arrays([Series(index), Series(columns)]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - def test_set_index_datetime(self): # GH 3950 df = DataFrame( @@ -1841,98 +1794,98 @@ def test_set_index_datetime(self): tm.assert_index_equal(df.index.get_level_values(1), idx2) tm.assert_index_equal(df.index.get_level_values(2), idx3) - def test_reset_index_datetime(self): + @pytest.mark.parametrize("tz", ["UTC", "Asia/Tokyo", "US/Eastern"]) + def test_reset_index_datetime(self, tz): # GH 3950 - for tz in ["UTC", "Asia/Tokyo", "US/Eastern"]: - idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") - idx2 = Index(range(5), name="idx2", dtype="int64") - idx = MultiIndex.from_arrays([idx1, idx2]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) + idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - tm.assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) - idx3 = pd.date_range( - "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" - ) - idx = MultiIndex.from_arrays([idx1, idx2, idx3]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) + idx3 = pd.date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + ) + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "idx3": [ - datetime.datetime(2012, 1, 1), - datetime.datetime(2012, 2, 1), - datetime.datetime(2012, 3, 1), - datetime.datetime(2012, 4, 1), - datetime.datetime(2012, 5, 1), - ], - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "idx3", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - expected["idx3"] = expected["idx3"].apply( - lambda d: Timestamp(d, tz="Europe/Paris") - ) - tm.assert_frame_equal(df.reset_index(), expected) + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") + ) + tm.assert_frame_equal(df.reset_index(), expected) - # GH 7793 - idx = MultiIndex.from_product( - [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] - ) - df = DataFrame( - np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx - ) + # GH 7793 + idx = MultiIndex.from_product( + [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) - expected = DataFrame( - { - "level_0": "a a a b b b".split(), - "level_1": [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3), - ] - * 2, - "a": np.arange(6, dtype="int64"), - }, - columns=["level_0", "level_1", "a"], - ) - expected["level_1"] = expected["level_1"].apply( - lambda d: Timestamp(d, freq="D", tz=tz) - ) - tm.assert_frame_equal(df.reset_index(), expected) + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply( + lambda d: Timestamp(d, freq="D", tz=tz) + ) + tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 From 97cca6d46bb510fac7a2e41a8dbf5c86fd6c7983 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 12:20:42 -0800 Subject: [PATCH 03/12] misplaced MultiIndex tessts --- pandas/tests/frame/test_constructors.py | 13 +++++ pandas/tests/indexes/multi/test_format.py | 15 +++++- pandas/tests/indexes/multi/test_indexing.py | 38 +++++++++++++ pandas/tests/test_multilevel.py | 60 --------------------- 4 files changed, 65 insertions(+), 61 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a42cfc6a214ad..c729021ace0cb 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1414,6 +1414,19 @@ def test_constructor_from_ordered_dict(self): result = DataFrame.from_dict(a, orient="index") tm.assert_frame_equal(result, expected) + def test_from_dict_empty_series_multiIndex(self): + s1 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) + ) + s2 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) + ) + s3 = Series(dtype=object) + + # it works! + DataFrame({"foo": s1, "bar": s2, "baz": s3}) + DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) + def test_from_dict_columns_parameter(self): # GH 18529 # Test new columns parameter for from_dict that was added to make diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 75f23fb2f32ba..2db21a0f958db 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -1,9 +1,10 @@ import warnings +import numpy as np import pytest import pandas as pd -from pandas import MultiIndex +from pandas import Index, MultiIndex import pandas._testing as tm @@ -67,6 +68,18 @@ def test_unicode_string_with_unicode(): str(idx) +def test_unicode_repr_issues(): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + + # FIXME: dont leave commented-out + # NumPy bug + # repr(index.get_level_values(1)) + + def test_repr_max_seq_item_setting(idx): # GH10182 idx = idx.repeat(50) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 39049006edb7c..b7d7b3b459aff 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -498,3 +498,41 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id idx = MultiIndex.from_arrays(index_arr) result = idx.slice_indexer(start=start_idx, end=end_idx) assert result == expected + + +def test_pyint_engine(): + # GH#18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [ + tuple(l) + for l in [ + [0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N, + ] + ] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. + + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index da2a545fba712..2a776ad9e55b5 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1265,43 +1265,6 @@ def test_unstack_group_index_overflow(self): result = s.unstack(4) assert result.shape == (500, 2) - def test_pyint_engine(self): - # GH 18519 : when combinations of codes cannot be represented in 64 - # bits, the index underlying the MultiIndex engine works with Python - # integers, rather than uint64. - N = 5 - keys = [ - tuple(l) - for l in [ - [0] * 10 * N, - [1] * 10 * N, - [2] * 10 * N, - [np.nan] * N + [2] * 9 * N, - [0] * N + [2] * 9 * N, - [np.nan] * N + [2] * 8 * N + [0] * N, - ] - ] - # Each level contains 4 elements (including NaN), so it is represented - # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a - # 64 bit engine and truncating the first levels, the fourth and fifth - # keys would collide; if truncating the last levels, the fifth and - # sixth; if rotating bits rather than shifting, the third and fifth. - - for idx in range(len(keys)): - index = MultiIndex.from_tuples(keys) - assert index.get_loc(keys[idx]) == idx - - expected = np.arange(idx + 1, dtype=np.intp) - result = index.get_indexer([keys[i] for i in expected]) - tm.assert_numpy_array_equal(result, expected) - - # With missing key: - idces = range(len(keys)) - expected = np.array([-1] + list(idces), dtype=np.intp) - missing = tuple([0, 1] * 5 * N) - result = index.get_indexer([missing] + [keys[i] for i in idces]) - tm.assert_numpy_array_equal(result, expected) - def test_to_html(self): self.ymd.columns.name = "foo" self.ymd.to_html() @@ -1545,16 +1508,6 @@ def test_drop_preserve_names(self): result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") - def test_unicode_repr_issues(self): - levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] - codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] - index = MultiIndex(levels=levels, codes=codes) - - repr(index.levels) - - # NumPy bug - # repr(index.get_level_values(1)) - def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) @@ -1573,19 +1526,6 @@ def test_join_segfault(self): for how in ["left", "right", "outer"]: df1.join(df2, how=how) - def test_frame_dict_constructor_empty_series(self): - s1 = Series( - [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) - ) - s2 = Series( - [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) - ) - s3 = Series(dtype=object) - - # it works! - DataFrame({"foo": s1, "bar": s2, "baz": s3}) - DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) - @pytest.mark.parametrize("d", [4, "d"]) def test_empty_frame_groupby_dtypes_consistency(self, d): # GH 20888 From 5816a43f9bb9615fa98f6c2936157986eee89e70 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 12:53:41 -0800 Subject: [PATCH 04/12] typo fixup --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c729021ace0cb..a4c82445c23e6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1414,7 +1414,7 @@ def test_constructor_from_ordered_dict(self): result = DataFrame.from_dict(a, orient="index") tm.assert_frame_equal(result, expected) - def test_from_dict_empty_series_multiIndex(self): + def test_from_dict_empty_series_multiindex(self): s1 = Series( [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) ) From 5a21474a958d875d94602880a686623b9480479d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 13:13:21 -0800 Subject: [PATCH 05/12] revert --- pandas/tests/frame/test_constructors.py | 13 - .../tests/indexes/multi/test_constructors.py | 73 --- pandas/tests/indexes/multi/test_format.py | 15 +- pandas/tests/indexes/multi/test_indexing.py | 38 -- pandas/tests/test_multilevel.py | 422 ++++++++++++------ 5 files changed, 297 insertions(+), 264 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a4c82445c23e6..a42cfc6a214ad 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1414,19 +1414,6 @@ def test_constructor_from_ordered_dict(self): result = DataFrame.from_dict(a, orient="index") tm.assert_frame_equal(result, expected) - def test_from_dict_empty_series_multiindex(self): - s1 = Series( - [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) - ) - s2 = Series( - [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) - ) - s3 = Series(dtype=object) - - # it works! - DataFrame({"foo": s1, "bar": s2, "baz": s3}) - DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) - def test_from_dict_columns_parameter(self): # GH 18529 # Test new columns parameter for from_dict that was added to make diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 99d86e82e03b2..2c4b3ce04f96d 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -1,6 +1,3 @@ -from datetime import date, datetime -import itertools - import numpy as np import pytest @@ -726,73 +723,3 @@ def test_index_equal_empty_iterable(): a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) tm.assert_index_equal(a, b) - - -def test_raise_invalid_sortorder(): - # Test that the MultiIndex constructor raise when a incorrect sortorder is given - # GH#28518 - - levels = [[0, 1], [0, 1, 2]] - - # Correct sortorder - MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - - with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): - MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2, - ) - - with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): - MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, - ) - - -def test_from_datetimeindex(): - idx1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo", - ) - idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") - idx = MultiIndex.from_arrays([idx1, idx2]) - - expected1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" - ) - - tm.assert_index_equal(idx.levels[0], expected1) - tm.assert_index_equal(idx.levels[1], idx2) - - # from datetime combos - # GH 7888 - date1 = date.today() - date2 = datetime.today() - date3 = Timestamp.today() - - for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): - index = MultiIndex.from_product([[d1], [d2]]) - assert isinstance(index.levels[0], pd.DatetimeIndex) - assert isinstance(index.levels[1], pd.DatetimeIndex) - - -# TODO: overlap with test_from_datetimeindex? -def test_from_datetimeindexes_with_tz(): - index = pd.DatetimeIndex( - ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" - ) - columns = pd.DatetimeIndex( - ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" - ) - - result = MultiIndex.from_arrays([index, columns]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - - result = MultiIndex.from_arrays([pd.Series(index), pd.Series(columns)]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 2db21a0f958db..75f23fb2f32ba 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -1,10 +1,9 @@ import warnings -import numpy as np import pytest import pandas as pd -from pandas import Index, MultiIndex +from pandas import MultiIndex import pandas._testing as tm @@ -68,18 +67,6 @@ def test_unicode_string_with_unicode(): str(idx) -def test_unicode_repr_issues(): - levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] - codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] - index = MultiIndex(levels=levels, codes=codes) - - repr(index.levels) - - # FIXME: dont leave commented-out - # NumPy bug - # repr(index.get_level_values(1)) - - def test_repr_max_seq_item_setting(idx): # GH10182 idx = idx.repeat(50) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index b7d7b3b459aff..39049006edb7c 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -498,41 +498,3 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id idx = MultiIndex.from_arrays(index_arr) result = idx.slice_indexer(start=start_idx, end=end_idx) assert result == expected - - -def test_pyint_engine(): - # GH#18519 : when combinations of codes cannot be represented in 64 - # bits, the index underlying the MultiIndex engine works with Python - # integers, rather than uint64. - N = 5 - keys = [ - tuple(l) - for l in [ - [0] * 10 * N, - [1] * 10 * N, - [2] * 10 * N, - [np.nan] * N + [2] * 9 * N, - [0] * N + [2] * 9 * N, - [np.nan] * N + [2] * 8 * N + [0] * N, - ] - ] - # Each level contains 4 elements (including NaN), so it is represented - # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a - # 64 bit engine and truncating the first levels, the fourth and fifth - # keys would collide; if truncating the last levels, the fifth and - # sixth; if rotating bits rather than shifting, the third and fifth. - - for idx in range(len(keys)): - index = MultiIndex.from_tuples(keys) - assert index.get_loc(keys[idx]) == idx - - expected = np.arange(idx + 1, dtype=np.intp) - result = index.get_indexer([keys[i] for i in expected]) - tm.assert_numpy_array_equal(result, expected) - - # With missing key: - idces = range(len(keys)) - expected = np.array([-1] + list(idces), dtype=np.intp) - missing = tuple([0, 1] * 5 * N) - result = index.get_indexer([missing] + [keys[i] for i in idces]) - tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 2a776ad9e55b5..efaedfad1e093 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1265,6 +1265,43 @@ def test_unstack_group_index_overflow(self): result = s.unstack(4) assert result.shape == (500, 2) + def test_pyint_engine(self): + # GH 18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [ + tuple(l) + for l in [ + [0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N, + ] + ] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. + + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) + def test_to_html(self): self.ymd.columns.name = "foo" self.ymd.to_html() @@ -1508,6 +1545,16 @@ def test_drop_preserve_names(self): result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") + def test_unicode_repr_issues(self): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + + # NumPy bug + # repr(index.get_level_values(1)) + def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) @@ -1526,6 +1573,19 @@ def test_join_segfault(self): for how in ["left", "right", "outer"]: df1.join(df2, how=how) + def test_frame_dict_constructor_empty_series(self): + s1 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) + ) + s2 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) + ) + s3 = Series(dtype=object) + + # it works! + DataFrame({"foo": s1, "bar": s2, "baz": s3}) + DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) + @pytest.mark.parametrize("d", [4, "d"]) def test_empty_frame_groupby_dtypes_consistency(self, d): # GH 20888 @@ -1653,6 +1713,53 @@ def test_multiindex_set_index(self): # it works! df.set_index(index) + def test_datetimeindex(self): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, + tz="Asia/Tokyo", + ) + idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = datetime.date.today() + date2 = datetime.datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + def test_constructor_with_tz(self): + + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + def test_set_index_datetime(self): # GH 3950 df = DataFrame( @@ -1734,98 +1841,98 @@ def test_set_index_datetime(self): tm.assert_index_equal(df.index.get_level_values(1), idx2) tm.assert_index_equal(df.index.get_level_values(2), idx3) - @pytest.mark.parametrize("tz", ["UTC", "Asia/Tokyo", "US/Eastern"]) - def test_reset_index_datetime(self, tz): + def test_reset_index_datetime(self): # GH 3950 - idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") - idx2 = Index(range(5), name="idx2", dtype="int64") - idx = MultiIndex.from_arrays([idx1, idx2]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) + for tz in ["UTC", "Asia/Tokyo", "US/Eastern"]: + idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - tm.assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) - idx3 = pd.date_range( - "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" - ) - idx = MultiIndex.from_arrays([idx1, idx2, idx3]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) + idx3 = pd.date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + ) + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "idx3": [ - datetime.datetime(2012, 1, 1), - datetime.datetime(2012, 2, 1), - datetime.datetime(2012, 3, 1), - datetime.datetime(2012, 4, 1), - datetime.datetime(2012, 5, 1), - ], - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "idx3", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - expected["idx3"] = expected["idx3"].apply( - lambda d: Timestamp(d, tz="Europe/Paris") - ) - tm.assert_frame_equal(df.reset_index(), expected) + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") + ) + tm.assert_frame_equal(df.reset_index(), expected) - # GH 7793 - idx = MultiIndex.from_product( - [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] - ) - df = DataFrame( - np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx - ) + # GH 7793 + idx = MultiIndex.from_product( + [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) - expected = DataFrame( - { - "level_0": "a a a b b b".split(), - "level_1": [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3), - ] - * 2, - "a": np.arange(6, dtype="int64"), - }, - columns=["level_0", "level_1", "a"], - ) - expected["level_1"] = expected["level_1"].apply( - lambda d: Timestamp(d, freq="D", tz=tz) - ) - tm.assert_frame_equal(df.reset_index(), expected) + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply( + lambda d: Timestamp(d, freq="D", tz=tz) + ) + tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 @@ -1952,9 +2059,12 @@ def test_sort_index_preserve_levels(self): result = self.frame.sort_index() assert result.index.names == self.frame.index.names - @pytest.mark.parametrize( - "gen,extra", - [ + def test_sorting_repr_8017(self): + + np.random.seed(0) + data = np.random.randn(3, 4) + + for gen, extra in [ ([1.0, 3.0, 2.0, 5.0], 4.0), ([1, 3, 2, 5], 4), ( @@ -1967,50 +2077,44 @@ def test_sort_index_preserve_levels(self): Timestamp("20130104"), ), (["1one", "3one", "2one", "5one"], "4one"), - ], - ) - def test_sorting_repr_8017(self, gen, extra): - - np.random.seed(0) - data = np.random.randn(3, 4) - - columns = MultiIndex.from_tuples([("red", i) for i in gen]) - df = DataFrame(data, index=list("def"), columns=columns) - df2 = pd.concat( - [ - df, - DataFrame( - "world", - index=list("def"), - columns=MultiIndex.from_tuples([("red", extra)]), - ), - ], - axis=1, - ) + ]: + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. only 1 header of read - assert str(df2).splitlines()[0].split() == ["red"] + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. only 1 header of read + assert str(df2).splitlines()[0].split() == ["red"] - # GH 8017 - # sorting fails after columns added + # GH 8017 + # sorting fails after columns added - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - tm.assert_frame_equal(result, expected) + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - tm.assert_frame_equal(result, expected) + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) - # setitem then sort - result = df.copy() - result[("red", extra)] = "world" + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" - result = result.sort_index(axis=1) - tm.assert_frame_equal(result, expected) + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) def test_sort_index_level(self): df = self.frame.copy() @@ -2106,6 +2210,72 @@ def test_sort_index_categorical_multiindex(self): ) tm.assert_frame_equal(result, expected) + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + ) + assert index.is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] + ) + assert not index.is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] + ) + assert not index.is_lexsorted() + assert index.lexsort_depth == 0 + + def test_raise_invalid_sortorder(self): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # Issue #28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], + sortorder=2, + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, + codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], + sortorder=1, + ) + + def test_lexsort_depth(self): + # Test that lexsort_depth return the correct sortorder + # when it was given to the MultiIndex const. + # Issue #28518 + + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + assert index.lexsort_depth == 2 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 + ) + assert index.lexsort_depth == 1 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 + ) + assert index.lexsort_depth == 0 + def test_sort_index_and_reconstruction(self): # 15622 From c5ded77da4cfa585139f38286b16843d18adec8f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 13:15:39 -0800 Subject: [PATCH 06/12] misplaced MultiIndex tests --- pandas/tests/indexes/multi/test_lexsort.py | 2 +- pandas/tests/test_multilevel.py | 41 ---------------------- 2 files changed, 1 insertion(+), 42 deletions(-) diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py index a07a65bc4be19..1d2ad8e02697e 100644 --- a/pandas/tests/indexes/multi/test_lexsort.py +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -24,7 +24,7 @@ def test_is_lexsorted(self): class TestLexsortDepth: def test_lexsort_depth(self): - # Test that lexsort_depth return the correct sortorder + # Test that lexsort_depth return the correct sortorder # when it was given to the MultiIndex const. # GH#28518 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index efaedfad1e093..4f2aa808ae55c 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2210,25 +2210,6 @@ def test_sort_index_categorical_multiindex(self): ) tm.assert_frame_equal(result, expected) - def test_is_lexsorted(self): - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - ) - assert index.is_lexsorted() - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] - ) - assert not index.is_lexsorted() - - index = MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] - ) - assert not index.is_lexsorted() - assert index.lexsort_depth == 0 - def test_raise_invalid_sortorder(self): # Test that the MultiIndex constructor raise when a incorrect sortorder is given # Issue #28518 @@ -2254,28 +2235,6 @@ def test_raise_invalid_sortorder(self): sortorder=1, ) - def test_lexsort_depth(self): - # Test that lexsort_depth return the correct sortorder - # when it was given to the MultiIndex const. - # Issue #28518 - - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - assert index.lexsort_depth == 2 - - index = MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 - ) - assert index.lexsort_depth == 1 - - index = MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 - ) - assert index.lexsort_depth == 0 - def test_sort_index_and_reconstruction(self): # 15622 From 33398a825c8d85bd6dd4ddf595c769bd5cf32fa9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 13:16:47 -0800 Subject: [PATCH 07/12] misplaced MultiIndex test --- .../tests/indexes/multi/test_constructors.py | 22 ++++++++++++++++ pandas/tests/test_multilevel.py | 25 ------------------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 2c4b3ce04f96d..56836b9314b96 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -723,3 +723,25 @@ def test_index_equal_empty_iterable(): a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) tm.assert_index_equal(a, b) + + +def test_raise_invalid_sortorder(): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2, + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, + ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 4f2aa808ae55c..ee9539363b941 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2210,31 +2210,6 @@ def test_sort_index_categorical_multiindex(self): ) tm.assert_frame_equal(result, expected) - def test_raise_invalid_sortorder(self): - # Test that the MultiIndex constructor raise when a incorrect sortorder is given - # Issue #28518 - - levels = [[0, 1], [0, 1, 2]] - - # Correct sortorder - MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 - ) - - with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): - MultiIndex( - levels=levels, - codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], - sortorder=2, - ) - - with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): - MultiIndex( - levels=levels, - codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], - sortorder=1, - ) - def test_sort_index_and_reconstruction(self): # 15622 From 582c0d03e4c7ce3718c9e64ad7635863cbb9935d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 13:26:22 -0800 Subject: [PATCH 08/12] misplaced MultiIndex tests --- .../tests/indexes/multi/test_constructors.py | 53 ++++++++++++++++++- pandas/tests/test_multilevel.py | 47 ---------------- 2 files changed, 52 insertions(+), 48 deletions(-) diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 56836b9314b96..1157c7f8bb962 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -1,3 +1,6 @@ +from datetime import date, datetime +import itertools + import numpy as np import pytest @@ -6,7 +9,7 @@ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike import pandas as pd -from pandas import Index, MultiIndex, date_range +from pandas import Index, MultiIndex, Series, date_range import pandas._testing as tm @@ -745,3 +748,51 @@ def test_raise_invalid_sortorder(): MultiIndex( levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, ) + + +def test_datetimeindex(): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo", + ) + idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = date.today() + date2 = datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + +def test_constructor_with_tz(): + + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index ee9539363b941..d5d61957d8310 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1713,53 +1713,6 @@ def test_multiindex_set_index(self): # it works! df.set_index(index) - def test_datetimeindex(self): - idx1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, - tz="Asia/Tokyo", - ) - idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") - idx = MultiIndex.from_arrays([idx1, idx2]) - - expected1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" - ) - - tm.assert_index_equal(idx.levels[0], expected1) - tm.assert_index_equal(idx.levels[1], idx2) - - # from datetime combos - # GH 7888 - date1 = datetime.date.today() - date2 = datetime.datetime.today() - date3 = Timestamp.today() - - for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): - index = MultiIndex.from_product([[d1], [d2]]) - assert isinstance(index.levels[0], pd.DatetimeIndex) - assert isinstance(index.levels[1], pd.DatetimeIndex) - - def test_constructor_with_tz(self): - - index = pd.DatetimeIndex( - ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" - ) - columns = pd.DatetimeIndex( - ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" - ) - - result = MultiIndex.from_arrays([index, columns]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - - result = MultiIndex.from_arrays([Series(index), Series(columns)]) - - assert result.names == ["dt1", "dt2"] - tm.assert_index_equal(result.levels[0], index) - tm.assert_index_equal(result.levels[1], columns) - def test_set_index_datetime(self): # GH 3950 df = DataFrame( From 7f8f9e7855e49b69aa39b7f7dc86ff9d0240f4d5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 13:48:14 -0800 Subject: [PATCH 09/12] misplaced MultiIndex test --- pandas/tests/indexes/multi/test_duplicates.py | 26 +++++++++++++++++++ pandas/tests/test_multilevel.py | 25 ------------------ 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 93e1de535835f..5e17a19335c7e 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -274,3 +274,29 @@ def test_duplicated2(): tm.assert_numpy_array_equal( mi.duplicated(), np.zeros(len(mi), dtype="bool") ) + + +def test_duplicated_drop_duplicates(): + # GH#4060 + idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) + + expected = np.array([False, False, False, True, False, False], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(), expected) + + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) + + expected = np.array([True, False, False, True, False, False]) + duplicated = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d5d61957d8310..a21cb1f966f48 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1677,31 +1677,6 @@ def test_duplicate_mi(self): result = df.loc[("foo", "bar")] tm.assert_frame_equal(result, expected) - def test_duplicated_drop_duplicates(self): - # GH 4060 - idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) - - expected = np.array([False, False, False, True, False, False], dtype=bool) - duplicated = idx.duplicated() - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) - tm.assert_index_equal(idx.drop_duplicates(), expected) - - expected = np.array([True, False, False, False, False, False]) - duplicated = idx.duplicated(keep="last") - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) - tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) - - expected = np.array([True, False, False, True, False, False]) - duplicated = idx.duplicated(keep=False) - tm.assert_numpy_array_equal(duplicated, expected) - assert duplicated.dtype == bool - expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) - tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) - def test_multiindex_set_index(self): # segfault in #3308 d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} From 441f74287ac748bc0419ae4f62b3df2d343319fa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 14:33:40 -0800 Subject: [PATCH 10/12] misplaced MultiIndex test --- pandas/tests/indexes/multi/test_format.py | 14 +++++- pandas/tests/indexes/multi/test_indexing.py | 38 +++++++++++++++++ pandas/tests/test_multilevel.py | 47 --------------------- 3 files changed, 51 insertions(+), 48 deletions(-) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 75f23fb2f32ba..75499bd79cca0 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -1,9 +1,10 @@ import warnings +import numpy as np import pytest import pandas as pd -from pandas import MultiIndex +from pandas import Index, MultiIndex import pandas._testing as tm @@ -76,6 +77,17 @@ def test_repr_max_seq_item_setting(idx): class TestRepr: + def test_unicode_repr_issues(self): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + + # FIXME: dont leave commented-out + # NumPy bug + # repr(index.get_level_values(1)) + def test_repr(self, idx): result = idx[:1].__repr__() expected = """\ diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 39049006edb7c..b7d7b3b459aff 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -498,3 +498,41 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id idx = MultiIndex.from_arrays(index_arr) result = idx.slice_indexer(start=start_idx, end=end_idx) assert result == expected + + +def test_pyint_engine(): + # GH#18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [ + tuple(l) + for l in [ + [0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N, + ] + ] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. + + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a21cb1f966f48..76eb134c720c7 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1265,43 +1265,6 @@ def test_unstack_group_index_overflow(self): result = s.unstack(4) assert result.shape == (500, 2) - def test_pyint_engine(self): - # GH 18519 : when combinations of codes cannot be represented in 64 - # bits, the index underlying the MultiIndex engine works with Python - # integers, rather than uint64. - N = 5 - keys = [ - tuple(l) - for l in [ - [0] * 10 * N, - [1] * 10 * N, - [2] * 10 * N, - [np.nan] * N + [2] * 9 * N, - [0] * N + [2] * 9 * N, - [np.nan] * N + [2] * 8 * N + [0] * N, - ] - ] - # Each level contains 4 elements (including NaN), so it is represented - # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a - # 64 bit engine and truncating the first levels, the fourth and fifth - # keys would collide; if truncating the last levels, the fifth and - # sixth; if rotating bits rather than shifting, the third and fifth. - - for idx in range(len(keys)): - index = MultiIndex.from_tuples(keys) - assert index.get_loc(keys[idx]) == idx - - expected = np.arange(idx + 1, dtype=np.intp) - result = index.get_indexer([keys[i] for i in expected]) - tm.assert_numpy_array_equal(result, expected) - - # With missing key: - idces = range(len(keys)) - expected = np.array([-1] + list(idces), dtype=np.intp) - missing = tuple([0, 1] * 5 * N) - result = index.get_indexer([missing] + [keys[i] for i in idces]) - tm.assert_numpy_array_equal(result, expected) - def test_to_html(self): self.ymd.columns.name = "foo" self.ymd.to_html() @@ -1545,16 +1508,6 @@ def test_drop_preserve_names(self): result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") - def test_unicode_repr_issues(self): - levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] - codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] - index = MultiIndex(levels=levels, codes=codes) - - repr(index.levels) - - # NumPy bug - # repr(index.get_level_values(1)) - def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) From d82f89ca327bf6756c15f6edeb1e742a92d85c61 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 14:41:41 -0800 Subject: [PATCH 11/12] misplaced MultiIndex tests --- pandas/tests/indexes/multi/test_missing.py | 10 ++++ pandas/tests/indexes/multi/test_reshape.py | 50 +++++++++++++++++++ pandas/tests/test_multilevel.py | 58 +--------------------- 3 files changed, 61 insertions(+), 57 deletions(-) diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index a17e1e9928bff..54ffec2e03fd3 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -141,3 +141,13 @@ def test_nan_stays_float(): assert pd.isna(df0.index.get_level_values(1)).all() # the following failed in 0.14.1 assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() + + +def test_tuples_have_na(): + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + assert pd.isna(index[4][0]) + assert pd.isna(index.values[4][0]) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 2e39c714ca7af..18b851af4ca01 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -1,5 +1,8 @@ +from datetime import datetime + import numpy as np import pytest +import pytz import pandas as pd from pandas import Index, MultiIndex @@ -95,6 +98,53 @@ def test_append(idx): assert result.equals(idx) +def test_append_index(): + idx1 = Index([1.1, 1.2, 1.3]) + idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") + idx3 = Index(["A", "B", "C"]) + + midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) + midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) + + result = idx1.append(midx_lv2) + + # see gh-7112 + tz = pytz.timezone("Asia/Tokyo") + expected_tuples = [ + (1.1, tz.localize(datetime.datetime(2011, 1, 1))), + (1.2, tz.localize(datetime.datetime(2011, 1, 2))), + (1.3, tz.localize(datetime.datetime(2011, 1, 3))), + ] + expected = Index([1.1, 1.2, 1.3] + expected_tuples) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(idx1) + expected = Index(expected_tuples + [1.1, 1.2, 1.3]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv2) + expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv3) + tm.assert_index_equal(result, expected) + + result = midx_lv3.append(midx_lv2) + expected = Index._simple_new( + np.array( + [ + (1.1, tz.localize(datetime.datetime(2011, 1, 1)), "A"), + (1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"), + ] + + expected_tuples, + dtype=object, + ), + None, + ) + tm.assert_index_equal(result, expected) + + def test_repeat(): reps = 2 numbers = [1, 2, 3] diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 76eb134c720c7..e3cf46b466ae4 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -6,12 +6,11 @@ import numpy as np from numpy.random import randn import pytest -import pytz from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp import pandas._testing as tm AGG_FUNCTIONS = [ @@ -80,52 +79,6 @@ def test_append(self): result = a["A"].append(b["A"]) tm.assert_series_equal(result, self.frame["A"]) - def test_append_index(self): - idx1 = Index([1.1, 1.2, 1.3]) - idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") - idx3 = Index(["A", "B", "C"]) - - midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) - midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) - - result = idx1.append(midx_lv2) - - # see gh-7112 - tz = pytz.timezone("Asia/Tokyo") - expected_tuples = [ - (1.1, tz.localize(datetime.datetime(2011, 1, 1))), - (1.2, tz.localize(datetime.datetime(2011, 1, 2))), - (1.3, tz.localize(datetime.datetime(2011, 1, 3))), - ] - expected = Index([1.1, 1.2, 1.3] + expected_tuples) - tm.assert_index_equal(result, expected) - - result = midx_lv2.append(idx1) - expected = Index(expected_tuples + [1.1, 1.2, 1.3]) - tm.assert_index_equal(result, expected) - - result = midx_lv2.append(midx_lv2) - expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) - tm.assert_index_equal(result, expected) - - result = midx_lv2.append(midx_lv3) - tm.assert_index_equal(result, expected) - - result = midx_lv3.append(midx_lv2) - expected = Index._simple_new( - np.array( - [ - (1.1, tz.localize(datetime.datetime(2011, 1, 1)), "A"), - (1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"), - (1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"), - ] - + expected_tuples, - dtype=object, - ), - None, - ) - tm.assert_index_equal(result, expected) - def test_dataframe_constructor(self): multi = DataFrame( np.random.randn(4, 4), @@ -1584,15 +1537,6 @@ def test_assign_index_sequences(self): df.index = index repr(df) - def test_tuples_have_na(self): - index = MultiIndex( - levels=[[1, 0], [0, 1, 2, 3]], - codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], - ) - - assert isna(index[4][0]) - assert isna(index.values[4][0]) - def test_duplicate_groupby_issues(self): idx_tp = [ ("600809", "20061231"), From 3b50eb48346e40323a1a8d970668ac157c8ed579 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Feb 2020 16:15:23 -0800 Subject: [PATCH 12/12] fixup import --- pandas/tests/indexes/multi/test_reshape.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 18b851af4ca01..de32bd94be491 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -111,9 +111,9 @@ def test_append_index(): # see gh-7112 tz = pytz.timezone("Asia/Tokyo") expected_tuples = [ - (1.1, tz.localize(datetime.datetime(2011, 1, 1))), - (1.2, tz.localize(datetime.datetime(2011, 1, 2))), - (1.3, tz.localize(datetime.datetime(2011, 1, 3))), + (1.1, tz.localize(datetime(2011, 1, 1))), + (1.2, tz.localize(datetime(2011, 1, 2))), + (1.3, tz.localize(datetime(2011, 1, 3))), ] expected = Index([1.1, 1.2, 1.3] + expected_tuples) tm.assert_index_equal(result, expected) @@ -133,9 +133,9 @@ def test_append_index(): expected = Index._simple_new( np.array( [ - (1.1, tz.localize(datetime.datetime(2011, 1, 1)), "A"), - (1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"), - (1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"), + (1.1, tz.localize(datetime(2011, 1, 1)), "A"), + (1.2, tz.localize(datetime(2011, 1, 2)), "B"), + (1.3, tz.localize(datetime(2011, 1, 3)), "C"), ] + expected_tuples, dtype=object,