From b522fbe074f9b405a558ea0b5d1b87d3815290d4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Feb 2021 21:47:01 -0800 Subject: [PATCH 1/2] TST/REF: split/collect large tests --- pandas/tests/frame/indexing/test_getitem.py | 82 +++++++++++ pandas/tests/frame/indexing/test_setitem.py | 92 ++++++++++++ pandas/tests/frame/test_alter_axes.py | 139 +----------------- pandas/tests/frame/test_constructors.py | 14 ++ pandas/tests/frame/test_nonunique_indexes.py | 94 +++--------- .../datetimes/methods/test_to_series.py | 37 +++++ 6 files changed, 249 insertions(+), 209 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/methods/test_to_series.py diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 4282db6933371..7c48c412fd694 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -10,6 +10,7 @@ MultiIndex, Series, Timestamp, + concat, get_dummies, period_range, ) @@ -176,6 +177,87 @@ def test_getitem_bool_mask_categorical_index(self): with pytest.raises(TypeError, match=msg): df4[df4.index > 1] + @pytest.mark.parametrize( + "data1,data2,expected_data", + ( + ( + [[1, 2], [3, 4]], + [[0.5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], + ), + ( + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], + ), + ), + ) + def test_getitem_bool_mask_duplicate_columns_mixed_dtypes( + self, + data1, + data2, + expected_data, + ): + # GH#31954 + + df1 = DataFrame(np.array(data1)) + df2 = DataFrame(np.array(data2)) + df = concat([df1, df2], axis=1) + + result = df[df > 2] + + exdict = {i: np.array(col) for i, col in enumerate(expected_data)} + expected = DataFrame(exdict).rename(columns={2: 0, 3: 1}) + tm.assert_frame_equal(result, expected) + + @pytest.fixture + def df_dup_cols(self): + dups = ["A", "A", "C", "D"] + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + return df + + def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols): + # `df.A > 6` is a DataFrame with a different shape from df + + # boolean with the duplicate raises + df = df_dup_cols + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df[df.A > 6] + + def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols): + # boolean indexing + # GH#4879 + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + expected = df[df.C > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df.C > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols): + + # where + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + # `df > 6` is a DataFrame with the same shape+alignment as df + expected = df[df > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + class TestGetitemSlice: def test_getitem_slice_float64(self, frame_or_series): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9318764a1b5ad..4dfbc0b918aaa 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -2,18 +2,26 @@ import pytest from pandas.core.dtypes.base import registry as ea_registry +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_interval_dtype, + is_object_dtype, +) from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas import ( Categorical, DataFrame, + DatetimeIndex, Index, Interval, + IntervalIndex, NaT, Period, PeriodIndex, Series, Timestamp, + cut, date_range, notna, period_range, @@ -395,6 +403,90 @@ def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self): with pytest.raises(ValueError, match=msg): df[["a", "b"]] = rhs + def test_setitem_intervals(self): + + df = DataFrame({"A": range(10)}) + ser = cut(df["A"], 5) + assert isinstance(ser.cat.categories, IntervalIndex) + + # B & D end up as Categoricals + # the remainer are converted to in-line objects + # contining an IntervalIndex.values + df["B"] = ser + df["C"] = np.array(ser) + df["D"] = ser.values + df["E"] = np.array(ser.values) + + assert is_categorical_dtype(df["B"].dtype) + assert is_interval_dtype(df["B"].cat.categories) + assert is_categorical_dtype(df["D"].dtype) + assert is_interval_dtype(df["D"].cat.categories) + + assert is_object_dtype(df["C"]) + assert is_object_dtype(df["E"]) + + # they compare equal as Index + # when converted to numpy objects + c = lambda x: Index(np.array(x)) + tm.assert_index_equal(c(df.B), c(df.B)) + tm.assert_index_equal(c(df.B), c(df.C), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.C), c(df.D), check_names=False) + + # B & D are the same Series + tm.assert_series_equal(df["B"], df["B"]) + tm.assert_series_equal(df["B"], df["D"], check_names=False) + + # C & E are the same Series + tm.assert_series_equal(df["C"], df["C"]) + tm.assert_series_equal(df["C"], df["E"], check_names=False) + + +class TestSetitemTZAwareValues: + @pytest.fixture + def idx(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + return idx + + @pytest.fixture + def expected(self, idx): + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + return expected + + def test_setitem_dt64series(self, idx, expected): + # convert to utc + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df["B"] = idx + + with tm.assert_produces_warning(FutureWarning) as m: + df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) + + result = df["B"] + comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") + tm.assert_series_equal(result, comp) + + def test_setitem_datetimeindex(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # assign to frame + df["B"] = idx + result = df["B"] + tm.assert_series_equal(result, expected) + + def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # object array of datetimes with a tz + df["B"] = idx.to_pydatetime() + result = df["B"] + tm.assert_series_equal(result, expected) + class TestDataFrameSetItemWithExpansion: def test_setitem_listlike_views(self): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 862f5b87785f5..9d56adba08b9a 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,112 +1,13 @@ from datetime import datetime import numpy as np -import pytest import pytz -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_interval_dtype, - is_object_dtype, -) - -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - IntervalIndex, - Series, - Timestamp, - cut, - date_range, -) +from pandas import DataFrame, cut, date_range import pandas._testing as tm class TestDataFrameAlterAxes: - @pytest.fixture - def idx_expected(self): - idx = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B").tz_localize( - "US/Pacific" - ) - - expected = Series( - np.array( - [ - Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), - Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), - ], - dtype="object", - ), - name="B", - ) - assert expected.dtype == idx.dtype - return idx, expected - - def test_to_series_keep_tz_deprecated_true(self, idx_expected): - # convert to series while keeping the timezone - idx, expected = idx_expected - - msg = "stop passing 'keep_tz'" - with tm.assert_produces_warning(FutureWarning) as m: - result = idx.to_series(keep_tz=True, index=[0, 1]) - assert msg in str(m[0].message) - - tm.assert_series_equal(result, expected) - - def test_to_series_keep_tz_deprecated_false(self, idx_expected): - idx, expected = idx_expected - - with tm.assert_produces_warning(FutureWarning) as m: - result = idx.to_series(keep_tz=False, index=[0, 1]) - tm.assert_series_equal(result, expected.dt.tz_convert(None)) - msg = "do 'idx.tz_convert(None)' before calling" - assert msg in str(m[0].message) - - def test_setitem_dt64series(self, idx_expected): - # convert to utc - idx, expected = idx_expected - df = DataFrame(np.random.randn(2, 1), columns=["A"]) - df["B"] = idx - - with tm.assert_produces_warning(FutureWarning) as m: - df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) - msg = "do 'idx.tz_convert(None)' before calling" - assert msg in str(m[0].message) - - result = df["B"] - comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") - tm.assert_series_equal(result, comp) - - def test_setitem_datetimeindex(self, idx_expected): - # setting a DataFrame column with a tzaware DTI retains the dtype - idx, expected = idx_expected - df = DataFrame(np.random.randn(2, 1), columns=["A"]) - - # assign to frame - df["B"] = idx - result = df["B"] - tm.assert_series_equal(result, expected) - - def test_setitem_object_array_of_tzaware_datetimes(self, idx_expected): - # setting a DataFrame column with a tzaware DTI retains the dtype - idx, expected = idx_expected - df = DataFrame(np.random.randn(2, 1), columns=["A"]) - - # object array of datetimes with a tz - df["B"] = idx.to_pydatetime() - result = df["B"] - tm.assert_series_equal(result, expected) - - def test_constructor_from_tzaware_datetimeindex(self, idx_expected): - # don't cast a DatetimeIndex WITH a tz, leave as object - # GH 6032 - idx, expected = idx_expected - - # convert index to series - result = Series(idx) - tm.assert_series_equal(result, expected) - def test_set_axis_setattr_index(self): # GH 6785 # set the index manually @@ -154,44 +55,6 @@ def test_assign_columns(self, float_frame): class TestIntervalIndex: - def test_setitem(self): - - df = DataFrame({"A": range(10)}) - ser = cut(df["A"], 5) - assert isinstance(ser.cat.categories, IntervalIndex) - - # B & D end up as Categoricals - # the remainer are converted to in-line objects - # contining an IntervalIndex.values - df["B"] = ser - df["C"] = np.array(ser) - df["D"] = ser.values - df["E"] = np.array(ser.values) - - assert is_categorical_dtype(df["B"].dtype) - assert is_interval_dtype(df["B"].cat.categories) - assert is_categorical_dtype(df["D"].dtype) - assert is_interval_dtype(df["D"].cat.categories) - - assert is_object_dtype(df["C"]) - assert is_object_dtype(df["E"]) - - # they compare equal as Index - # when converted to numpy objects - c = lambda x: Index(np.array(x)) - tm.assert_index_equal(c(df.B), c(df.B)) - tm.assert_index_equal(c(df.B), c(df.C), check_names=False) - tm.assert_index_equal(c(df.B), c(df.D), check_names=False) - tm.assert_index_equal(c(df.C), c(df.D), check_names=False) - - # B & D are the same Series - tm.assert_series_equal(df["B"], df["B"]) - tm.assert_series_equal(df["B"], df["D"], check_names=False) - - # C & E are the same Series - tm.assert_series_equal(df["C"], df["C"]) - tm.assert_series_equal(df["C"], df["E"], check_names=False) - def test_set_reset_index(self): df = DataFrame({"A": range(10)}) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 9ec745932514f..5fcab5200e305 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -20,6 +20,7 @@ Categorical, CategoricalIndex, DataFrame, + DatetimeIndex, Index, Interval, MultiIndex, @@ -48,6 +49,19 @@ class TestDataFrameConstructors: + def test_constructor_from_tzaware_datetimeindex(self): + # don't cast a DatetimeIndex WITH a tz, leave as object + # GH#6032 + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + + # convert index to series + result = Series(idx) + tm.assert_series_equal(result, expected) + def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): # GH#39462 nat = np.datetime64("NaT", "ns") diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 8dcf6f2188058..1f892c3a03e85 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -33,6 +33,7 @@ def test_column_dups_operations(self): expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx) check(df, expected) + def test_insert_with_duplicate_columns(self): # insert df = DataFrame( [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], @@ -119,6 +120,7 @@ def test_column_dups_operations(self): ) tm.assert_frame_equal(df, expected) + def test_dup_across_dtypes(self): # dup across dtypes df = DataFrame( [[1, 1, 1.0, 5], [1, 1, 2.0, 5], [2, 1, 3.0, 5]], @@ -155,12 +157,14 @@ def test_column_dups_operations(self): ) check(df, expected) + def test_values_with_duplicate_columns(self): # values df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) result = df.values expected = np.array([[1, 2.5], [3, 4.5]]) assert (result == expected).all().all() + def test_rename_with_duplicate_columns(self): # rename, GH 4403 df4 = DataFrame( {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, @@ -201,6 +205,8 @@ def test_column_dups_operations(self): ).set_index(["STK_ID", "RPT_Date"], drop=False) tm.assert_frame_equal(result, expected) + def test_reindex_with_duplicate_columns(self): + # reindex is invalid! df = DataFrame( [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] @@ -211,6 +217,8 @@ def test_column_dups_operations(self): with pytest.raises(ValueError, match=msg): df.reindex(columns=["bar", "foo"]) + def test_drop_with_duplicate_columns(self): + # drop df = DataFrame( [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] @@ -221,6 +229,7 @@ def test_column_dups_operations(self): result = df.drop("a", axis=1) check(result, expected) + def test_describe_with_duplicate_columns(self): # describe df = DataFrame( [[1, 1, 1], [2, 2, 2], [3, 3, 3]], @@ -232,6 +241,7 @@ def test_column_dups_operations(self): expected = pd.concat([s, s, s], keys=df.columns, axis=1) check(result, expected) + def test_column_dups_indexes(self): # check column dups with index equal and not equal to df's index df = DataFrame( np.random.randn(5, 3), @@ -248,6 +258,8 @@ def test_column_dups_operations(self): this_df["A"] = index check(this_df, expected_df) + def test_arithmetic_with_dups(self): + # operations for op in ["__add__", "__mul__", "__sub__", "__truediv__"]: df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) @@ -257,6 +269,7 @@ def test_column_dups_operations(self): result = getattr(df, op)(df) check(result, expected) + def test_changing_dtypes_with_duplicate_columns(self): # multiple assignments that change dtypes # the location indexer is a slice # GH 6120 @@ -272,7 +285,7 @@ def test_column_dups_operations(self): df["that"] = 1 check(df, expected) - def test_column_dups2(self): + def test_column_dups_drop(self): # drop buggy GH 6240 df = DataFrame( @@ -289,6 +302,7 @@ def test_column_dups2(self): result = df2.drop("C", axis=1) tm.assert_frame_equal(result, expected) + def test_column_dups_dropna(self): # dropna df = DataFrame( { @@ -310,43 +324,6 @@ def test_column_dups2(self): result = df.dropna(subset=["A", "C"], how="all") tm.assert_frame_equal(result, expected) - def test_getitem_boolean_series_with_duplicate_columns(self): - # boolean indexing - # GH 4879 - dups = ["A", "A", "C", "D"] - df = DataFrame( - np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" - ) - expected = df[df.C > 6] - expected.columns = dups - df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") - result = df[df.C > 6] - check(result, expected) - - def test_getitem_boolean_frame_with_duplicate_columns(self): - dups = ["A", "A", "C", "D"] - - # where - df = DataFrame( - np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" - ) - # `df > 6` is a DataFrame with the same shape+alignment as df - expected = df[df > 6] - expected.columns = dups - df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") - result = df[df > 6] - check(result, expected) - - def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self): - # `df.A > 6` is a DataFrame with a different shape from df - dups = ["A", "A", "C", "D"] - - # boolean with the duplicate raises - df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") - msg = "cannot reindex from a duplicate axis" - with pytest.raises(ValueError, match=msg): - df[df.A > 6] - def test_column_dups_indexing(self): # dup aligning operations should work @@ -357,6 +334,7 @@ def test_column_dups_indexing(self): result = df1.sub(df2) tm.assert_frame_equal(result, expected) + def test_dup_columns_comparisons(self): # equality df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"]) @@ -374,6 +352,7 @@ def test_column_dups_indexing(self): ) tm.assert_frame_equal(result, expected) + def test_mixed_column_selection(self): # mixed column selection # GH 5639 dfbool = DataFrame( @@ -387,6 +366,7 @@ def test_column_dups_indexing(self): result = dfbool[["one", "three", "one"]] check(result, expected) + def test_multi_axis_dups(self): # multi-axis dups # GH 6121 df = DataFrame( @@ -422,6 +402,7 @@ def test_columns_with_dups(self): expected = DataFrame([[1, 2, 3]], columns=["b", "a", "a.1"]) tm.assert_frame_equal(df, expected) + def test_columns_with_dup_index(self): # with a dup index df = DataFrame([[1, 2]], columns=["a", "a"]) df.columns = ["b", "b"] @@ -429,6 +410,7 @@ def test_columns_with_dups(self): expected = DataFrame([[1, 2]], columns=["b", "b"]) tm.assert_frame_equal(df, expected) + def test_multi_dtype(self): # multi-dtype df = DataFrame( [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], @@ -441,12 +423,14 @@ def test_columns_with_dups(self): ) tm.assert_frame_equal(df, expected) + def test_multi_dtype2(self): df = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a", "a", "a"]) df.columns = ["a", "a.1", "a.2", "a.3"] str(df) expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) tm.assert_frame_equal(df, expected) + def test_dups_across_blocks(self): # dups across blocks df_float = DataFrame(np.random.randn(10, 3), dtype="float64") df_int = DataFrame(np.random.randn(10, 3), dtype="int64") @@ -464,6 +448,7 @@ def test_columns_with_dups(self): for i in range(len(df.columns)): df.iloc[:, i] + def test_dup_columns_across_dtype(self): # dup columns across dtype GH 2079/2194 vals = [[1, -1, 2.0], [2, -2, 3.0]] rs = DataFrame(vals, columns=["A", "A", "B"]) @@ -486,36 +471,3 @@ def test_set_value_by_index(self): df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) - - @pytest.mark.parametrize( - "data1,data2,expected_data", - ( - ( - [[1, 2], [3, 4]], - [[0.5, 6], [7, 8]], - [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], - ), - ( - [[1, 2], [3, 4]], - [[5, 6], [7, 8]], - [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], - ), - ), - ) - def test_masking_duplicate_columns_mixed_dtypes( - self, - data1, - data2, - expected_data, - ): - # GH31954 - - df1 = DataFrame(np.array(data1)) - df2 = DataFrame(np.array(data2)) - df = pd.concat([df1, df2], axis=1) - - result = df[df > 2] - expected = DataFrame( - {i: np.array(col) for i, col in enumerate(expected_data)} - ).rename(columns={2: 0, 3: 1}) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_series.py b/pandas/tests/indexes/datetimes/methods/test_to_series.py new file mode 100644 index 0000000000000..5998fc0dde499 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_to_series.py @@ -0,0 +1,37 @@ +import numpy as np +import pytest + +from pandas import DatetimeIndex, Series +import pandas._testing as tm + + +class TestToSeries: + @pytest.fixture + def idx_expected(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + + assert expected.dtype == idx.dtype + return idx, expected + + def test_to_series_keep_tz_deprecated_true(self, idx_expected): + # convert to series while keeping the timezone + idx, expected = idx_expected + + msg = "stop passing 'keep_tz'" + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=True, index=[0, 1]) + assert msg in str(m[0].message) + + tm.assert_series_equal(result, expected) + + def test_to_series_keep_tz_deprecated_false(self, idx_expected): + idx, expected = idx_expected + + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=False, index=[0, 1]) + tm.assert_series_equal(result, expected.dt.tz_convert(None)) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) From 0b9cec774a805d4098e63deba2c1c0747874f2f1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Feb 2021 21:58:10 -0800 Subject: [PATCH 2/2] TST/REF: collect tests by method --- pandas/tests/frame/methods/test_reindex.py | 37 ++++++++++++++++++ pandas/tests/frame/methods/test_set_index.py | 4 ++ pandas/tests/frame/test_alter_axes.py | 41 ++------------------ 3 files changed, 44 insertions(+), 38 deletions(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index e4e2656f4337c..fc4829ac41a26 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -21,6 +21,43 @@ import pandas.core.common as com +class TestReindexSetIndex: + # Tests that check both reindex and set_index + + def test_dti_set_index_reindex_datetimeindex(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") + idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_dti_set_index_reindex_freq_with_tz(self): + # GH#11314 with tz + index = date_range( + datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" + ) + df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) + new_index = date_range( + datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" + ) + + result = df.set_index(new_index) + assert result.index.freq == index.freq + + def test_set_reset_index_intervalindex(self): + + df = DataFrame({"A": range(10)}) + ser = pd.cut(df.A, 5) + df["B"] = ser + df = df.set_index("B") + + df = df.reset_index() + + class TestDataFrameSelectReindex: # These are specific reindex-based tests; other indexing tests should go in # test_indexing diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index b66a95bae51c5..70232dfd1d79a 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -1,3 +1,7 @@ +""" +See also: test_reindex.py:TestReindexSetIndex +""" + from datetime import datetime, timedelta import numpy as np diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 9d56adba08b9a..c68171ab254c7 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,13 +1,14 @@ from datetime import datetime -import numpy as np import pytz -from pandas import DataFrame, cut, date_range +from pandas import DataFrame import pandas._testing as tm class TestDataFrameAlterAxes: + # Tests for setting index/columns attributes directly (i.e. __setattr__) + def test_set_axis_setattr_index(self): # GH 6785 # set the index manually @@ -18,31 +19,6 @@ def test_set_axis_setattr_index(self): df.pop("ts") tm.assert_frame_equal(df, expected) - def test_dti_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") - idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.reindex(idx2) - tm.assert_index_equal(df.index, idx2) - - def test_dti_set_index_reindex_with_tz(self): - # GH 11314 - # with tz - index = date_range( - datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" - ) - df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) - new_index = date_range( - datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" - ) - - result = df.set_index(new_index) - assert result.index.freq == index.freq - # Renaming def test_assign_columns(self, float_frame): @@ -52,14 +28,3 @@ def test_assign_columns(self, float_frame): df.columns = ["foo", "bar", "baz", "quux", "foo2"] tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False) tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False) - - -class TestIntervalIndex: - def test_set_reset_index(self): - - df = DataFrame({"A": range(10)}) - s = cut(df.A, 5) - df["B"] = s - df = df.set_index("B") - - df = df.reset_index()