diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py new file mode 100644 index 0000000000000..36a9a6b5b3d58 --- /dev/null +++ b/pandas/tests/frame/methods/test_align.py @@ -0,0 +1,245 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series +import pandas._testing as tm + + +class TestDataFrameAlign: + def test_align_float(self, float_frame): + af, bf = float_frame.align(float_frame) + assert af._data is not float_frame._data + + af, bf = float_frame.align(float_frame, copy=False) + assert af._data is float_frame._data + + # axis = 0 + other = float_frame.iloc[:-5, :3] + af, bf = float_frame.align(other, axis=0, fill_value=-1) + + tm.assert_index_equal(bf.columns, other.columns) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_b = other.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + diff_b_vals = bf.reindex(diff_b).values + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="right", axis=0) + tm.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(af.index, other.index) + + # axis = 1 + other = float_frame.iloc[:-5, :3].copy() + af, bf = float_frame.align(other, axis=1) + tm.assert_index_equal(bf.columns, float_frame.columns) + tm.assert_index_equal(bf.index, other.index) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_b = other.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + + # TODO(wesm): unused? + diff_b_vals = bf.reindex(diff_b).values # noqa + + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="inner", axis=1) + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None + ) + tm.assert_index_equal(bf.index, Index([])) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + # Try to align DataFrame to Series along bad axis + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + float_frame.align(af.iloc[0, :3], join="inner", axis=2) + + # align dataframe to series with broadcast or not + idx = float_frame.index + s = Series(range(len(idx)), index=idx) + + left, right = float_frame.align(s, axis=0) + tm.assert_index_equal(left.index, float_frame.index) + tm.assert_index_equal(right.index, float_frame.index) + assert isinstance(right, Series) + + left, right = float_frame.align(s, broadcast_axis=1) + tm.assert_index_equal(left.index, float_frame.index) + expected = {c: s for c in float_frame.columns} + expected = DataFrame( + expected, index=float_frame.index, columns=float_frame.columns + ) + tm.assert_frame_equal(right, expected) + + # see gh-9558 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df[df["a"] == 2] + expected = DataFrame([[2, 5]], index=[1], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + result = df.where(df["a"] == 2, 0) + expected = DataFrame({"a": [0, 2, 0], "b": [0, 5, 0]}) + tm.assert_frame_equal(result, expected) + + def test_align_int(self, int_frame): + # test other non-float types + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = int_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + def test_align_mixed_type(self, float_string_frame): + + af, bf = float_string_frame.align( + float_string_frame, join="inner", axis=1, method="pad" + ) + tm.assert_index_equal(bf.columns, float_string_frame.columns) + + def test_align_mixed_float(self, mixed_float_frame): + # mixed floats/ints + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + def test_align_mixed_int(self, mixed_int_frame): + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_int_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + def test_align_multiindex(self): + # GH#10665 + # same test cases as test_align_multiindex in test_series.py + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = pd.Index(range(2), name="b") + df1 = pd.DataFrame(np.arange(12, dtype="int64"), index=midx) + df2 = pd.DataFrame(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = df1.align(df2, join="left") + res2l, res2r = df2.align(df1, join="right") + + expl = df1 + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + res1l, res1r = df1.align(df2, join="right") + res2l, res2r = df2.align(df1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), range(2), range(2)], names=("a", "b", "c") + ) + expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + def test_align_series_combinations(self): + df = pd.DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + s = pd.Series([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = pd.DataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = pd.Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x") + + tm.assert_frame_equal(res1, exp1) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + tm.assert_series_equal(res1, exp2) + tm.assert_frame_equal(res2, exp1) + + def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): + aa, ab = a.align( + b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis + ) + + join_index, join_columns = None, None + + ea, eb = a, b + if axis is None or axis == 0: + join_index = a.index.join(b.index, how=how) + ea = ea.reindex(index=join_index) + eb = eb.reindex(index=join_index) + + if axis is None or axis == 1: + join_columns = a.columns.join(b.columns, how=how) + ea = ea.reindex(columns=join_columns) + eb = eb.reindex(columns=join_columns) + + ea = ea.fillna(axis=fill_axis, method=method, limit=limit) + eb = eb.fillna(axis=fill_axis, method=method, limit=limit) + + tm.assert_frame_equal(aa, ea) + tm.assert_frame_equal(ab, eb) + + @pytest.mark.parametrize("meth", ["pad", "bfill"]) + @pytest.mark.parametrize("ax", [0, 1, None]) + @pytest.mark.parametrize("fax", [0, 1]) + @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) + def test_align_fill_method(self, how, meth, ax, fax, float_frame): + df = float_frame + self._check_align_fill(df, how, meth, ax, fax) + + def _check_align_fill(self, frame, kind, meth, ax, fax): + left = frame.iloc[0:4, :10] + right = frame.iloc[2:, 6:] + empty = frame.iloc[:0, :0] + + self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty left + self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty right + self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # both empty + self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index ea21359c2f75c..4c33421a78ef2 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -558,188 +558,6 @@ def test_reindex_api_equivalence(self): for res in [res2, res3]: tm.assert_frame_equal(res1, res) - def test_align_float(self, float_frame): - af, bf = float_frame.align(float_frame) - assert af._data is not float_frame._data - - af, bf = float_frame.align(float_frame, copy=False) - assert af._data is float_frame._data - - # axis = 0 - other = float_frame.iloc[:-5, :3] - af, bf = float_frame.align(other, axis=0, fill_value=-1) - - tm.assert_index_equal(bf.columns, other.columns) - - # test fill value - join_idx = float_frame.index.join(other.index) - diff_a = float_frame.index.difference(join_idx) - diff_b = other.index.difference(join_idx) - diff_a_vals = af.reindex(diff_a).values - diff_b_vals = bf.reindex(diff_b).values - assert (diff_a_vals == -1).all() - - af, bf = float_frame.align(other, join="right", axis=0) - tm.assert_index_equal(bf.columns, other.columns) - tm.assert_index_equal(bf.index, other.index) - tm.assert_index_equal(af.index, other.index) - - # axis = 1 - other = float_frame.iloc[:-5, :3].copy() - af, bf = float_frame.align(other, axis=1) - tm.assert_index_equal(bf.columns, float_frame.columns) - tm.assert_index_equal(bf.index, other.index) - - # test fill value - join_idx = float_frame.index.join(other.index) - diff_a = float_frame.index.difference(join_idx) - diff_b = other.index.difference(join_idx) - diff_a_vals = af.reindex(diff_a).values - - # TODO(wesm): unused? - diff_b_vals = bf.reindex(diff_b).values # noqa - - assert (diff_a_vals == -1).all() - - af, bf = float_frame.align(other, join="inner", axis=1) - tm.assert_index_equal(bf.columns, other.columns) - - af, bf = float_frame.align(other, join="inner", axis=1, method="pad") - tm.assert_index_equal(bf.columns, other.columns) - - af, bf = float_frame.align( - other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None - ) - tm.assert_index_equal(bf.index, Index([])) - - af, bf = float_frame.align( - other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 - ) - tm.assert_index_equal(bf.index, Index([])) - - # Try to align DataFrame to Series along bad axis - msg = "No axis named 2 for object type DataFrame" - with pytest.raises(ValueError, match=msg): - float_frame.align(af.iloc[0, :3], join="inner", axis=2) - - # align dataframe to series with broadcast or not - idx = float_frame.index - s = Series(range(len(idx)), index=idx) - - left, right = float_frame.align(s, axis=0) - tm.assert_index_equal(left.index, float_frame.index) - tm.assert_index_equal(right.index, float_frame.index) - assert isinstance(right, Series) - - left, right = float_frame.align(s, broadcast_axis=1) - tm.assert_index_equal(left.index, float_frame.index) - expected = {c: s for c in float_frame.columns} - expected = DataFrame( - expected, index=float_frame.index, columns=float_frame.columns - ) - tm.assert_frame_equal(right, expected) - - # see gh-9558 - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - result = df[df["a"] == 2] - expected = DataFrame([[2, 5]], index=[1], columns=["a", "b"]) - tm.assert_frame_equal(result, expected) - - result = df.where(df["a"] == 2, 0) - expected = DataFrame({"a": [0, 2, 0], "b": [0, 5, 0]}) - tm.assert_frame_equal(result, expected) - - def test_align_int(self, int_frame): - # test other non-float types - other = DataFrame(index=range(5), columns=["A", "B", "C"]) - - af, bf = int_frame.align(other, join="inner", axis=1, method="pad") - tm.assert_index_equal(bf.columns, other.columns) - - def test_align_mixed_type(self, float_string_frame): - - af, bf = float_string_frame.align( - float_string_frame, join="inner", axis=1, method="pad" - ) - tm.assert_index_equal(bf.columns, float_string_frame.columns) - - def test_align_mixed_float(self, mixed_float_frame): - # mixed floats/ints - other = DataFrame(index=range(5), columns=["A", "B", "C"]) - - af, bf = mixed_float_frame.align( - other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 - ) - tm.assert_index_equal(bf.index, Index([])) - - def test_align_mixed_int(self, mixed_int_frame): - other = DataFrame(index=range(5), columns=["A", "B", "C"]) - - af, bf = mixed_int_frame.align( - other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 - ) - tm.assert_index_equal(bf.index, Index([])) - - def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): - aa, ab = a.align( - b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis - ) - - join_index, join_columns = None, None - - ea, eb = a, b - if axis is None or axis == 0: - join_index = a.index.join(b.index, how=how) - ea = ea.reindex(index=join_index) - eb = eb.reindex(index=join_index) - - if axis is None or axis == 1: - join_columns = a.columns.join(b.columns, how=how) - ea = ea.reindex(columns=join_columns) - eb = eb.reindex(columns=join_columns) - - ea = ea.fillna(axis=fill_axis, method=method, limit=limit) - eb = eb.fillna(axis=fill_axis, method=method, limit=limit) - - tm.assert_frame_equal(aa, ea) - tm.assert_frame_equal(ab, eb) - - @pytest.mark.parametrize("meth", ["pad", "bfill"]) - @pytest.mark.parametrize("ax", [0, 1, None]) - @pytest.mark.parametrize("fax", [0, 1]) - @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) - def test_align_fill_method(self, how, meth, ax, fax, float_frame): - df = float_frame - self._check_align_fill(df, how, meth, ax, fax) - - def _check_align_fill(self, frame, kind, meth, ax, fax): - left = frame.iloc[0:4, :10] - right = frame.iloc[2:, 6:] - empty = frame.iloc[:0, :0] - - self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - - # empty left - self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - - # empty right - self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - - # both empty - self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth) - self._check_align( - empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 - ) - def test_align_int_fill_bug(self): # GH #910 X = np.arange(10 * 10, dtype="float64").reshape(10, 10) @@ -754,61 +572,6 @@ def test_align_int_fill_bug(self): expected = df2 - df2.mean() tm.assert_frame_equal(result, expected) - def test_align_multiindex(self): - # GH 10665 - # same test cases as test_align_multiindex in test_series.py - - midx = pd.MultiIndex.from_product( - [range(2), range(3), range(2)], names=("a", "b", "c") - ) - idx = pd.Index(range(2), name="b") - df1 = pd.DataFrame(np.arange(12, dtype="int64"), index=midx) - df2 = pd.DataFrame(np.arange(2, dtype="int64"), index=idx) - - # these must be the same results (but flipped) - res1l, res1r = df1.align(df2, join="left") - res2l, res2r = df2.align(df1, join="right") - - expl = df1 - tm.assert_frame_equal(expl, res1l) - tm.assert_frame_equal(expl, res2r) - expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) - tm.assert_frame_equal(expr, res1r) - tm.assert_frame_equal(expr, res2l) - - res1l, res1r = df1.align(df2, join="right") - res2l, res2r = df2.align(df1, join="left") - - exp_idx = pd.MultiIndex.from_product( - [range(2), range(2), range(2)], names=("a", "b", "c") - ) - expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) - tm.assert_frame_equal(expl, res1l) - tm.assert_frame_equal(expl, res2r) - expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx) - tm.assert_frame_equal(expr, res1r) - tm.assert_frame_equal(expr, res2l) - - def test_align_series_combinations(self): - df = pd.DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) - s = pd.Series([1, 2, 4], index=list("ABD"), name="x") - - # frame + series - res1, res2 = df.align(s, axis=0) - exp1 = pd.DataFrame( - {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, - index=list("ABCDE"), - ) - exp2 = pd.Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x") - - tm.assert_frame_equal(res1, exp1) - tm.assert_series_equal(res2, exp2) - - # series + frame - res1, res2 = s.align(df) - tm.assert_series_equal(res1, exp2) - tm.assert_frame_equal(res2, exp1) - def test_filter(self, float_frame, float_string_frame): # Items filtered = float_frame.filter(["A", "B", "E"]) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 7d147ccfd7776..c6052896c9e96 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -8,162 +8,6 @@ import pandas._testing as tm -@pytest.mark.parametrize( - "first_slice,second_slice", - [ - [[2, None], [None, -5]], - [[None, 0], [None, -5]], - [[None, -5], [None, 0]], - [[None, 0], [None, 0]], - ], -) -@pytest.mark.parametrize("fill", [None, -1]) -def test_align(datetime_series, first_slice, second_slice, join_type, fill): - a = datetime_series[slice(*first_slice)] - b = datetime_series[slice(*second_slice)] - - aa, ab = a.align(b, join=join_type, fill_value=fill) - - join_index = a.index.join(b.index, how=join_type) - if fill is not None: - diff_a = aa.index.difference(join_index) - diff_b = ab.index.difference(join_index) - if len(diff_a) > 0: - assert (aa.reindex(diff_a) == fill).all() - if len(diff_b) > 0: - assert (ab.reindex(diff_b) == fill).all() - - ea = a.reindex(join_index) - eb = b.reindex(join_index) - - if fill is not None: - ea = ea.fillna(fill) - eb = eb.fillna(fill) - - tm.assert_series_equal(aa, ea) - tm.assert_series_equal(ab, eb) - assert aa.name == "ts" - assert ea.name == "ts" - assert ab.name == "ts" - assert eb.name == "ts" - - -@pytest.mark.parametrize( - "first_slice,second_slice", - [ - [[2, None], [None, -5]], - [[None, 0], [None, -5]], - [[None, -5], [None, 0]], - [[None, 0], [None, 0]], - ], -) -@pytest.mark.parametrize("method", ["pad", "bfill"]) -@pytest.mark.parametrize("limit", [None, 1]) -def test_align_fill_method( - datetime_series, first_slice, second_slice, join_type, method, limit -): - a = datetime_series[slice(*first_slice)] - b = datetime_series[slice(*second_slice)] - - aa, ab = a.align(b, join=join_type, method=method, limit=limit) - - join_index = a.index.join(b.index, how=join_type) - ea = a.reindex(join_index) - eb = b.reindex(join_index) - - ea = ea.fillna(method=method, limit=limit) - eb = eb.fillna(method=method, limit=limit) - - tm.assert_series_equal(aa, ea) - tm.assert_series_equal(ab, eb) - - -def test_align_nocopy(datetime_series): - b = datetime_series[:5].copy() - - # do copy - a = datetime_series.copy() - ra, _ = a.align(b, join="left") - ra[:5] = 5 - assert not (a[:5] == 5).any() - - # do not copy - a = datetime_series.copy() - ra, _ = a.align(b, join="left", copy=False) - ra[:5] = 5 - assert (a[:5] == 5).all() - - # do copy - a = datetime_series.copy() - b = datetime_series[:5].copy() - _, rb = a.align(b, join="right") - rb[:3] = 5 - assert not (b[:3] == 5).any() - - # do not copy - a = datetime_series.copy() - b = datetime_series[:5].copy() - _, rb = a.align(b, join="right", copy=False) - rb[:2] = 5 - assert (b[:2] == 5).all() - - -def test_align_same_index(datetime_series): - a, b = datetime_series.align(datetime_series, copy=False) - assert a.index is datetime_series.index - assert b.index is datetime_series.index - - a, b = datetime_series.align(datetime_series, copy=True) - assert a.index is not datetime_series.index - assert b.index is not datetime_series.index - - -def test_align_multiindex(): - # GH 10665 - - midx = pd.MultiIndex.from_product( - [range(2), range(3), range(2)], names=("a", "b", "c") - ) - idx = pd.Index(range(2), name="b") - s1 = pd.Series(np.arange(12, dtype="int64"), index=midx) - s2 = pd.Series(np.arange(2, dtype="int64"), index=idx) - - # these must be the same results (but flipped) - res1l, res1r = s1.align(s2, join="left") - res2l, res2r = s2.align(s1, join="right") - - expl = s1 - tm.assert_series_equal(expl, res1l) - tm.assert_series_equal(expl, res2r) - expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) - tm.assert_series_equal(expr, res1r) - tm.assert_series_equal(expr, res2l) - - res1l, res1r = s1.align(s2, join="right") - res2l, res2r = s2.align(s1, join="left") - - exp_idx = pd.MultiIndex.from_product( - [range(2), range(2), range(2)], names=("a", "b", "c") - ) - expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) - tm.assert_series_equal(expl, res1l) - tm.assert_series_equal(expl, res2r) - expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx) - tm.assert_series_equal(expr, res1r) - tm.assert_series_equal(expr, res2l) - - -@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None]) -def test_align_method(method): - # GH31788 - ser = pd.Series(range(3), index=range(3)) - df = pd.DataFrame(0.0, index=range(3), columns=range(3)) - - result_ser, result_df = ser.align(df, method=method) - tm.assert_series_equal(result_ser, ser) - tm.assert_frame_equal(result_df, df) - - def test_reindex(datetime_series, string_series): identity = string_series.reindex(string_series.index) diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py new file mode 100644 index 0000000000000..974ba5d1e35a7 --- /dev/null +++ b/pandas/tests/series/methods/test_align.py @@ -0,0 +1,182 @@ +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import Series, date_range, period_range +import pandas._testing as tm + + +@pytest.mark.parametrize( + "first_slice,second_slice", + [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + [[None, -5], [None, 0]], + [[None, 0], [None, 0]], + ], +) +@pytest.mark.parametrize("fill", [None, -1]) +def test_align(datetime_series, first_slice, second_slice, join_type, fill): + a = datetime_series[slice(*first_slice)] + b = datetime_series[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, fill_value=fill) + + join_index = a.index.join(b.index, how=join_type) + if fill is not None: + diff_a = aa.index.difference(join_index) + diff_b = ab.index.difference(join_index) + if len(diff_a) > 0: + assert (aa.reindex(diff_a) == fill).all() + if len(diff_b) > 0: + assert (ab.reindex(diff_b) == fill).all() + + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + if fill is not None: + ea = ea.fillna(fill) + eb = eb.fillna(fill) + + tm.assert_series_equal(aa, ea) + tm.assert_series_equal(ab, eb) + assert aa.name == "ts" + assert ea.name == "ts" + assert ab.name == "ts" + assert eb.name == "ts" + + +@pytest.mark.parametrize( + "first_slice,second_slice", + [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + [[None, -5], [None, 0]], + [[None, 0], [None, 0]], + ], +) +@pytest.mark.parametrize("method", ["pad", "bfill"]) +@pytest.mark.parametrize("limit", [None, 1]) +def test_align_fill_method( + datetime_series, first_slice, second_slice, join_type, method, limit +): + a = datetime_series[slice(*first_slice)] + b = datetime_series[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, method=method, limit=limit) + + join_index = a.index.join(b.index, how=join_type) + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + ea = ea.fillna(method=method, limit=limit) + eb = eb.fillna(method=method, limit=limit) + + tm.assert_series_equal(aa, ea) + tm.assert_series_equal(ab, eb) + + +def test_align_nocopy(datetime_series): + b = datetime_series[:5].copy() + + # do copy + a = datetime_series.copy() + ra, _ = a.align(b, join="left") + ra[:5] = 5 + assert not (a[:5] == 5).any() + + # do not copy + a = datetime_series.copy() + ra, _ = a.align(b, join="left", copy=False) + ra[:5] = 5 + assert (a[:5] == 5).all() + + # do copy + a = datetime_series.copy() + b = datetime_series[:5].copy() + _, rb = a.align(b, join="right") + rb[:3] = 5 + assert not (b[:3] == 5).any() + + # do not copy + a = datetime_series.copy() + b = datetime_series[:5].copy() + _, rb = a.align(b, join="right", copy=False) + rb[:2] = 5 + assert (b[:2] == 5).all() + + +def test_align_same_index(datetime_series): + a, b = datetime_series.align(datetime_series, copy=False) + assert a.index is datetime_series.index + assert b.index is datetime_series.index + + a, b = datetime_series.align(datetime_series, copy=True) + assert a.index is not datetime_series.index + assert b.index is not datetime_series.index + + +def test_align_multiindex(): + # GH 10665 + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = pd.Index(range(2), name="b") + s1 = pd.Series(np.arange(12, dtype="int64"), index=midx) + s2 = pd.Series(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = s1.align(s2, join="left") + res2l, res2r = s2.align(s1, join="right") + + expl = s1 + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + res1l, res1r = s1.align(s2, join="right") + res2l, res2r = s2.align(s1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), range(2), range(2)], names=("a", "b", "c") + ) + expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + +@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None]) +def test_align_with_dataframe_method(method): + # GH31788 + ser = pd.Series(range(3), index=range(3)) + df = pd.DataFrame(0.0, index=range(3), columns=range(3)) + + result_ser, result_df = ser.align(df, method=method) + tm.assert_series_equal(result_ser, ser) + tm.assert_frame_equal(result_df, df) + + +def test_align_dt64tzindex_mismatched_tzs(): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + ser = Series(np.random.randn(len(idx1)), index=idx1) + ser_central = ser.tz_convert("US/Central") + # different timezones convert to UTC + + new1, new2 = ser.align(ser_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + +def test_align_periodindex(join_type): + rng = period_range("1/1/2000", "1/1/2010", freq="A") + ts = Series(np.random.randn(len(rng)), index=rng) + + # TODO: assert something? + ts.align(ts[::2], join=join_type) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index f41245c2872a7..d5a3efcf5757c 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -98,12 +98,6 @@ def test_intercept_astype_object(self): result = df.values.squeeze() assert (result[:, 0] == expected.values).all() - def test_align_series(self, join_type): - rng = period_range("1/1/2000", "1/1/2010", freq="A") - ts = Series(np.random.randn(len(rng)), index=rng) - - ts.align(ts[::2], join=join_type) - @pytest.mark.parametrize( "input_vals", [ diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index ae4fd12abdb88..dfff1d581fe44 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -6,7 +6,6 @@ from dateutil.tz import tzoffset import numpy as np import pytest -import pytz from pandas._libs.tslibs import conversion, timezones @@ -38,16 +37,6 @@ def test_string_index_alias_tz_aware(self, tz): result = ser["1/3/2000"] tm.assert_almost_equal(result, ser[2]) - def test_series_align_aware(self): - idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") - ser = Series(np.random.randn(len(idx1)), index=idx1) - ser_central = ser.tz_convert("US/Central") - # # different timezones convert to UTC - - new1, new2 = ser.align(ser_central) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"]) def test_getitem_pydatetime_tz(self, tzstr): tz = timezones.maybe_get_tz(tzstr)