From 8c22d1a75ddf88db69a1bd913bdd50391dd0ab03 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 16:50:11 -0800 Subject: [PATCH 1/9] fixturize where tests --- pandas/tests/frame/indexing/test_datetime.py | 7 - pandas/tests/frame/indexing/test_where.py | 128 ++++++++++--------- 2 files changed, 67 insertions(+), 68 deletions(-) diff --git a/pandas/tests/frame/indexing/test_datetime.py b/pandas/tests/frame/indexing/test_datetime.py index a1c12be2b0180..6bfcac3793584 100644 --- a/pandas/tests/frame/indexing/test_datetime.py +++ b/pandas/tests/frame/indexing/test_datetime.py @@ -45,13 +45,6 @@ def test_set_reset(self): df = result.set_index("foo") tm.assert_index_equal(df.index, idx) - def test_transpose(self, timezone_frame): - - result = timezone_frame.T - expected = DataFrame(timezone_frame.values.T) - expected.index = ["A", "B", "C"] - tm.assert_frame_equal(result, expected) - def test_scalar_assignment(self): # issue #19843 df = pd.DataFrame(index=(0, 1, 2)) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index df1b128dcd227..507b2e9cd237b 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -10,22 +10,30 @@ import pandas._testing as tm -class TestDataFrameIndexingWhere: - def test_where(self, float_string_frame, mixed_float_frame, mixed_int_frame): - default_frame = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) - - def _safe_add(df): - # only add to the numeric items - def is_ok(s): - return ( - issubclass(s.dtype.type, (np.integer, np.floating)) - and s.dtype != "uint8" - ) - - return DataFrame( - dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items()) - ) +@pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"]) +def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame): + if request.param == "default": + return DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + if request.param == "float_string": + return float_string_frame + if request.param == "mixed_float": + return mixed_float_frame + if request.param == "mixed_int": + return mixed_int_frame + + +def _safe_add(df): + # only add to the numeric items + def is_ok(s): + return ( + issubclass(s.dtype.type, (np.integer, np.floating)) and s.dtype != "uint8" + ) + + return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items())) + +class TestDataFrameIndexingWhere: + def test_where_get(self, where_frame, float_string_frame): def _check_get(df, cond, check_dtypes=True): other1 = _safe_add(df) rs = df.where(cond, other1) @@ -40,19 +48,15 @@ def _check_get(df, cond, check_dtypes=True): assert (rs.dtypes == df.dtypes).all() # check getting - for df in [ - default_frame, - float_string_frame, - mixed_float_frame, - mixed_int_frame, - ]: - if df is float_string_frame: - with pytest.raises(TypeError): - df > 0 - continue - cond = df > 0 - _check_get(df, cond) - + df = where_frame + if df is float_string_frame: + with pytest.raises(TypeError): + df > 0 + return + cond = df > 0 + _check_get(df, cond) + + def test_where_upcasting(self): # upcasting case (GH # 2794) df = DataFrame( { @@ -78,6 +82,7 @@ def _check_get(df, cond, check_dtypes=True): tm.assert_series_equal(result, expected) + def test_where_alignment(self, where_frame, float_string_frame): # aligning def _check_align(df, cond, other, check_dtypes=True): rs = df.where(cond, other) @@ -107,27 +112,30 @@ def _check_align(df, cond, other, check_dtypes=True): if check_dtypes and not isinstance(other, np.ndarray): assert (rs.dtypes == df.dtypes).all() - for df in [float_string_frame, mixed_float_frame, mixed_int_frame]: - if df is float_string_frame: - with pytest.raises(TypeError): - df > 0 - continue + df = where_frame + if df is float_string_frame: + with pytest.raises(TypeError): + df > 0 + return - # other is a frame - cond = (df > 0)[1:] - _check_align(df, cond, _safe_add(df)) + # other is a frame + cond = (df > 0)[1:] + _check_align(df, cond, _safe_add(df)) - # check other is ndarray - cond = df > 0 - _check_align(df, cond, (_safe_add(df).values)) + # check other is ndarray + cond = df > 0 + _check_align(df, cond, (_safe_add(df).values)) - # integers are upcast, so don't check the dtypes - cond = df > 0 - check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes) - _check_align(df, cond, np.nan, check_dtypes=check_dtypes) + # integers are upcast, so don't check the dtypes + cond = df > 0 + check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes) + _check_align(df, cond, np.nan, check_dtypes=check_dtypes) + def test_where_invalid(self): # invalid conditions - df = default_frame + df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + cond = df > 0 + err1 = (df + 1).values[0:2, :] msg = "other must be the same shape as self when an ndarray" with pytest.raises(ValueError, match=msg): @@ -144,7 +152,9 @@ def _check_align(df, cond, other, check_dtypes=True): with pytest.raises(ValueError, match=msg): df.mask(0) + def test_where_set(self, where_frame, float_string_frame): # where inplace + def _check_set(df, cond, check_dtypes=True): dfi = df.copy() econd = cond.reindex_like(df).fillna(True) @@ -160,27 +170,23 @@ def _check_set(df, cond, check_dtypes=True): v = np.dtype("float64") assert dfi[k].dtype == v - for df in [ - default_frame, - float_string_frame, - mixed_float_frame, - mixed_int_frame, - ]: - if df is float_string_frame: - with pytest.raises(TypeError): - df > 0 - continue + df = where_frame + if df is float_string_frame: + with pytest.raises(TypeError): + df > 0 + return - cond = df > 0 - _check_set(df, cond) + cond = df > 0 + _check_set(df, cond) - cond = df >= 0 - _check_set(df, cond) + cond = df >= 0 + _check_set(df, cond) - # aligning - cond = (df >= 0)[1:] - _check_set(df, cond) + # aligning + cond = (df >= 0)[1:] + _check_set(df, cond) + def test_where_series_slicing(self): # GH 10218 # test DataFrame.where with Series slicing df = DataFrame({"a": range(3), "b": range(4, 7)}) From 2578185c4244cc8eab0bb441e761c89b017c25f1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 16:58:57 -0800 Subject: [PATCH 2/9] organize DataFrame tests --- pandas/tests/frame/indexing/test_indexing.py | 115 ------------------- pandas/tests/frame/indexing/test_xs.py | 95 +++++++++++++++ pandas/tests/frame/methods/test_head_tail.py | 30 +++++ pandas/tests/frame/methods/test_transpose.py | 7 ++ 4 files changed, 132 insertions(+), 115 deletions(-) create mode 100644 pandas/tests/frame/indexing/test_xs.py create mode 100644 pandas/tests/frame/methods/test_head_tail.py diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 64d0f9ee2b062..5f9a28bda02a0 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1916,89 +1916,6 @@ def test_at_time_between_time_datetimeindex(self): result.loc[bkey] = df.iloc[binds] tm.assert_frame_equal(result, df) - def test_xs(self, float_frame, datetime_frame): - idx = float_frame.index[5] - xs = float_frame.xs(idx) - for item, value in xs.items(): - if np.isnan(value): - assert np.isnan(float_frame[item][idx]) - else: - assert value == float_frame[item][idx] - - # mixed-type xs - test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} - frame = DataFrame(test_data) - xs = frame.xs("1") - assert xs.dtype == np.object_ - assert xs["A"] == 1 - assert xs["B"] == "1" - - with pytest.raises( - KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") - ): - datetime_frame.xs(datetime_frame.index[0] - BDay()) - - # xs get column - series = float_frame.xs("A", axis=1) - expected = float_frame["A"] - tm.assert_series_equal(series, expected) - - # view is returned if possible - series = float_frame.xs("A", axis=1) - series[:] = 5 - assert (expected == 5).all() - - def test_xs_corner(self): - # pathological mixed-type reordering case - df = DataFrame(index=[0]) - df["A"] = 1.0 - df["B"] = "foo" - df["C"] = 2.0 - df["D"] = "bar" - df["E"] = 3.0 - - xs = df.xs(0) - exp = pd.Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0) - tm.assert_series_equal(xs, exp) - - # no columns but Index(dtype=object) - df = DataFrame(index=["a", "b", "c"]) - result = df.xs("a") - expected = Series([], name="a", index=pd.Index([]), dtype=np.float64) - tm.assert_series_equal(result, expected) - - def test_xs_duplicates(self): - df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) - - cross = df.xs("c") - exp = df.iloc[2] - tm.assert_series_equal(cross, exp) - - def test_xs_keep_level(self): - df = DataFrame( - { - "day": {0: "sat", 1: "sun"}, - "flavour": {0: "strawberry", 1: "strawberry"}, - "sales": {0: 10, 1: 12}, - "year": {0: 2008, 1: 2008}, - } - ).set_index(["year", "flavour", "day"]) - result = df.xs("sat", level="day", drop_level=False) - expected = df[:1] - tm.assert_frame_equal(result, expected) - - result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) - tm.assert_frame_equal(result, expected) - - def test_xs_view(self): - # in 0.14 this will return a view if possible a copy otherwise, but - # this is numpy dependent - - dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) - - dm.xs(2)[:] = 10 - assert (dm.xs(2) == 10).all() - def test_index_namedtuple(self): from collections import namedtuple @@ -2154,31 +2071,6 @@ def test_mask_callable(self): tm.assert_frame_equal(result, exp) tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10)) - def test_head_tail(self, float_frame): - tm.assert_frame_equal(float_frame.head(), float_frame[:5]) - tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) - - tm.assert_frame_equal(float_frame.head(0), float_frame[0:0]) - tm.assert_frame_equal(float_frame.tail(0), float_frame[0:0]) - - tm.assert_frame_equal(float_frame.head(-1), float_frame[:-1]) - tm.assert_frame_equal(float_frame.tail(-1), float_frame[1:]) - tm.assert_frame_equal(float_frame.head(1), float_frame[:1]) - tm.assert_frame_equal(float_frame.tail(1), float_frame[-1:]) - # with a float index - df = float_frame.copy() - df.index = np.arange(len(float_frame)) + 0.1 - tm.assert_frame_equal(df.head(), df.iloc[:5]) - tm.assert_frame_equal(df.tail(), df.iloc[-5:]) - tm.assert_frame_equal(df.head(0), df[0:0]) - tm.assert_frame_equal(df.tail(0), df[0:0]) - tm.assert_frame_equal(df.head(-1), df.iloc[:-1]) - tm.assert_frame_equal(df.tail(-1), df.iloc[1:]) - # test empty dataframe - empty_df = DataFrame() - tm.assert_frame_equal(empty_df.tail(), empty_df) - tm.assert_frame_equal(empty_df.head(), empty_df) - def test_type_error_multiindex(self): # See gh-12218 df = DataFrame( @@ -2269,10 +2161,3 @@ def test_set_reset(self): df = result.set_index("foo") tm.assert_index_equal(df.index, idx) - - def test_transpose(self, uint64_frame): - - result = uint64_frame.T - expected = DataFrame(uint64_frame.values.T) - expected.index = ["A", "B"] - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py new file mode 100644 index 0000000000000..71b40585f0c2f --- /dev/null +++ b/pandas/tests/frame/indexing/test_xs.py @@ -0,0 +1,95 @@ +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm + +from pandas.tseries.offsets import BDay + + +class TestXS: + def test_xs(self, float_frame, datetime_frame): + idx = float_frame.index[5] + xs = float_frame.xs(idx) + for item, value in xs.items(): + if np.isnan(value): + assert np.isnan(float_frame[item][idx]) + else: + assert value == float_frame[item][idx] + + # mixed-type xs + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data) + xs = frame.xs("1") + assert xs.dtype == np.object_ + assert xs["A"] == 1 + assert xs["B"] == "1" + + with pytest.raises( + KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") + ): + datetime_frame.xs(datetime_frame.index[0] - BDay()) + + # xs get column + series = float_frame.xs("A", axis=1) + expected = float_frame["A"] + tm.assert_series_equal(series, expected) + + # view is returned if possible + series = float_frame.xs("A", axis=1) + series[:] = 5 + assert (expected == 5).all() + + def test_xs_corner(self): + # pathological mixed-type reordering case + df = DataFrame(index=[0]) + df["A"] = 1.0 + df["B"] = "foo" + df["C"] = 2.0 + df["D"] = "bar" + df["E"] = 3.0 + + xs = df.xs(0) + exp = pd.Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0) + tm.assert_series_equal(xs, exp) + + # no columns but Index(dtype=object) + df = DataFrame(index=["a", "b", "c"]) + result = df.xs("a") + expected = Series([], name="a", index=pd.Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_xs_duplicates(self): + df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) + + cross = df.xs("c") + exp = df.iloc[2] + tm.assert_series_equal(cross, exp) + + def test_xs_keep_level(self): + df = DataFrame( + { + "day": {0: "sat", 1: "sun"}, + "flavour": {0: "strawberry", 1: "strawberry"}, + "sales": {0: 10, 1: 12}, + "year": {0: 2008, 1: 2008}, + } + ).set_index(["year", "flavour", "day"]) + result = df.xs("sat", level="day", drop_level=False) + expected = df[:1] + tm.assert_frame_equal(result, expected) + + result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) + tm.assert_frame_equal(result, expected) + + def test_xs_view(self): + # in 0.14 this will return a view if possible a copy otherwise, but + # this is numpy dependent + + dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) + + dm.xs(2)[:] = 10 + assert (dm.xs(2) == 10).all() diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py new file mode 100644 index 0000000000000..93763bc12ce0d --- /dev/null +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -0,0 +1,30 @@ +import numpy as np + +from pandas import DataFrame +import pandas._testing as tm + + +def test_head_tail(float_frame): + tm.assert_frame_equal(float_frame.head(), float_frame[:5]) + tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) + + tm.assert_frame_equal(float_frame.head(0), float_frame[0:0]) + tm.assert_frame_equal(float_frame.tail(0), float_frame[0:0]) + + tm.assert_frame_equal(float_frame.head(-1), float_frame[:-1]) + tm.assert_frame_equal(float_frame.tail(-1), float_frame[1:]) + tm.assert_frame_equal(float_frame.head(1), float_frame[:1]) + tm.assert_frame_equal(float_frame.tail(1), float_frame[-1:]) + # with a float index + df = float_frame.copy() + df.index = np.arange(len(float_frame)) + 0.1 + tm.assert_frame_equal(df.head(), df.iloc[:5]) + tm.assert_frame_equal(df.tail(), df.iloc[-5:]) + tm.assert_frame_equal(df.head(0), df[0:0]) + tm.assert_frame_equal(df.tail(0), df[0:0]) + tm.assert_frame_equal(df.head(-1), df.iloc[:-1]) + tm.assert_frame_equal(df.tail(-1), df.iloc[1:]) + # test empty dataframe + empty_df = DataFrame() + tm.assert_frame_equal(empty_df.tail(), empty_df) + tm.assert_frame_equal(empty_df.head(), empty_df) diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 428b9e5068407..0b1ace4fd2e7a 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -41,3 +41,10 @@ def test_transpose_object_to_tzaware_mixed_tz(self): assert (df2.dtypes == object).all() res2 = df2.T assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() + + def test_transpose_uint64(self, uint64_frame): + + result = uint64_frame.T + expected = pd.DataFrame(uint64_frame.values.T) + expected.index = ["A", "B"] + tm.assert_frame_equal(result, expected) From d5d3378bf1a9f90b807b59e1418498dc3ab2e37a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 17:01:55 -0800 Subject: [PATCH 3/9] separate out get tests --- pandas/tests/frame/indexing/test_indexing.py | 44 ++++++++++---------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 5f9a28bda02a0..9d4dfc65f628d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -28,6 +28,29 @@ from pandas.tseries.offsets import BDay +class TestGet: + def test_get(self, float_frame): + b = float_frame.get("B") + tm.assert_series_equal(b, float_frame["B"]) + + assert float_frame.get("foo") is None + tm.assert_series_equal( + float_frame.get("foo", float_frame["B"]), float_frame["B"] + ) + + @pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(columns=list("AB")), + DataFrame(columns=list("AB"), index=range(3)), + ], + ) + def test_get_none(self, df): + # see gh-5652 + assert df.get(None) is None + + class TestDataFrameIndexing: def test_getitem(self, float_frame): # Slicing @@ -64,27 +87,6 @@ def test_getitem_dupe_cols(self): with pytest.raises(KeyError, match=re.escape(msg)): df[["baf"]] - def test_get(self, float_frame): - b = float_frame.get("B") - tm.assert_series_equal(b, float_frame["B"]) - - assert float_frame.get("foo") is None - tm.assert_series_equal( - float_frame.get("foo", float_frame["B"]), float_frame["B"] - ) - - @pytest.mark.parametrize( - "df", - [ - DataFrame(), - DataFrame(columns=list("AB")), - DataFrame(columns=list("AB"), index=range(3)), - ], - ) - def test_get_none(self, df): - # see gh-5652 - assert df.get(None) is None - @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) From 203a7f31c9c4d3d54747d580bc7e7ee7f1f5b532 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:23:12 -0800 Subject: [PATCH 4/9] organize transpose tests --- pandas/tests/frame/indexing/test_iat.py | 8 +++++ pandas/tests/frame/indexing/test_indexing.py | 8 ----- pandas/tests/frame/methods/test_transpose.py | 26 ++++++++++++++ pandas/tests/frame/test_api.py | 38 ++++---------------- 4 files changed, 41 insertions(+), 39 deletions(-) create mode 100644 pandas/tests/frame/indexing/test_iat.py diff --git a/pandas/tests/frame/indexing/test_iat.py b/pandas/tests/frame/indexing/test_iat.py new file mode 100644 index 0000000000000..e583fff9b92db --- /dev/null +++ b/pandas/tests/frame/indexing/test_iat.py @@ -0,0 +1,8 @@ + +def test_iat(self, float_frame): + + for i, row in enumerate(float_frame.index): + for j, col in enumerate(float_frame.columns): + result = float_frame.iat[i, j] + expected = float_frame.at[row, col] + assert result == expected diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9d4dfc65f628d..2e86acf4f789a 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1549,14 +1549,6 @@ def test_loc_duplicates(self): df.loc[trange[bool_idx], "A"] += 6 tm.assert_frame_equal(df, expected) - def test_iat(self, float_frame): - - for i, row in enumerate(float_frame.index): - for j, col in enumerate(float_frame.columns): - result = float_frame.iat[i, j] - expected = float_frame.at[row, col] - assert result == expected - @pytest.mark.parametrize( "method,expected_values", [ diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 0b1ace4fd2e7a..a5fe5f3a6d5e4 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -1,3 +1,5 @@ +import numpy as np + import pandas as pd import pandas._testing as tm @@ -48,3 +50,27 @@ def test_transpose_uint64(self, uint64_frame): expected = pd.DataFrame(uint64_frame.values.T) expected.index = ["A", "B"] tm.assert_frame_equal(result, expected) + + def test_transpose_float(self, float_frame): + frame = float_frame + dft = frame.T + for idx, series in dft.items(): + for col, value in series.items(): + if np.isnan(value): + assert np.isnan(frame[col][idx]) + else: + assert value == frame[col][idx] + + # mixed type + index, data = tm.getMixedTypeDict() + mixed = pd.DataFrame(data, index=index) + + mixed_T = mixed.T + for col, s in mixed_T.items(): + assert s.dtype == np.object_ + + def test_transpose_get_view(self, float_frame): + dft = float_frame.T + dft.values[:, 5:10] = 5 + + assert (float_frame.values[5:10] == 5).all() diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 9de5d6fe16a0d..17cc50661e3cb 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -14,15 +14,15 @@ class TestDataFrameMisc: - def test_copy_index_name_checking(self, float_frame): + @pytest.mark.parametrize("attr", ["index", "columns"]) + def test_copy_index_name_checking(self, float_frame, attr): # don't want to be able to modify the index stored elsewhere after # making a copy - for attr in ("index", "columns"): - ind = getattr(float_frame, attr) - ind.name = None - cp = float_frame.copy() - getattr(cp, attr).name = "foo" - assert getattr(float_frame, attr).name is None + ind = getattr(float_frame, attr) + ind.name = None + cp = float_frame.copy() + getattr(cp, attr).name = "foo" + assert getattr(float_frame, attr).name is None def test_getitem_pop_assign_name(self, float_frame): s = float_frame["A"] @@ -358,24 +358,6 @@ def test_to_numpy_copy(self): assert df.to_numpy(copy=False).base is arr assert df.to_numpy(copy=True).base is None - def test_transpose(self, float_frame): - frame = float_frame - dft = frame.T - for idx, series in dft.items(): - for col, value in series.items(): - if np.isnan(value): - assert np.isnan(frame[col][idx]) - else: - assert value == frame[col][idx] - - # mixed type - index, data = tm.getMixedTypeDict() - mixed = DataFrame(data, index=index) - - mixed_T = mixed.T - for col, s in mixed_T.items(): - assert s.dtype == np.object_ - def test_swapaxes(self): df = DataFrame(np.random.randn(10, 5)) tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) @@ -470,12 +452,6 @@ def test_deepcopy(self, float_frame): for idx, value in series.items(): assert float_frame["A"][idx] != value - def test_transpose_get_view(self, float_frame): - dft = float_frame.T - dft.values[:, 5:10] = 5 - - assert (float_frame.values[5:10] == 5).all() - def test_inplace_return_self(self): # GH 1893 From a7a102aa95a90d157bce95a8544f3c6032509193 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:25:16 -0800 Subject: [PATCH 5/9] implement test_update --- pandas/tests/frame/methods/test_update.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/frame/methods/test_update.py diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 800ecc77f1bc3110c1c92d56a236dd436d075707 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:27:23 -0800 Subject: [PATCH 6/9] organize update tests --- .../tests/frame/methods/test_combine_first.py | 0 pandas/tests/frame/methods/test_update.py | 135 ++++++++++++++++++ pandas/tests/frame/test_combine_concat.py | 129 ----------------- 3 files changed, 135 insertions(+), 129 deletions(-) create mode 100644 pandas/tests/frame/methods/test_combine_first.py diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index e69de29bb2d1d..d9de026dbf4e9 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -0,0 +1,135 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestDataFrameUpdate: + def test_update_nan(self): + # #15593 #15617 + # test 1 + df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) + df2 = DataFrame({"A": [None, 2, 3]}) + expected = df1.copy() + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + # test 2 + df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)}) + df2 = DataFrame({"A": [None, 2, 3]}) + expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + def test_update(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other) + + expected = DataFrame( + [[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + def test_update_dtypes(self): + + # gh 3016 + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + df.update(other) + + expected = DataFrame( + [[45.0, 45.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + tm.assert_frame_equal(df, expected) + + def test_update_nooverwrite(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other, overwrite=False) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]] + ) + tm.assert_frame_equal(df, expected) + + def test_update_filtered(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other, filter_func=lambda x: x > 2) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "bad_kwarg, exception, msg", + [ + # errors must be 'ignore' or 'raise' + ({"errors": "something"}, ValueError, "The parameter errors must.*"), + ({"join": "inner"}, NotImplementedError, "Only left join is supported"), + ], + ) + def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): + df = DataFrame([[1.5, 1, 3.0]]) + with pytest.raises(exception, match=msg): + df.update(df, **bad_kwarg) + + def test_update_raise_on_overlap(self): + df = DataFrame( + [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2]) + with pytest.raises(ValueError, match="Data overlaps"): + df.update(other, errors="raise") + + def test_update_from_non_df(self): + d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])} + df = DataFrame(d) + + d["a"] = Series([5, 6, 7, 8]) + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + df = DataFrame(d) + + d["a"] = [5, 6, 7, 8] + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + def test_update_datetime_tz(self): + # GH 25807 + result = DataFrame([pd.Timestamp("2019", tz="UTC")]) + result.update(result) + expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 9bad54b051d6c..6108637a5c300 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -128,115 +128,6 @@ def test_concat_tuple_keys(self): ) tm.assert_frame_equal(results, expected) - def test_update(self): - df = DataFrame( - [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] - ) - - other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) - - df.update(other) - - expected = DataFrame( - [[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] - ) - tm.assert_frame_equal(df, expected) - - def test_update_dtypes(self): - - # gh 3016 - df = DataFrame( - [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], - columns=["A", "B", "bool1", "bool2"], - ) - - other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) - df.update(other) - - expected = DataFrame( - [[45.0, 45.0, False, True], [4.0, 5.0, True, False]], - columns=["A", "B", "bool1", "bool2"], - ) - tm.assert_frame_equal(df, expected) - - def test_update_nooverwrite(self): - df = DataFrame( - [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] - ) - - other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) - - df.update(other, overwrite=False) - - expected = DataFrame( - [[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]] - ) - tm.assert_frame_equal(df, expected) - - def test_update_filtered(self): - df = DataFrame( - [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] - ) - - other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) - - df.update(other, filter_func=lambda x: x > 2) - - expected = DataFrame( - [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] - ) - tm.assert_frame_equal(df, expected) - - @pytest.mark.parametrize( - "bad_kwarg, exception, msg", - [ - # errors must be 'ignore' or 'raise' - ({"errors": "something"}, ValueError, "The parameter errors must.*"), - ({"join": "inner"}, NotImplementedError, "Only left join is supported"), - ], - ) - def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): - df = DataFrame([[1.5, 1, 3.0]]) - with pytest.raises(exception, match=msg): - df.update(df, **bad_kwarg) - - def test_update_raise_on_overlap(self): - df = DataFrame( - [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] - ) - - other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2]) - with pytest.raises(ValueError, match="Data overlaps"): - df.update(other, errors="raise") - - def test_update_from_non_df(self): - d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])} - df = DataFrame(d) - - d["a"] = Series([5, 6, 7, 8]) - df.update(d) - - expected = DataFrame(d) - - tm.assert_frame_equal(df, expected) - - d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} - df = DataFrame(d) - - d["a"] = [5, 6, 7, 8] - df.update(d) - - expected = DataFrame(d) - - tm.assert_frame_equal(df, expected) - - def test_update_datetime_tz(self): - # GH 25807 - result = DataFrame([pd.Timestamp("2019", tz="UTC")]) - result.update(result) - expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) - tm.assert_frame_equal(result, expected) - def test_join_str_datetime(self): str_dates = ["20120209", "20120222"] dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] @@ -776,23 +667,3 @@ def test_concat_datetime_datetime64_frame(self): # it works! pd.concat([df1, df2_obj]) - - -class TestDataFrameUpdate: - def test_update_nan(self): - # #15593 #15617 - # test 1 - df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) - df2 = DataFrame({"A": [None, 2, 3]}) - expected = df1.copy() - df1.update(df2, overwrite=False) - - tm.assert_frame_equal(df1, expected) - - # test 2 - df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)}) - df2 = DataFrame({"A": [None, 2, 3]}) - expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) - df1.update(df2, overwrite=False) - - tm.assert_frame_equal(df1, expected) From b43faf6905ca7a1dba8d0c08fffc18e7a8f23415 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:30:13 -0800 Subject: [PATCH 7/9] organize combine_first --- .../tests/frame/methods/test_combine_first.py | 349 ++++++++++++++++++ pandas/tests/frame/test_combine_concat.py | 341 ----------------- 2 files changed, 349 insertions(+), 341 deletions(-) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index e69de29bb2d1d..7715cb1cb6eec 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -0,0 +1,349 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series +import pandas._testing as tm + + +class TestDataFrameCombineFirst: + def test_combine_first_mixed(self): + a = Series(["a", "b"], index=range(2)) + b = Series(range(2), index=range(2)) + f = DataFrame({"A": a, "B": b}) + + a = Series(["a", "b"], index=range(5, 7)) + b = Series(range(2), index=range(5, 7)) + g = DataFrame({"A": a, "B": b}) + + exp = pd.DataFrame( + {"A": list("abab"), "B": [0.0, 1.0, 0.0, 1.0]}, index=[0, 1, 5, 6] + ) + combined = f.combine_first(g) + tm.assert_frame_equal(combined, exp) + + def test_combine_first(self, float_frame): + # disjoint + head, tail = float_frame[:5], float_frame[5:] + + combined = head.combine_first(tail) + reordered_frame = float_frame.reindex(combined.index) + tm.assert_frame_equal(combined, reordered_frame) + assert tm.equalContents(combined.columns, float_frame.columns) + tm.assert_series_equal(combined["A"], reordered_frame["A"]) + + # same index + fcopy = float_frame.copy() + fcopy["A"] = 1 + del fcopy["C"] + + fcopy2 = float_frame.copy() + fcopy2["B"] = 0 + del fcopy2["D"] + + combined = fcopy.combine_first(fcopy2) + + assert (combined["A"] == 1).all() + tm.assert_series_equal(combined["B"], fcopy["B"]) + tm.assert_series_equal(combined["C"], fcopy2["C"]) + tm.assert_series_equal(combined["D"], fcopy["D"]) + + # overlap + head, tail = reordered_frame[:10].copy(), reordered_frame + head["A"] = 1 + + combined = head.combine_first(tail) + assert (combined["A"][:10] == 1).all() + + # reverse overlap + tail["A"][:10] = 0 + combined = tail.combine_first(head) + assert (combined["A"][:10] == 0).all() + + # no overlap + f = float_frame[:10] + g = float_frame[10:] + combined = f.combine_first(g) + tm.assert_series_equal(combined["A"].reindex(f.index), f["A"]) + tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) + + # corner cases + comb = float_frame.combine_first(DataFrame()) + tm.assert_frame_equal(comb, float_frame) + + comb = DataFrame().combine_first(float_frame) + tm.assert_frame_equal(comb, float_frame) + + comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) + assert "faz" in comb.index + + # #2525 + df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)]) + df2 = DataFrame(columns=["b"]) + result = df.combine_first(df2) + assert "b" in result + + def test_combine_first_mixed_bug(self): + idx = Index(["a", "b", "c", "e"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "e"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3}) + + idx = Index(["a", "b", "c", "f"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "f"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3}) + + combined = frame1.combine_first(frame2) + assert len(combined.columns) == 5 + + # gh 3016 (same as in update) + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + result = df.combine_first(other) + tm.assert_frame_equal(result, df) + + df.loc[0, "A"] = np.nan + result = df.combine_first(other) + df.loc[0, "A"] = 45 + tm.assert_frame_equal(result, df) + + # doc example + df1 = DataFrame( + {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} + ) + + df2 = DataFrame( + { + "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], + "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], + } + ) + + result = df1.combine_first(df2) + expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) + tm.assert_frame_equal(result, expected) + + # GH3552, return object dtype with bools + df1 = DataFrame( + [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] + ) + df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) + + result = df1.combine_first(df2)[2] + expected = Series([True, True, False], name=2) + tm.assert_series_equal(result, expected) + + # GH 3593, converting datetime64[ns] incorrectly + df0 = DataFrame( + {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} + ) + df1 = DataFrame({"a": [None, None, None]}) + df2 = df1.combine_first(df0) + tm.assert_frame_equal(df2, df0) + + df2 = df0.combine_first(df1) + tm.assert_frame_equal(df2, df0) + + df0 = DataFrame( + {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} + ) + df1 = DataFrame({"a": [datetime(2000, 1, 2), None, None]}) + df2 = df1.combine_first(df0) + result = df0.copy() + result.iloc[0, :] = df1.iloc[0, :] + tm.assert_frame_equal(df2, result) + + df2 = df0.combine_first(df1) + tm.assert_frame_equal(df2, df0) + + def test_combine_first_align_nan(self): + # GH 7509 (not fixed) + dfa = pd.DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) + dfb = pd.DataFrame([[4], [5]], columns=["b"]) + assert dfa["a"].dtype == "datetime64[ns]" + assert dfa["b"].dtype == "int64" + + res = dfa.combine_first(dfb) + exp = pd.DataFrame( + {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2.0, 5.0]}, + columns=["a", "b"], + ) + tm.assert_frame_equal(res, exp) + assert res["a"].dtype == "datetime64[ns]" + # ToDo: this must be int64 + assert res["b"].dtype == "float64" + + res = dfa.iloc[:0].combine_first(dfb) + exp = pd.DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) + tm.assert_frame_equal(res, exp) + # ToDo: this must be datetime64 + assert res["a"].dtype == "float64" + # ToDo: this must be int64 + assert res["b"].dtype == "int64" + + def test_combine_first_timezone(self): + # see gh-7630 + data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") + df1 = pd.DataFrame( + columns=["UTCdatetime", "abc"], + data=data1, + index=pd.date_range("20140627", periods=1), + ) + data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") + df2 = pd.DataFrame( + columns=["UTCdatetime", "xyz"], + data=data2, + index=pd.date_range("20140628", periods=1), + ) + res = df2[["UTCdatetime"]].combine_first(df1) + exp = pd.DataFrame( + { + "UTCdatetime": [ + pd.Timestamp("2010-01-01 01:01", tz="UTC"), + pd.Timestamp("2012-12-12 12:12", tz="UTC"), + ], + "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT], + }, + columns=["UTCdatetime", "abc"], + index=pd.date_range("20140627", periods=2, freq="D"), + ) + tm.assert_frame_equal(res, exp) + assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" + assert res["abc"].dtype == "datetime64[ns, UTC]" + + # see gh-10567 + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") + df1 = pd.DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") + df2 = pd.DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, UTC]" + + dts1 = pd.DatetimeIndex( + ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" + ) + df1 = pd.DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) + dts2 = pd.DatetimeIndex( + ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" + ) + df2 = pd.DataFrame({"DATE": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.DatetimeIndex( + [ + "2011-01-01", + "2012-01-01", + "NaT", + "2012-01-02", + "2011-01-03", + "2011-01-04", + ], + tz="US/Eastern", + ) + exp = pd.DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + + # different tz + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") + df1 = pd.DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05") + df2 = pd.DataFrame({"DATE": dts2}) + + # if df1 doesn't have NaN, keep its dtype + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" + + dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") + df1 = pd.DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-01", "2015-01-03") + df2 = pd.DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Timestamp("2015-01-01", tz="US/Eastern"), + pd.Timestamp("2015-01-02", tz="US/Eastern"), + pd.Timestamp("2015-01-03"), + ] + exp = pd.DataFrame({"DATE": exp_dts}) + tm.assert_frame_equal(res, exp) + assert res["DATE"].dtype == "object" + + def test_combine_first_timedelta(self): + data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) + df1 = pd.DataFrame({"TD": data1}, index=[1, 3, 5, 7]) + data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) + df2 = pd.DataFrame({"TD": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.TimedeltaIndex( + ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] + ) + exp = pd.DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["TD"].dtype == "timedelta64[ns]" + + def test_combine_first_period(self): + data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") + df1 = pd.DataFrame({"P": data1}, index=[1, 3, 5, 7]) + data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") + df2 = pd.DataFrame({"P": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.PeriodIndex( + ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" + ) + exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == data1.dtype + + # different freq + dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") + df2 = pd.DataFrame({"P": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Period("2011-01", freq="M"), + pd.Period("2012-01-01", freq="D"), + pd.NaT, + pd.Period("2012-01-02", freq="D"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == "object" + + def test_combine_first_int(self): + # GH14687 - integer series that do no align exactly + + df1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") + df2 = pd.DataFrame({"a": [1, 4]}, dtype="int64") + + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["a"].dtype == "int64" + + @pytest.mark.parametrize("val", [1, 1.0]) + def test_combine_first_with_asymmetric_other(self, val): + # see gh-20699 + df1 = pd.DataFrame({"isNum": [val]}) + df2 = pd.DataFrame({"isBool": [True]}) + + res = df1.combine_first(df2) + exp = pd.DataFrame({"isBool": [True], "isNum": [val]}) + + tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 6108637a5c300..36a476d195fe5 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -313,347 +313,6 @@ def test_concat_astype_dup_col(self): ).astype("category") tm.assert_frame_equal(result, expected) - -class TestDataFrameCombineFirst: - def test_combine_first_mixed(self): - a = Series(["a", "b"], index=range(2)) - b = Series(range(2), index=range(2)) - f = DataFrame({"A": a, "B": b}) - - a = Series(["a", "b"], index=range(5, 7)) - b = Series(range(2), index=range(5, 7)) - g = DataFrame({"A": a, "B": b}) - - exp = pd.DataFrame( - {"A": list("abab"), "B": [0.0, 1.0, 0.0, 1.0]}, index=[0, 1, 5, 6] - ) - combined = f.combine_first(g) - tm.assert_frame_equal(combined, exp) - - def test_combine_first(self, float_frame): - # disjoint - head, tail = float_frame[:5], float_frame[5:] - - combined = head.combine_first(tail) - reordered_frame = float_frame.reindex(combined.index) - tm.assert_frame_equal(combined, reordered_frame) - assert tm.equalContents(combined.columns, float_frame.columns) - tm.assert_series_equal(combined["A"], reordered_frame["A"]) - - # same index - fcopy = float_frame.copy() - fcopy["A"] = 1 - del fcopy["C"] - - fcopy2 = float_frame.copy() - fcopy2["B"] = 0 - del fcopy2["D"] - - combined = fcopy.combine_first(fcopy2) - - assert (combined["A"] == 1).all() - tm.assert_series_equal(combined["B"], fcopy["B"]) - tm.assert_series_equal(combined["C"], fcopy2["C"]) - tm.assert_series_equal(combined["D"], fcopy["D"]) - - # overlap - head, tail = reordered_frame[:10].copy(), reordered_frame - head["A"] = 1 - - combined = head.combine_first(tail) - assert (combined["A"][:10] == 1).all() - - # reverse overlap - tail["A"][:10] = 0 - combined = tail.combine_first(head) - assert (combined["A"][:10] == 0).all() - - # no overlap - f = float_frame[:10] - g = float_frame[10:] - combined = f.combine_first(g) - tm.assert_series_equal(combined["A"].reindex(f.index), f["A"]) - tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) - - # corner cases - comb = float_frame.combine_first(DataFrame()) - tm.assert_frame_equal(comb, float_frame) - - comb = DataFrame().combine_first(float_frame) - tm.assert_frame_equal(comb, float_frame) - - comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) - assert "faz" in comb.index - - # #2525 - df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)]) - df2 = DataFrame(columns=["b"]) - result = df.combine_first(df2) - assert "b" in result - - def test_combine_first_mixed_bug(self): - idx = Index(["a", "b", "c", "e"]) - ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) - ser2 = Series(["a", "b", "c", "e"], index=idx) - ser3 = Series([12, 4, 5, 97], index=idx) - - frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3}) - - idx = Index(["a", "b", "c", "f"]) - ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) - ser2 = Series(["a", "b", "c", "f"], index=idx) - ser3 = Series([12, 4, 5, 97], index=idx) - - frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3}) - - combined = frame1.combine_first(frame2) - assert len(combined.columns) == 5 - - # gh 3016 (same as in update) - df = DataFrame( - [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], - columns=["A", "B", "bool1", "bool2"], - ) - - other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) - result = df.combine_first(other) - tm.assert_frame_equal(result, df) - - df.loc[0, "A"] = np.nan - result = df.combine_first(other) - df.loc[0, "A"] = 45 - tm.assert_frame_equal(result, df) - - # doc example - df1 = DataFrame( - {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} - ) - - df2 = DataFrame( - { - "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], - "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], - } - ) - - result = df1.combine_first(df2) - expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) - tm.assert_frame_equal(result, expected) - - # GH3552, return object dtype with bools - df1 = DataFrame( - [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] - ) - df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) - - result = df1.combine_first(df2)[2] - expected = Series([True, True, False], name=2) - tm.assert_series_equal(result, expected) - - # GH 3593, converting datetime64[ns] incorrectly - df0 = DataFrame( - {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} - ) - df1 = DataFrame({"a": [None, None, None]}) - df2 = df1.combine_first(df0) - tm.assert_frame_equal(df2, df0) - - df2 = df0.combine_first(df1) - tm.assert_frame_equal(df2, df0) - - df0 = DataFrame( - {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} - ) - df1 = DataFrame({"a": [datetime(2000, 1, 2), None, None]}) - df2 = df1.combine_first(df0) - result = df0.copy() - result.iloc[0, :] = df1.iloc[0, :] - tm.assert_frame_equal(df2, result) - - df2 = df0.combine_first(df1) - tm.assert_frame_equal(df2, df0) - - def test_combine_first_align_nan(self): - # GH 7509 (not fixed) - dfa = pd.DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) - dfb = pd.DataFrame([[4], [5]], columns=["b"]) - assert dfa["a"].dtype == "datetime64[ns]" - assert dfa["b"].dtype == "int64" - - res = dfa.combine_first(dfb) - exp = pd.DataFrame( - {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2.0, 5.0]}, - columns=["a", "b"], - ) - tm.assert_frame_equal(res, exp) - assert res["a"].dtype == "datetime64[ns]" - # ToDo: this must be int64 - assert res["b"].dtype == "float64" - - res = dfa.iloc[:0].combine_first(dfb) - exp = pd.DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) - tm.assert_frame_equal(res, exp) - # ToDo: this must be datetime64 - assert res["a"].dtype == "float64" - # ToDo: this must be int64 - assert res["b"].dtype == "int64" - - def test_combine_first_timezone(self): - # see gh-7630 - data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") - df1 = pd.DataFrame( - columns=["UTCdatetime", "abc"], - data=data1, - index=pd.date_range("20140627", periods=1), - ) - data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") - df2 = pd.DataFrame( - columns=["UTCdatetime", "xyz"], - data=data2, - index=pd.date_range("20140628", periods=1), - ) - res = df2[["UTCdatetime"]].combine_first(df1) - exp = pd.DataFrame( - { - "UTCdatetime": [ - pd.Timestamp("2010-01-01 01:01", tz="UTC"), - pd.Timestamp("2012-12-12 12:12", tz="UTC"), - ], - "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT], - }, - columns=["UTCdatetime", "abc"], - index=pd.date_range("20140627", periods=2, freq="D"), - ) - tm.assert_frame_equal(res, exp) - assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" - assert res["abc"].dtype == "datetime64[ns, UTC]" - - # see gh-10567 - dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") - df1 = pd.DataFrame({"DATE": dts1}) - dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") - df2 = pd.DataFrame({"DATE": dts2}) - - res = df1.combine_first(df2) - tm.assert_frame_equal(res, df1) - assert res["DATE"].dtype == "datetime64[ns, UTC]" - - dts1 = pd.DatetimeIndex( - ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" - ) - df1 = pd.DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) - dts2 = pd.DatetimeIndex( - ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" - ) - df2 = pd.DataFrame({"DATE": dts2}, index=[2, 4, 5]) - - res = df1.combine_first(df2) - exp_dts = pd.DatetimeIndex( - [ - "2011-01-01", - "2012-01-01", - "NaT", - "2012-01-02", - "2011-01-03", - "2011-01-04", - ], - tz="US/Eastern", - ) - exp = pd.DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) - tm.assert_frame_equal(res, exp) - - # different tz - dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") - df1 = pd.DataFrame({"DATE": dts1}) - dts2 = pd.date_range("2015-01-03", "2015-01-05") - df2 = pd.DataFrame({"DATE": dts2}) - - # if df1 doesn't have NaN, keep its dtype - res = df1.combine_first(df2) - tm.assert_frame_equal(res, df1) - assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" - - dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") - df1 = pd.DataFrame({"DATE": dts1}) - dts2 = pd.date_range("2015-01-01", "2015-01-03") - df2 = pd.DataFrame({"DATE": dts2}) - - res = df1.combine_first(df2) - exp_dts = [ - pd.Timestamp("2015-01-01", tz="US/Eastern"), - pd.Timestamp("2015-01-02", tz="US/Eastern"), - pd.Timestamp("2015-01-03"), - ] - exp = pd.DataFrame({"DATE": exp_dts}) - tm.assert_frame_equal(res, exp) - assert res["DATE"].dtype == "object" - - def test_combine_first_timedelta(self): - data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) - df1 = pd.DataFrame({"TD": data1}, index=[1, 3, 5, 7]) - data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) - df2 = pd.DataFrame({"TD": data2}, index=[2, 4, 5]) - - res = df1.combine_first(df2) - exp_dts = pd.TimedeltaIndex( - ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] - ) - exp = pd.DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7]) - tm.assert_frame_equal(res, exp) - assert res["TD"].dtype == "timedelta64[ns]" - - def test_combine_first_period(self): - data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") - df1 = pd.DataFrame({"P": data1}, index=[1, 3, 5, 7]) - data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") - df2 = pd.DataFrame({"P": data2}, index=[2, 4, 5]) - - res = df1.combine_first(df2) - exp_dts = pd.PeriodIndex( - ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" - ) - exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) - tm.assert_frame_equal(res, exp) - assert res["P"].dtype == data1.dtype - - # different freq - dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") - df2 = pd.DataFrame({"P": dts2}, index=[2, 4, 5]) - - res = df1.combine_first(df2) - exp_dts = [ - pd.Period("2011-01", freq="M"), - pd.Period("2012-01-01", freq="D"), - pd.NaT, - pd.Period("2012-01-02", freq="D"), - pd.Period("2011-03", freq="M"), - pd.Period("2011-04", freq="M"), - ] - exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) - tm.assert_frame_equal(res, exp) - assert res["P"].dtype == "object" - - def test_combine_first_int(self): - # GH14687 - integer series that do no align exactly - - df1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") - df2 = pd.DataFrame({"a": [1, 4]}, dtype="int64") - - res = df1.combine_first(df2) - tm.assert_frame_equal(res, df1) - assert res["a"].dtype == "int64" - - @pytest.mark.parametrize("val", [1, 1.0]) - def test_combine_first_with_asymmetric_other(self, val): - # see gh-20699 - df1 = pd.DataFrame({"isNum": [val]}) - df2 = pd.DataFrame({"isBool": [True]}) - - res = df1.combine_first(df2) - exp = pd.DataFrame({"isBool": [True], "isNum": [val]}) - - tm.assert_frame_equal(res, exp) - def test_concat_datetime_datetime64_frame(self): # #2624 rows = [] From 7fc5186588c6598fda4c9f06d3edfd522a607c74 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 19:44:41 -0800 Subject: [PATCH 8/9] fixup --- pandas/tests/frame/indexing/test_iat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_iat.py b/pandas/tests/frame/indexing/test_iat.py index e583fff9b92db..628fe73024fdc 100644 --- a/pandas/tests/frame/indexing/test_iat.py +++ b/pandas/tests/frame/indexing/test_iat.py @@ -1,5 +1,5 @@ -def test_iat(self, float_frame): +def test_iat(float_frame): for i, row in enumerate(float_frame.index): for j, col in enumerate(float_frame.columns): From 437816e2a7e8130af035656ab9b17108e89dc085 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 2 Feb 2020 08:58:48 -0800 Subject: [PATCH 9/9] blackify --- pandas/tests/frame/indexing/test_iat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_iat.py b/pandas/tests/frame/indexing/test_iat.py index 628fe73024fdc..23e3392251a3a 100644 --- a/pandas/tests/frame/indexing/test_iat.py +++ b/pandas/tests/frame/indexing/test_iat.py @@ -1,4 +1,3 @@ - def test_iat(float_frame): for i, row in enumerate(float_frame.index):