From 7e2ce62bab75bd375774e61c6910d8bc449d835e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 1 Nov 2020 16:01:55 -0800 Subject: [PATCH 1/2] TST/REF: collect tests by method --- pandas/tests/frame/methods/test_swapaxes.py | 22 +++ pandas/tests/frame/test_add_prefix_suffix.py | 20 +++ pandas/tests/frame/test_api.py | 174 +------------------ pandas/tests/frame/test_iteration.py | 154 ++++++++++++++++ pandas/tests/io/test_pickle.py | 24 ++- pandas/tests/series/test_api.py | 48 ----- pandas/tests/series/test_iteration.py | 33 ++++ 7 files changed, 248 insertions(+), 227 deletions(-) create mode 100644 pandas/tests/frame/methods/test_swapaxes.py create mode 100644 pandas/tests/frame/test_add_prefix_suffix.py create mode 100644 pandas/tests/frame/test_iteration.py create mode 100644 pandas/tests/series/test_iteration.py diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py new file mode 100644 index 0000000000000..306f7b2b21cda --- /dev/null +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -0,0 +1,22 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestSwapAxes: + def test_swapaxes(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) + tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) + + def test_swapaxes_noop(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df, df.swapaxes(0, 0)) + + def test_swapaxes_invalid_axis(self): + df = DataFrame(np.random.randn(10, 5)) + msg = "No axis named 2 for object type DataFrame" + with pytest.raises(ValueError, match=msg): + df.swapaxes(2, 5) diff --git a/pandas/tests/frame/test_add_prefix_suffix.py b/pandas/tests/frame/test_add_prefix_suffix.py new file mode 100644 index 0000000000000..ea75e9ff51552 --- /dev/null +++ b/pandas/tests/frame/test_add_prefix_suffix.py @@ -0,0 +1,20 @@ +from pandas import Index +import pandas._testing as tm + + +def test_add_prefix_suffix(float_frame): + with_prefix = float_frame.add_prefix("foo#") + expected = Index([f"foo#{c}" for c in float_frame.columns]) + tm.assert_index_equal(with_prefix.columns, expected) + + with_suffix = float_frame.add_suffix("#foo") + expected = Index([f"{c}#foo" for c in float_frame.columns]) + tm.assert_index_equal(with_suffix.columns, expected) + + with_pct_prefix = float_frame.add_prefix("%") + expected = Index([f"%{c}" for c in float_frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = float_frame.add_suffix("%") + expected = Index([f"{c}%" for c in float_frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 68a004d8fbccb..25d3fab76ca36 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -1,5 +1,4 @@ from copy import deepcopy -import datetime import inspect import pydoc import warnings @@ -7,12 +6,11 @@ import numpy as np import pytest -from pandas.compat import IS64, is_platform_windows import pandas.util._test_decorators as td from pandas.util._test_decorators import async_mark, skip_if_no import pandas as pd -from pandas import Categorical, DataFrame, Series, date_range, timedelta_range +from pandas import DataFrame, Series, date_range, timedelta_range import pandas._testing as tm @@ -30,23 +28,6 @@ def test_getitem_pop_assign_name(self, float_frame): s2 = s.loc[:] assert s2.name == "B" - def test_add_prefix_suffix(self, float_frame): - with_prefix = float_frame.add_prefix("foo#") - expected = pd.Index([f"foo#{c}" for c in float_frame.columns]) - tm.assert_index_equal(with_prefix.columns, expected) - - with_suffix = float_frame.add_suffix("#foo") - expected = pd.Index([f"{c}#foo" for c in float_frame.columns]) - tm.assert_index_equal(with_suffix.columns, expected) - - with_pct_prefix = float_frame.add_prefix("%") - expected = pd.Index([f"%{c}" for c in float_frame.columns]) - tm.assert_index_equal(with_pct_prefix.columns, expected) - - with_pct_suffix = float_frame.add_suffix("%") - expected = pd.Index([f"{c}%" for c in float_frame.columns]) - tm.assert_index_equal(with_pct_suffix.columns, expected) - def test_get_axis(self, float_frame): f = float_frame assert f._get_axis_number(0) == 0 @@ -76,9 +57,6 @@ def test_get_axis(self, float_frame): with pytest.raises(ValueError, match="No axis named"): f._get_axis_number(None) - def test_keys(self, float_frame): - assert float_frame.keys() is float_frame.columns - def test_column_contains_raises(self, float_frame): with pytest.raises(TypeError, match="unhashable type: 'Index'"): float_frame.columns in float_frame @@ -149,143 +127,6 @@ def test_empty(self, float_frame, float_string_frame): del df["A"] assert not df.empty - def test_iteritems(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) - for k, v in df.items(): - assert isinstance(v, DataFrame._constructor_sliced) - - def test_items(self): - # GH 17213, GH 13918 - cols = ["a", "b", "c"] - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) - for c, (k, v) in zip(cols, df.items()): - assert c == k - assert isinstance(v, Series) - assert (df[k] == v).all() - - def test_iter(self, float_frame): - assert tm.equalContents(list(float_frame), float_frame.columns) - - def test_iterrows(self, float_frame, float_string_frame): - for k, v in float_frame.iterrows(): - exp = float_frame.loc[k] - tm.assert_series_equal(v, exp) - - for k, v in float_string_frame.iterrows(): - exp = float_string_frame.loc[k] - tm.assert_series_equal(v, exp) - - def test_iterrows_iso8601(self): - # GH 19671 - s = DataFrame( - { - "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], - "iso8601": date_range("2000-01-01", periods=4, freq="M"), - } - ) - for k, v in s.iterrows(): - exp = s.loc[k] - tm.assert_series_equal(v, exp) - - def test_iterrows_corner(self): - # gh-12222 - df = DataFrame( - { - "a": [datetime.datetime(2015, 1, 1)], - "b": [None], - "c": [None], - "d": [""], - "e": [[]], - "f": [set()], - "g": [{}], - } - ) - expected = Series( - [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}], - index=list("abcdefg"), - name=0, - dtype="object", - ) - _, result = next(df.iterrows()) - tm.assert_series_equal(result, expected) - - def test_itertuples(self, float_frame): - for i, tup in enumerate(float_frame.itertuples()): - s = DataFrame._constructor_sliced(tup[1:]) - s.name = tup[0] - expected = float_frame.iloc[i, :].reset_index(drop=True) - tm.assert_series_equal(s, expected) - - df = DataFrame( - {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] - ) - - for tup in df.itertuples(index=False): - assert isinstance(tup[1], int) - - df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) - dfaa = df[["a", "a"]] - - assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)] - - # repr with int on 32-bit/windows - if not (is_platform_windows() or not IS64): - assert ( - repr(list(df.itertuples(name=None))) - == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]" - ) - - tup = next(df.itertuples(name="TestName")) - assert tup._fields == ("Index", "a", "b") - assert (tup.Index, tup.a, tup.b) == tup - assert type(tup).__name__ == "TestName" - - df.columns = ["def", "return"] - tup2 = next(df.itertuples(name="TestName")) - assert tup2 == (0, 1, 4) - assert tup2._fields == ("Index", "_1", "_2") - - df3 = DataFrame({"f" + str(i): [i] for i in range(1024)}) - # will raise SyntaxError if trying to create namedtuple - tup3 = next(df3.itertuples()) - assert isinstance(tup3, tuple) - assert hasattr(tup3, "_fields") - - # GH 28282 - df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}]) - result_254_columns = next(df_254_columns.itertuples(index=False)) - assert isinstance(result_254_columns, tuple) - assert hasattr(result_254_columns, "_fields") - - df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}]) - result_255_columns = next(df_255_columns.itertuples(index=False)) - assert isinstance(result_255_columns, tuple) - assert hasattr(result_255_columns, "_fields") - - def test_sequence_like_with_categorical(self): - - # GH 7839 - # make sure can iterate - df = DataFrame( - {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]} - ) - df["grade"] = Categorical(df["raw_grade"]) - - # basic sequencing testing - result = list(df.grade.values) - expected = np.array(df.grade.values).tolist() - tm.assert_almost_equal(result, expected) - - # iteration - for t in df.itertuples(index=False): - str(t) - - for row, s in df.iterrows(): - str(s) - - for c, col in df.items(): - str(s) - def test_len(self, float_frame): assert len(float_frame) == len(float_frame.index) @@ -294,15 +135,6 @@ def test_len(self, float_frame): expected = float_frame.reindex(columns=["A", "B"]).values tm.assert_almost_equal(arr, expected) - def test_swapaxes(self): - df = DataFrame(np.random.randn(10, 5)) - tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) - tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) - tm.assert_frame_equal(df, df.swapaxes(0, 0)) - msg = "No axis named 2 for object type DataFrame" - with pytest.raises(ValueError, match=msg): - df.swapaxes(2, 5) - def test_axis_aliases(self, float_frame): f = float_frame @@ -321,10 +153,6 @@ def test_class_axis(self): assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) - def test_items_names(self, float_string_frame): - for k, v in float_string_frame.items(): - assert v.name == k - def test_series_put_names(self, float_string_frame): series = float_string_frame._series for k, v in series.items(): diff --git a/pandas/tests/frame/test_iteration.py b/pandas/tests/frame/test_iteration.py new file mode 100644 index 0000000000000..d6268f90b2681 --- /dev/null +++ b/pandas/tests/frame/test_iteration.py @@ -0,0 +1,154 @@ +import datetime + +import numpy as np + +from pandas.compat import IS64, is_platform_windows + +from pandas import Categorical, DataFrame, Series, date_range +import pandas._testing as tm + + +class TestIteration: + def test_keys(self, float_frame): + assert float_frame.keys() is float_frame.columns + + def test_iteritems(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + for k, v in df.items(): + assert isinstance(v, DataFrame._constructor_sliced) + + def test_items(self): + # GH#17213, GH#13918 + cols = ["a", "b", "c"] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() + + def test_items_names(self, float_string_frame): + for k, v in float_string_frame.items(): + assert v.name == k + + def test_iter(self, float_frame): + assert tm.equalContents(list(float_frame), float_frame.columns) + + def test_iterrows(self, float_frame, float_string_frame): + for k, v in float_frame.iterrows(): + exp = float_frame.loc[k] + tm.assert_series_equal(v, exp) + + for k, v in float_string_frame.iterrows(): + exp = float_string_frame.loc[k] + tm.assert_series_equal(v, exp) + + def test_iterrows_iso8601(self): + # GH#19671 + s = DataFrame( + { + "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], + "iso8601": date_range("2000-01-01", periods=4, freq="M"), + } + ) + for k, v in s.iterrows(): + exp = s.loc[k] + tm.assert_series_equal(v, exp) + + def test_iterrows_corner(self): + # GH#12222 + df = DataFrame( + { + "a": [datetime.datetime(2015, 1, 1)], + "b": [None], + "c": [None], + "d": [""], + "e": [[]], + "f": [set()], + "g": [{}], + } + ) + expected = Series( + [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}], + index=list("abcdefg"), + name=0, + dtype="object", + ) + _, result = next(df.iterrows()) + tm.assert_series_equal(result, expected) + + def test_itertuples(self, float_frame): + for i, tup in enumerate(float_frame.itertuples()): + ser = DataFrame._constructor_sliced(tup[1:]) + ser.name = tup[0] + expected = float_frame.iloc[i, :].reset_index(drop=True) + tm.assert_series_equal(ser, expected) + + df = DataFrame( + {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] + ) + + for tup in df.itertuples(index=False): + assert isinstance(tup[1], int) + + df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + dfaa = df[["a", "a"]] + + assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)] + + # repr with int on 32-bit/windows + if not (is_platform_windows() or not IS64): + assert ( + repr(list(df.itertuples(name=None))) + == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]" + ) + + tup = next(df.itertuples(name="TestName")) + assert tup._fields == ("Index", "a", "b") + assert (tup.Index, tup.a, tup.b) == tup + assert type(tup).__name__ == "TestName" + + df.columns = ["def", "return"] + tup2 = next(df.itertuples(name="TestName")) + assert tup2 == (0, 1, 4) + assert tup2._fields == ("Index", "_1", "_2") + + df3 = DataFrame({"f" + str(i): [i] for i in range(1024)}) + # will raise SyntaxError if trying to create namedtuple + tup3 = next(df3.itertuples()) + assert isinstance(tup3, tuple) + assert hasattr(tup3, "_fields") + + # GH#28282 + df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}]) + result_254_columns = next(df_254_columns.itertuples(index=False)) + assert isinstance(result_254_columns, tuple) + assert hasattr(result_254_columns, "_fields") + + df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}]) + result_255_columns = next(df_255_columns.itertuples(index=False)) + assert isinstance(result_255_columns, tuple) + assert hasattr(result_255_columns, "_fields") + + def test_sequence_like_with_categorical(self): + + # GH#7839 + # make sure can iterate + df = DataFrame( + {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + df["grade"] = Categorical(df["raw_grade"]) + + # basic sequencing testing + result = list(df.grade.values) + expected = np.array(df.grade.values).tolist() + tm.assert_almost_equal(result, expected) + + # iteration + for t in df.itertuples(index=False): + str(t) + + for row, s in df.iterrows(): + str(s) + + for c, col in df.items(): + str(s) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 890146f0789ae..7c4d3d3ef76a6 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -563,11 +563,23 @@ def test_pickle_timeseries_periodindex(): "name", [777, 777.0, "name", datetime.datetime(2001, 11, 11), (1, 2)] ) def test_pickle_preserve_name(name): - def _pickle_roundtrip_name(obj): - with tm.ensure_clean() as path: - obj.to_pickle(path) - unpickled = pd.read_pickle(path) - return unpickled - unpickled = _pickle_roundtrip_name(tm.makeTimeSeries(name=name)) + unpickled = _pickle_roundtrip(tm.makeTimeSeries(name=name)) assert unpickled.name == name + + +def test_pickle_datetimes(datetime_series): + unp_ts = _pickle_roundtrip(datetime_series) + tm.assert_series_equal(unp_ts, datetime_series) + + +def test_pickle_strings(string_series): + unp_series = _pickle_roundtrip(string_series) + tm.assert_series_equal(unp_series, string_series) + + +def _pickle_roundtrip(obj): + with tm.ensure_clean() as path: + obj.to_pickle(path) + unpickled = pd.read_pickle(path) + return unpickled diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 5dbee5cc0567b..717d8b5c90d85 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -22,21 +22,6 @@ def test_getitem_preserve_name(self, datetime_series): result = datetime_series[5:10] assert result.name == datetime_series.name - def test_pickle_datetimes(self, datetime_series): - unp_ts = self._pickle_roundtrip(datetime_series) - tm.assert_series_equal(unp_ts, datetime_series) - - def test_pickle_strings(self, string_series): - unp_series = self._pickle_roundtrip(string_series) - tm.assert_series_equal(unp_series, string_series) - - def _pickle_roundtrip(self, obj): - - with tm.ensure_clean() as path: - obj.to_pickle(path) - unpickled = pd.read_pickle(path) - return unpickled - def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) @@ -130,44 +115,11 @@ def test_not_hashable(self): def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) - def test_iter_datetimes(self, datetime_series): - for i, val in enumerate(datetime_series): - assert val == datetime_series[i] - - def test_iter_strings(self, string_series): - for i, val in enumerate(string_series): - assert val == string_series[i] - - def test_keys(self, datetime_series): - assert datetime_series.keys() is datetime_series.index - def test_values(self, datetime_series): tm.assert_almost_equal( datetime_series.values, datetime_series, check_dtype=False ) - def test_iteritems_datetimes(self, datetime_series): - for idx, val in datetime_series.iteritems(): - assert val == datetime_series[idx] - - def test_iteritems_strings(self, string_series): - for idx, val in string_series.iteritems(): - assert val == string_series[idx] - - # assert is lazy (generators don't define reverse, lists do) - assert not hasattr(string_series.iteritems(), "reverse") - - def test_items_datetimes(self, datetime_series): - for idx, val in datetime_series.items(): - assert val == datetime_series[idx] - - def test_items_strings(self, string_series): - for idx, val in string_series.items(): - assert val == string_series[idx] - - # assert is lazy (generators don't define reverse, lists do) - assert not hasattr(string_series.items(), "reverse") - def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" diff --git a/pandas/tests/series/test_iteration.py b/pandas/tests/series/test_iteration.py new file mode 100644 index 0000000000000..dc9fe9e3bdd34 --- /dev/null +++ b/pandas/tests/series/test_iteration.py @@ -0,0 +1,33 @@ +class TestIteration: + def test_keys(self, datetime_series): + assert datetime_series.keys() is datetime_series.index + + def test_iter_datetimes(self, datetime_series): + for i, val in enumerate(datetime_series): + assert val == datetime_series[i] + + def test_iter_strings(self, string_series): + for i, val in enumerate(string_series): + assert val == string_series[i] + + def test_iteritems_datetimes(self, datetime_series): + for idx, val in datetime_series.iteritems(): + assert val == datetime_series[idx] + + def test_iteritems_strings(self, string_series): + for idx, val in string_series.iteritems(): + assert val == string_series[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(string_series.iteritems(), "reverse") + + def test_items_datetimes(self, datetime_series): + for idx, val in datetime_series.items(): + assert val == datetime_series[idx] + + def test_items_strings(self, string_series): + for idx, val in string_series.items(): + assert val == string_series[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(string_series.items(), "reverse") From 37650c968679a3ab238d6eb0c647e5914ba48419 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 1 Nov 2020 17:44:41 -0800 Subject: [PATCH 2/2] use round_trip_pickle --- pandas/tests/io/test_pickle.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 7c4d3d3ef76a6..925f6b5f125c7 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -564,22 +564,15 @@ def test_pickle_timeseries_periodindex(): ) def test_pickle_preserve_name(name): - unpickled = _pickle_roundtrip(tm.makeTimeSeries(name=name)) + unpickled = tm.round_trip_pickle(tm.makeTimeSeries(name=name)) assert unpickled.name == name def test_pickle_datetimes(datetime_series): - unp_ts = _pickle_roundtrip(datetime_series) + unp_ts = tm.round_trip_pickle(datetime_series) tm.assert_series_equal(unp_ts, datetime_series) def test_pickle_strings(string_series): - unp_series = _pickle_roundtrip(string_series) + unp_series = tm.round_trip_pickle(string_series) tm.assert_series_equal(unp_series, string_series) - - -def _pickle_roundtrip(obj): - with tm.ensure_clean() as path: - obj.to_pickle(path) - unpickled = pd.read_pickle(path) - return unpickled