From dcd790c0a83c7e3cb8f1d1a76d44dc7ad2f2da43 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 3 Nov 2020 16:33:51 -0800 Subject: [PATCH 1/2] misplaced loc test --- pandas/tests/indexing/test_loc.py | 5 +++++ pandas/tests/internals/test_internals.py | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c1a5db992d3df..2f43f0ae4b031 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -119,6 +119,11 @@ def test_setitem_from_duplicate_axis(self): class TestLoc2: # TODO: better name, just separating out things that rely on base class + def test_loc_getitem_missing_unicode_key(self): + df = DataFrame({"a": [1]}) + with pytest.raises(KeyError, match="\u05d0"): + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError + def test_loc_getitem_dups(self): # GH 5678 # repeated getitems on a dup index returning a ndarray diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index bddc50a3cbcc1..88b91ecc79060 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1146,12 +1146,6 @@ def test_make_block_no_pandas_array(): assert result.is_extension is False -def test_missing_unicode_key(): - df = DataFrame({"a": [1]}) - with pytest.raises(KeyError, match="\u05d0"): - df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError - - def test_single_block_manager_fastpath_deprecated(): # GH#33092 ser = Series(range(3)) From 6f603e76e140e5110a78431cd07c59ae20061a9a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Nov 2020 10:29:42 -0800 Subject: [PATCH 2/2] TST/REF: collect get_numeric_data tests --- .../frame/methods/test_get_numeric_data.py | 96 +++++++++++++++++++ pandas/tests/frame/test_block_internals.py | 85 +--------------- pandas/tests/generic/test_frame.py | 8 -- pandas/tests/generic/test_series.py | 22 +---- .../series/methods/test_get_numeric_data.py | 25 +++++ 5 files changed, 123 insertions(+), 113 deletions(-) create mode 100644 pandas/tests/frame/methods/test_get_numeric_data.py create mode 100644 pandas/tests/series/methods/test_get_numeric_data.py diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py new file mode 100644 index 0000000000000..d73dbdf045be3 --- /dev/null +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -0,0 +1,96 @@ +import numpy as np + +from pandas import Categorical, DataFrame, Index, Series, Timestamp +import pandas._testing as tm +from pandas.core.arrays import IntervalArray, integer_array + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + # get the numeric data + obj = DataFrame({"A": [1, "2", 3.0]}) + result = obj._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data(self): + + datetime64name = np.dtype("M8[ns]").name + objectname = np.dtype(np.object_).name + + df = DataFrame( + {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [ + np.dtype("float64"), + np.dtype("int64"), + np.dtype(objectname), + np.dtype(datetime64name), + ], + index=["a", "b", "c", "f"], + ) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "d": np.array([1.0] * 10, dtype="float32"), + "e": np.array([1] * 10, dtype="int32"), + "f": np.array([1] * 10, dtype="int16"), + "g": Timestamp("20010102"), + }, + index=np.arange(10), + ) + + result = df._get_numeric_data() + expected = df.loc[:, ["a", "b", "d", "e", "f"]] + tm.assert_frame_equal(result, expected) + + only_obj = df.loc[:, ["c", "g"]] + result = only_obj._get_numeric_data() + expected = df.loc[:, []] + tm.assert_frame_equal(result, expected) + + df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) + result = df._get_numeric_data() + expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) + tm.assert_frame_equal(result, expected) + + df = result.copy() + result = df._get_numeric_data() + expected = df + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data_mixed_dtype(self): + # numeric and object columns + + df = DataFrame( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["foo", "bar", "baz"], + "d": [None, None, None], + "e": [3.14, 0.577, 2.773], + } + ) + result = df._get_numeric_data() + tm.assert_index_equal(result.columns, Index(["a", "b", "e"])) + + def test_get_numeric_data_extension_dtype(self): + # GH#22290 + df = DataFrame( + { + "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), + "B": Categorical(list("abcabc")), + "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), + "D": IntervalArray.from_breaks(range(7)), + } + ) + result = df._get_numeric_data() + expected = df.loc[:, ["A", "C"]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 34aa11eb76306..5513262af8100 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -16,7 +16,6 @@ option_context, ) import pandas._testing as tm -from pandas.core.arrays import IntervalArray, integer_array from pandas.core.internals import ObjectBlock from pandas.core.internals.blocks import IntBlock @@ -306,73 +305,6 @@ def test_is_mixed_type(self, float_frame, float_string_frame): assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type - def test_get_numeric_data(self): - - datetime64name = np.dtype("M8[ns]").name - objectname = np.dtype(np.object_).name - - df = DataFrame( - {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, - index=np.arange(10), - ) - result = df.dtypes - expected = Series( - [ - np.dtype("float64"), - np.dtype("int64"), - np.dtype(objectname), - np.dtype(datetime64name), - ], - index=["a", "b", "c", "f"], - ) - tm.assert_series_equal(result, expected) - - df = DataFrame( - { - "a": 1.0, - "b": 2, - "c": "foo", - "d": np.array([1.0] * 10, dtype="float32"), - "e": np.array([1] * 10, dtype="int32"), - "f": np.array([1] * 10, dtype="int16"), - "g": Timestamp("20010102"), - }, - index=np.arange(10), - ) - - result = df._get_numeric_data() - expected = df.loc[:, ["a", "b", "d", "e", "f"]] - tm.assert_frame_equal(result, expected) - - only_obj = df.loc[:, ["c", "g"]] - result = only_obj._get_numeric_data() - expected = df.loc[:, []] - tm.assert_frame_equal(result, expected) - - df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) - result = df._get_numeric_data() - expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) - tm.assert_frame_equal(result, expected) - - df = result.copy() - result = df._get_numeric_data() - expected = df - tm.assert_frame_equal(result, expected) - - def test_get_numeric_data_extension_dtype(self): - # GH 22290 - df = DataFrame( - { - "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), - "B": Categorical(list("abcabc")), - "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), - "D": IntervalArray.from_breaks(range(7)), - } - ) - result = df._get_numeric_data() - expected = df.loc[:, ["A", "C"]] - tm.assert_frame_equal(result, expected) - def test_stale_cached_series_bug_473(self): # this is chained, but ok @@ -390,21 +322,6 @@ def test_stale_cached_series_bug_473(self): exp = Y["g"].sum() # noqa assert pd.isna(Y["g"]["c"]) - def test_get_X_columns(self): - # numeric and object columns - - df = DataFrame( - { - "a": [1, 2, 3], - "b": [True, False, True], - "c": ["foo", "bar", "baz"], - "d": [None, None, None], - "e": [3.14, 0.577, 2.773], - } - ) - - tm.assert_index_equal(df._get_numeric_data().columns, pd.Index(["a", "b", "e"])) - def test_strange_column_corruption_issue(self): # FIXME: dont leave commented-out # (wesm) Unclear how exactly this is related to internal matters @@ -458,7 +375,7 @@ def test_update_inplace_sets_valid_block_values(): df["a"].fillna(1, inplace=True) # check we havent put a Series into any block.values - assert isinstance(df._mgr.blocks[0].values, pd.Categorical) + assert isinstance(df._mgr.blocks[0].values, Categorical) # smoketest for OP bug from GH#35731 assert df.isnull().sum().sum() == 0 diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index da02a82890adc..757f71730819d 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -61,14 +61,6 @@ def test_nonzero_single_element(self): with pytest.raises(ValueError, match=msg): bool(df) - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = DataFrame({"A": [1, "2", 3.0]}) - result = o._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) - self._compare(result, expected) - def test_metadata_propagation_indiv_groupby(self): # groupby df = DataFrame( diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 0a05a42f0fc39..474661e0f2e0a 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -35,31 +35,11 @@ def test_set_axis_name_raises(self): with pytest.raises(ValueError, match=msg): s._set_axis_name(name="a", axis=1) - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = Series([1, 2, 3]) - result = o._get_numeric_data() - self._compare(result, o) - - o = Series([1, "2", 3.0]) - result = o._get_numeric_data() - expected = Series([], dtype=object, index=pd.Index([], dtype=object)) - self._compare(result, expected) - - o = Series([True, False, True]) - result = o._get_numeric_data() - self._compare(result, o) - + def test_get_bool_data_preserve_dtype(self): o = Series([True, False, True]) result = o._get_bool_data() self._compare(result, o) - o = Series(date_range("20130101", periods=3)) - result = o._get_numeric_data() - expected = Series([], dtype="M8[ns]", index=pd.Index([], dtype=object)) - self._compare(result, expected) - def test_nonzero_single_element(self): # allow single item via bool method diff --git a/pandas/tests/series/methods/test_get_numeric_data.py b/pandas/tests/series/methods/test_get_numeric_data.py new file mode 100644 index 0000000000000..dc0becf46a24c --- /dev/null +++ b/pandas/tests/series/methods/test_get_numeric_data.py @@ -0,0 +1,25 @@ +from pandas import Index, Series, date_range +import pandas._testing as tm + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + obj = Series([1, 2, 3]) + result = obj._get_numeric_data() + tm.assert_series_equal(result, obj) + + obj = Series([1, "2", 3.0]) + result = obj._get_numeric_data() + expected = Series([], dtype=object, index=Index([], dtype=object)) + tm.assert_series_equal(result, expected) + + obj = Series([True, False, True]) + result = obj._get_numeric_data() + tm.assert_series_equal(result, obj) + + obj = Series(date_range("20130101", periods=3)) + result = obj._get_numeric_data() + expected = Series([], dtype="M8[ns]", index=Index([], dtype=object)) + tm.assert_series_equal(result, expected)