Skip to content

TST/REF: collect tests for get_numeric_data #37634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions pandas/tests/frame/methods/test_get_numeric_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import numpy as np

from pandas import Categorical, DataFrame, Index, Series, Timestamp
import pandas._testing as tm
from pandas.core.arrays import IntervalArray, integer_array


class TestGetNumericData:
    """Tests for ``DataFrame._get_numeric_data``."""

    def test_get_numeric_data_preserve_dtype(self):
        """A frame with a single object column yields no columns but keeps its rows."""
        obj = DataFrame({"A": [1, "2", 3.0]})
        result = obj._get_numeric_data()
        # State the empty column axis explicitly (sliced from the input so the
        # column-index type matches) rather than leaning on the constructor's
        # default for a frame built without columns.
        expected = DataFrame(index=[0, 1, 2], columns=obj.columns[:0], dtype=object)
        tm.assert_frame_equal(result, expected)

    def test_get_numeric_data(self):
        """Float and integer columns are selected; string and datetime ones are not."""
        datetime64name = np.dtype("M8[ns]").name
        objectname = np.dtype(np.object_).name

        # sanity-check the dtypes the constructor infers from scalar values
        df = DataFrame(
            {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")},
            index=np.arange(10),
        )
        result = df.dtypes
        expected = Series(
            [
                np.dtype("float64"),
                np.dtype("int64"),
                np.dtype(objectname),
                np.dtype(datetime64name),
            ],
            index=["a", "b", "c", "f"],
        )
        tm.assert_series_equal(result, expected)

        # several float/int widths alongside a string and a datetime column
        df = DataFrame(
            {
                "a": 1.0,
                "b": 2,
                "c": "foo",
                "d": np.array([1.0] * 10, dtype="float32"),
                "e": np.array([1] * 10, dtype="int32"),
                "f": np.array([1] * 10, dtype="int16"),
                "g": Timestamp("20010102"),
            },
            index=np.arange(10),
        )

        result = df._get_numeric_data()
        expected = df.loc[:, ["a", "b", "d", "e", "f"]]
        tm.assert_frame_equal(result, expected)

        # a frame with only the string and datetime columns has nothing to keep
        only_obj = df.loc[:, ["c", "g"]]
        result = only_obj._get_numeric_data()
        expected = df.loc[:, []]
        tm.assert_frame_equal(result, expected)

        df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]})
        result = df._get_numeric_data()
        expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]})
        tm.assert_frame_equal(result, expected)

        # applying it again to an already-numeric frame changes nothing
        df = result.copy()
        result = df._get_numeric_data()
        expected = df
        tm.assert_frame_equal(result, expected)

    def test_get_numeric_data_mixed_dtype(self):
        """Integer, boolean and float columns are kept; string and None ones are not."""
        # numeric and object columns
        df = DataFrame(
            {
                "a": [1, 2, 3],
                "b": [True, False, True],
                "c": ["foo", "bar", "baz"],
                "d": [None, None, None],
                "e": [3.14, 0.577, 2.773],
            }
        )
        result = df._get_numeric_data()
        tm.assert_index_equal(result.columns, Index(["a", "b", "e"]))

    def test_get_numeric_data_extension_dtype(self):
        """Nullable-integer columns are kept; Categorical and Interval ones are not."""
        # GH#22290
        # The nullable-integer columns are built through the public Series
        # constructor with the "Int64"/"UInt8" dtype aliases, so the test does
        # not depend on the internal pandas.core.arrays.integer_array helper.
        df = DataFrame(
            {
                "A": Series([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
                "B": Categorical(list("abcabc")),
                "C": Series([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
                "D": IntervalArray.from_breaks(range(7)),
            }
        )
        result = df._get_numeric_data()
        expected = df.loc[:, ["A", "C"]]
        tm.assert_frame_equal(result, expected)
85 changes: 1 addition & 84 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
option_context,
)
import pandas._testing as tm
from pandas.core.arrays import IntervalArray, integer_array
from pandas.core.internals import ObjectBlock
from pandas.core.internals.blocks import IntBlock

Expand Down Expand Up @@ -306,73 +305,6 @@ def test_is_mixed_type(self, float_frame, float_string_frame):
assert not float_frame._is_mixed_type
assert float_string_frame._is_mixed_type

def test_get_numeric_data(self):

datetime64name = np.dtype("M8[ns]").name
objectname = np.dtype(np.object_).name

df = DataFrame(
{"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")},
index=np.arange(10),
)
result = df.dtypes
expected = Series(
[
np.dtype("float64"),
np.dtype("int64"),
np.dtype(objectname),
np.dtype(datetime64name),
],
index=["a", "b", "c", "f"],
)
tm.assert_series_equal(result, expected)

df = DataFrame(
{
"a": 1.0,
"b": 2,
"c": "foo",
"d": np.array([1.0] * 10, dtype="float32"),
"e": np.array([1] * 10, dtype="int32"),
"f": np.array([1] * 10, dtype="int16"),
"g": Timestamp("20010102"),
},
index=np.arange(10),
)

result = df._get_numeric_data()
expected = df.loc[:, ["a", "b", "d", "e", "f"]]
tm.assert_frame_equal(result, expected)

only_obj = df.loc[:, ["c", "g"]]
result = only_obj._get_numeric_data()
expected = df.loc[:, []]
tm.assert_frame_equal(result, expected)

df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]})
result = df._get_numeric_data()
expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]})
tm.assert_frame_equal(result, expected)

df = result.copy()
result = df._get_numeric_data()
expected = df
tm.assert_frame_equal(result, expected)

def test_get_numeric_data_extension_dtype(self):
# GH 22290
df = DataFrame(
{
"A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
"B": Categorical(list("abcabc")),
"C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
"D": IntervalArray.from_breaks(range(7)),
}
)
result = df._get_numeric_data()
expected = df.loc[:, ["A", "C"]]
tm.assert_frame_equal(result, expected)

def test_stale_cached_series_bug_473(self):

# this is chained, but ok
Expand All @@ -390,21 +322,6 @@ def test_stale_cached_series_bug_473(self):
exp = Y["g"].sum() # noqa
assert pd.isna(Y["g"]["c"])

def test_get_X_columns(self):
# numeric and object columns

df = DataFrame(
{
"a": [1, 2, 3],
"b": [True, False, True],
"c": ["foo", "bar", "baz"],
"d": [None, None, None],
"e": [3.14, 0.577, 2.773],
}
)

tm.assert_index_equal(df._get_numeric_data().columns, pd.Index(["a", "b", "e"]))

def test_strange_column_corruption_issue(self):
# FIXME: dont leave commented-out
# (wesm) Unclear how exactly this is related to internal matters
Expand Down Expand Up @@ -458,7 +375,7 @@ def test_update_inplace_sets_valid_block_values():
df["a"].fillna(1, inplace=True)

# check we havent put a Series into any block.values
assert isinstance(df._mgr.blocks[0].values, pd.Categorical)
assert isinstance(df._mgr.blocks[0].values, Categorical)

# smoketest for OP bug from GH#35731
assert df.isnull().sum().sum() == 0
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/generic/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,6 @@ def test_nonzero_single_element(self):
with pytest.raises(ValueError, match=msg):
bool(df)

def test_get_numeric_data_preserve_dtype(self):

# get the numeric data
o = DataFrame({"A": [1, "2", 3.0]})
result = o._get_numeric_data()
expected = DataFrame(index=[0, 1, 2], dtype=object)
self._compare(result, expected)

def test_metadata_propagation_indiv_groupby(self):
# groupby
df = DataFrame(
Expand Down
22 changes: 1 addition & 21 deletions pandas/tests/generic/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,11 @@ def test_set_axis_name_raises(self):
with pytest.raises(ValueError, match=msg):
s._set_axis_name(name="a", axis=1)

def test_get_numeric_data_preserve_dtype(self):

# get the numeric data
o = Series([1, 2, 3])
result = o._get_numeric_data()
self._compare(result, o)

o = Series([1, "2", 3.0])
result = o._get_numeric_data()
expected = Series([], dtype=object, index=pd.Index([], dtype=object))
self._compare(result, expected)

o = Series([True, False, True])
result = o._get_numeric_data()
self._compare(result, o)

def test_get_bool_data_preserve_dtype(self):
o = Series([True, False, True])
result = o._get_bool_data()
self._compare(result, o)

o = Series(date_range("20130101", periods=3))
result = o._get_numeric_data()
expected = Series([], dtype="M8[ns]", index=pd.Index([], dtype=object))
self._compare(result, expected)

def test_nonzero_single_element(self):

# allow single item via bool method
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def test_setitem_from_duplicate_axis(self):
class TestLoc2:
# TODO: better name, just separating out things that rely on base class

def test_loc_getitem_missing_unicode_key(self):
    """Looking up an absent non-ASCII column label raises KeyError naming it."""
    missing_label = "\u05d0"
    frame = DataFrame({"a": [1]})
    # the failure should be a KeyError, not a UnicodeEncodeError
    with pytest.raises(KeyError, match=missing_label):
        frame.loc[:, missing_label]

def test_loc_getitem_dups(self):
# GH 5678
# repeated getitems on a dup index returning a ndarray
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,12 +1146,6 @@ def test_make_block_no_pandas_array():
assert result.is_extension is False


def test_missing_unicode_key():
df = DataFrame({"a": [1]})
with pytest.raises(KeyError, match="\u05d0"):
df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError


def test_single_block_manager_fastpath_deprecated():
# GH#33092
ser = Series(range(3))
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/series/methods/test_get_numeric_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pandas import Index, Series, date_range
import pandas._testing as tm


class TestGetNumericData:
    """Tests for ``Series._get_numeric_data``."""

    def test_get_numeric_data_preserve_dtype(self):
        """Integer and boolean Series come back as-is; others come back empty."""
        no_labels = Index([], dtype=object)

        # integer values: the result equals the input
        ints = Series([1, 2, 3])
        tm.assert_series_equal(ints._get_numeric_data(), ints)

        # mixed Python objects: the result is empty and has object dtype
        mixed = Series([1, "2", 3.0])
        empty_object = Series([], dtype=object, index=no_labels)
        tm.assert_series_equal(mixed._get_numeric_data(), empty_object)

        # boolean values: the result equals the input
        flags = Series([True, False, True])
        tm.assert_series_equal(flags._get_numeric_data(), flags)

        # datetimes: the result is empty and has datetime64[ns] dtype
        stamps = Series(date_range("20130101", periods=3))
        empty_datetime = Series([], dtype="M8[ns]", index=no_labels)
        tm.assert_series_equal(stamps._get_numeric_data(), empty_datetime)