Skip to content

Commit d5f6dd9

Browse files
authored
TST/REF: collect tests for get_numeric_data (#37634)
* misplaced loc test
* TST/REF: collect get_numeric_data tests
1 parent adeed7a commit d5f6dd9

File tree

7 files changed: +128 -119 lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,96 @@
1+
import numpy as np
2+
3+
from pandas import Categorical, DataFrame, Index, Series, Timestamp
4+
import pandas._testing as tm
5+
from pandas.core.arrays import IntervalArray, integer_array
6+
7+
8+
class TestGetNumericData:
9+
def test_get_numeric_data_preserve_dtype(self):
10+
# get the numeric data
11+
obj = DataFrame({"A": [1, "2", 3.0]})
12+
result = obj._get_numeric_data()
13+
expected = DataFrame(index=[0, 1, 2], dtype=object)
14+
tm.assert_frame_equal(result, expected)
15+
16+
def test_get_numeric_data(self):
17+
18+
datetime64name = np.dtype("M8[ns]").name
19+
objectname = np.dtype(np.object_).name
20+
21+
df = DataFrame(
22+
{"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")},
23+
index=np.arange(10),
24+
)
25+
result = df.dtypes
26+
expected = Series(
27+
[
28+
np.dtype("float64"),
29+
np.dtype("int64"),
30+
np.dtype(objectname),
31+
np.dtype(datetime64name),
32+
],
33+
index=["a", "b", "c", "f"],
34+
)
35+
tm.assert_series_equal(result, expected)
36+
37+
df = DataFrame(
38+
{
39+
"a": 1.0,
40+
"b": 2,
41+
"c": "foo",
42+
"d": np.array([1.0] * 10, dtype="float32"),
43+
"e": np.array([1] * 10, dtype="int32"),
44+
"f": np.array([1] * 10, dtype="int16"),
45+
"g": Timestamp("20010102"),
46+
},
47+
index=np.arange(10),
48+
)
49+
50+
result = df._get_numeric_data()
51+
expected = df.loc[:, ["a", "b", "d", "e", "f"]]
52+
tm.assert_frame_equal(result, expected)
53+
54+
only_obj = df.loc[:, ["c", "g"]]
55+
result = only_obj._get_numeric_data()
56+
expected = df.loc[:, []]
57+
tm.assert_frame_equal(result, expected)
58+
59+
df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]})
60+
result = df._get_numeric_data()
61+
expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]})
62+
tm.assert_frame_equal(result, expected)
63+
64+
df = result.copy()
65+
result = df._get_numeric_data()
66+
expected = df
67+
tm.assert_frame_equal(result, expected)
68+
69+
def test_get_numeric_data_mixed_dtype(self):
70+
# numeric and object columns
71+
72+
df = DataFrame(
73+
{
74+
"a": [1, 2, 3],
75+
"b": [True, False, True],
76+
"c": ["foo", "bar", "baz"],
77+
"d": [None, None, None],
78+
"e": [3.14, 0.577, 2.773],
79+
}
80+
)
81+
result = df._get_numeric_data()
82+
tm.assert_index_equal(result.columns, Index(["a", "b", "e"]))
83+
84+
def test_get_numeric_data_extension_dtype(self):
85+
# GH#22290
86+
df = DataFrame(
87+
{
88+
"A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
89+
"B": Categorical(list("abcabc")),
90+
"C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
91+
"D": IntervalArray.from_breaks(range(7)),
92+
}
93+
)
94+
result = df._get_numeric_data()
95+
expected = df.loc[:, ["A", "C"]]
96+
tm.assert_frame_equal(result, expected)

pandas/tests/frame/test_block_internals.py

+1 -84
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
option_context,
1717
)
1818
import pandas._testing as tm
19-
from pandas.core.arrays import IntervalArray, integer_array
2019
from pandas.core.internals import ObjectBlock
2120
from pandas.core.internals.blocks import IntBlock
2221

@@ -306,73 +305,6 @@ def test_is_mixed_type(self, float_frame, float_string_frame):
306305
assert not float_frame._is_mixed_type
307306
assert float_string_frame._is_mixed_type
308307

309-
def test_get_numeric_data(self):
310-
311-
datetime64name = np.dtype("M8[ns]").name
312-
objectname = np.dtype(np.object_).name
313-
314-
df = DataFrame(
315-
{"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")},
316-
index=np.arange(10),
317-
)
318-
result = df.dtypes
319-
expected = Series(
320-
[
321-
np.dtype("float64"),
322-
np.dtype("int64"),
323-
np.dtype(objectname),
324-
np.dtype(datetime64name),
325-
],
326-
index=["a", "b", "c", "f"],
327-
)
328-
tm.assert_series_equal(result, expected)
329-
330-
df = DataFrame(
331-
{
332-
"a": 1.0,
333-
"b": 2,
334-
"c": "foo",
335-
"d": np.array([1.0] * 10, dtype="float32"),
336-
"e": np.array([1] * 10, dtype="int32"),
337-
"f": np.array([1] * 10, dtype="int16"),
338-
"g": Timestamp("20010102"),
339-
},
340-
index=np.arange(10),
341-
)
342-
343-
result = df._get_numeric_data()
344-
expected = df.loc[:, ["a", "b", "d", "e", "f"]]
345-
tm.assert_frame_equal(result, expected)
346-
347-
only_obj = df.loc[:, ["c", "g"]]
348-
result = only_obj._get_numeric_data()
349-
expected = df.loc[:, []]
350-
tm.assert_frame_equal(result, expected)
351-
352-
df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]})
353-
result = df._get_numeric_data()
354-
expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]})
355-
tm.assert_frame_equal(result, expected)
356-
357-
df = result.copy()
358-
result = df._get_numeric_data()
359-
expected = df
360-
tm.assert_frame_equal(result, expected)
361-
362-
def test_get_numeric_data_extension_dtype(self):
363-
# GH 22290
364-
df = DataFrame(
365-
{
366-
"A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
367-
"B": Categorical(list("abcabc")),
368-
"C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
369-
"D": IntervalArray.from_breaks(range(7)),
370-
}
371-
)
372-
result = df._get_numeric_data()
373-
expected = df.loc[:, ["A", "C"]]
374-
tm.assert_frame_equal(result, expected)
375-
376308
def test_stale_cached_series_bug_473(self):
377309

378310
# this is chained, but ok
@@ -390,21 +322,6 @@ def test_stale_cached_series_bug_473(self):
390322
exp = Y["g"].sum() # noqa
391323
assert pd.isna(Y["g"]["c"])
392324

393-
def test_get_X_columns(self):
394-
# numeric and object columns
395-
396-
df = DataFrame(
397-
{
398-
"a": [1, 2, 3],
399-
"b": [True, False, True],
400-
"c": ["foo", "bar", "baz"],
401-
"d": [None, None, None],
402-
"e": [3.14, 0.577, 2.773],
403-
}
404-
)
405-
406-
tm.assert_index_equal(df._get_numeric_data().columns, pd.Index(["a", "b", "e"]))
407-
408325
def test_strange_column_corruption_issue(self):
409326
# FIXME: dont leave commented-out
410327
# (wesm) Unclear how exactly this is related to internal matters
@@ -458,7 +375,7 @@ def test_update_inplace_sets_valid_block_values():
458375
df["a"].fillna(1, inplace=True)
459376

460377
# check we havent put a Series into any block.values
461-
assert isinstance(df._mgr.blocks[0].values, pd.Categorical)
378+
assert isinstance(df._mgr.blocks[0].values, Categorical)
462379

463380
# smoketest for OP bug from GH#35731
464381
assert df.isnull().sum().sum() == 0

pandas/tests/generic/test_frame.py

-8
Original file line number | Diff line number | Diff line change
@@ -61,14 +61,6 @@ def test_nonzero_single_element(self):
6161
with pytest.raises(ValueError, match=msg):
6262
bool(df)
6363

64-
def test_get_numeric_data_preserve_dtype(self):
65-
66-
# get the numeric data
67-
o = DataFrame({"A": [1, "2", 3.0]})
68-
result = o._get_numeric_data()
69-
expected = DataFrame(index=[0, 1, 2], dtype=object)
70-
self._compare(result, expected)
71-
7264
def test_metadata_propagation_indiv_groupby(self):
7365
# groupby
7466
df = DataFrame(

pandas/tests/generic/test_series.py

+1 -21
Original file line number | Diff line number | Diff line change
@@ -35,31 +35,11 @@ def test_set_axis_name_raises(self):
3535
with pytest.raises(ValueError, match=msg):
3636
s._set_axis_name(name="a", axis=1)
3737

38-
def test_get_numeric_data_preserve_dtype(self):
39-
40-
# get the numeric data
41-
o = Series([1, 2, 3])
42-
result = o._get_numeric_data()
43-
self._compare(result, o)
44-
45-
o = Series([1, "2", 3.0])
46-
result = o._get_numeric_data()
47-
expected = Series([], dtype=object, index=pd.Index([], dtype=object))
48-
self._compare(result, expected)
49-
50-
o = Series([True, False, True])
51-
result = o._get_numeric_data()
52-
self._compare(result, o)
53-
38+
def test_get_bool_data_preserve_dtype(self):
5439
o = Series([True, False, True])
5540
result = o._get_bool_data()
5641
self._compare(result, o)
5742

58-
o = Series(date_range("20130101", periods=3))
59-
result = o._get_numeric_data()
60-
expected = Series([], dtype="M8[ns]", index=pd.Index([], dtype=object))
61-
self._compare(result, expected)
62-
6343
def test_nonzero_single_element(self):
6444

6545
# allow single item via bool method

pandas/tests/indexing/test_loc.py

+5
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,11 @@ def test_setitem_from_duplicate_axis(self):
132132
class TestLoc2:
133133
# TODO: better name, just separating out things that rely on base class
134134

135+
def test_loc_getitem_missing_unicode_key(self):
136+
df = DataFrame({"a": [1]})
137+
with pytest.raises(KeyError, match="\u05d0"):
138+
df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
139+
135140
def test_loc_getitem_dups(self):
136141
# GH 5678
137142
# repeated getitems on a dup index returning a ndarray

pandas/tests/internals/test_internals.py

-6
Original file line number | Diff line number | Diff line change
@@ -1146,12 +1146,6 @@ def test_make_block_no_pandas_array():
11461146
assert result.is_extension is False
11471147

11481148

1149-
def test_missing_unicode_key():
1150-
df = DataFrame({"a": [1]})
1151-
with pytest.raises(KeyError, match="\u05d0"):
1152-
df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
1153-
1154-
11551149
def test_single_block_manager_fastpath_deprecated():
11561150
# GH#33092
11571151
ser = Series(range(3))
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
from pandas import Index, Series, date_range
2+
import pandas._testing as tm
3+
4+
5+
class TestGetNumericData:
6+
def test_get_numeric_data_preserve_dtype(self):
7+
8+
# get the numeric data
9+
obj = Series([1, 2, 3])
10+
result = obj._get_numeric_data()
11+
tm.assert_series_equal(result, obj)
12+
13+
obj = Series([1, "2", 3.0])
14+
result = obj._get_numeric_data()
15+
expected = Series([], dtype=object, index=Index([], dtype=object))
16+
tm.assert_series_equal(result, expected)
17+
18+
obj = Series([True, False, True])
19+
result = obj._get_numeric_data()
20+
tm.assert_series_equal(result, obj)
21+
22+
obj = Series(date_range("20130101", periods=3))
23+
result = obj._get_numeric_data()
24+
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
25+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)