14
14
15
15
16
16
class TestDataFrameQuantile :
17
@pytest.mark.parametrize(
    "non_num_col",
    [
        pd.date_range("2014-01-01", periods=3, freq="m"),
        ["a", "b", "c"],
        [DataFrame, Series, Timestamp],
    ],
)
def test_numeric_only_default_false_warning(self, non_num_col):
    """Check ``quantile`` warns when ``numeric_only`` is not passed.

    A frame with two integer columns plus one parametrized non-numeric
    column is reduced with ``quantile(0.5)``; the call must emit a
    ``FutureWarning`` mentioning ``numeric_only`` and the result must
    contain only the numeric columns "A" and "B".

    GH #7308
    """
    frame = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]})
    frame["C"] = non_num_col

    # Medians of the numeric columns only; column "C" is absent on purpose.
    want = Series([2.0, 3.0], index=["A", "B"], name=0.5)

    with tm.assert_produces_warning(FutureWarning, match="numeric_only"):
        got = frame.quantile(0.5)
    tm.assert_series_equal(got, want)
38
+
17
39
@pytest .mark .parametrize (
18
40
"df,expected" ,
19
41
[
@@ -43,21 +65,21 @@ def test_quantile(self, datetime_frame):
43
65
from numpy import percentile
44
66
45
67
df = datetime_frame
46
- q = df .quantile (0.1 , axis = 0 )
68
+ q = df .quantile (0.1 , axis = 0 , numeric_only = True )
47
69
assert q ["A" ] == percentile (df ["A" ], 10 )
48
70
tm .assert_index_equal (q .index , df .columns )
49
71
50
- q = df .quantile (0.9 , axis = 1 )
72
+ q = df .quantile (0.9 , axis = 1 , numeric_only = True )
51
73
assert q ["2000-01-17" ] == percentile (df .loc ["2000-01-17" ], 90 )
52
74
tm .assert_index_equal (q .index , df .index )
53
75
54
76
# test degenerate case
55
- q = DataFrame ({"x" : [], "y" : []}).quantile (0.1 , axis = 0 )
77
+ q = DataFrame ({"x" : [], "y" : []}).quantile (0.1 , axis = 0 , numeric_only = True )
56
78
assert np .isnan (q ["x" ]) and np .isnan (q ["y" ])
57
79
58
80
# non-numeric exclusion
59
81
df = DataFrame ({"col1" : ["A" , "A" , "B" , "B" ], "col2" : [1 , 2 , 3 , 4 ]})
60
- rs = df .quantile (0.5 )
82
+ rs = df .quantile (0.5 , numeric_only = True )
61
83
with tm .assert_produces_warning (FutureWarning , match = "Select only valid" ):
62
84
xp = df .median ().rename (0.5 )
63
85
tm .assert_series_equal (rs , xp )
@@ -78,7 +100,7 @@ def test_quantile(self, datetime_frame):
78
100
# so that we exclude non-numeric along the same axis
79
101
# See GH #7312
80
102
df = DataFrame ([[1 , 2 , 3 ], ["a" , "b" , 4 ]])
81
- result = df .quantile (0.5 , axis = 1 )
103
+ result = df .quantile (0.5 , axis = 1 , numeric_only = True )
82
104
expected = Series ([3.0 , 4.0 ], index = [0 , 1 ], name = 0.5 )
83
105
tm .assert_series_equal (result , expected )
84
106
@@ -107,7 +129,7 @@ def test_quantile_axis_mixed(self):
107
129
"D" : ["foo" , "bar" , "baz" ],
108
130
}
109
131
)
110
- result = df .quantile (0.5 , axis = 1 )
132
+ result = df .quantile (0.5 , axis = 1 , numeric_only = True )
111
133
expected = Series ([1.5 , 2.5 , 3.5 ], name = 0.5 )
112
134
tm .assert_series_equal (result , expected )
113
135
@@ -206,7 +228,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame):
206
228
207
229
# interpolation = linear (default case)
208
230
df = datetime_frame
209
- q = df .quantile (0.1 , axis = 0 , interpolation = "linear" )
231
+ q = df .quantile (0.1 , axis = 0 , numeric_only = True , interpolation = "linear" )
210
232
assert q ["A" ] == np .percentile (df ["A" ], 10 )
211
233
212
234
def test_quantile_interpolation_int (self , int_frame ):
@@ -249,7 +271,7 @@ def test_quantile_datetime(self):
249
271
df = DataFrame ({"a" : pd .to_datetime (["2010" , "2011" ]), "b" : [0 , 5 ]})
250
272
251
273
# exclude datetime
252
- result = df .quantile (0.5 )
274
+ result = df .quantile (0.5 , numeric_only = True )
253
275
expected = Series ([2.5 ], index = ["b" ])
254
276
255
277
# datetime
@@ -285,11 +307,11 @@ def test_quantile_datetime(self):
285
307
tm .assert_frame_equal (result , expected )
286
308
287
309
# empty when numeric_only=True
288
- result = df [["a" , "c" ]].quantile (0.5 )
310
+ result = df [["a" , "c" ]].quantile (0.5 , numeric_only = True )
289
311
expected = Series ([], index = [], dtype = np .float64 , name = 0.5 )
290
312
tm .assert_series_equal (result , expected )
291
313
292
- result = df [["a" , "c" ]].quantile ([0.5 ])
314
+ result = df [["a" , "c" ]].quantile ([0.5 ], numeric_only = True )
293
315
expected = DataFrame (index = [0.5 ])
294
316
tm .assert_frame_equal (result , expected )
295
317
@@ -567,12 +589,12 @@ def test_quantile_empty_no_columns(self):
567
589
# GH#23925 _get_numeric_data may drop all columns
568
590
df = DataFrame (pd .date_range ("1/1/18" , periods = 5 ))
569
591
df .columns .name = "captain tightpants"
570
- result = df .quantile (0.5 )
592
+ result = df .quantile (0.5 , numeric_only = True )
571
593
expected = Series ([], index = [], name = 0.5 , dtype = np .float64 )
572
594
expected .index .name = "captain tightpants"
573
595
tm .assert_series_equal (result , expected )
574
596
575
- result = df .quantile ([0.5 ])
597
+ result = df .quantile ([0.5 ], numeric_only = True )
576
598
expected = DataFrame ([], index = [0.5 ], columns = [])
577
599
expected .columns .name = "captain tightpants"
578
600
tm .assert_frame_equal (result , expected )
@@ -763,7 +785,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
763
785
"c" : pd .to_datetime (["2011" , "2012" ]),
764
786
}
765
787
)
766
- result = df [["a" , "c" ]].quantile (0.5 , axis = axis )
788
+ result = df [["a" , "c" ]].quantile (0.5 , axis = axis , numeric_only = True )
767
789
expected = Series (
768
790
expected_data , name = 0.5 , index = Index (expected_index ), dtype = np .float64
769
791
)
0 commit comments