Skip to content

Commit d4608f5

Browse files
committed
BUG: test coverage and bugfix in rank_2d_generic
1 parent a7402c6 commit d4608f5

File tree

3 files changed

+26
-42
lines changed

3 files changed

+26
-42
lines changed

pandas/core/frame.py

+13-38
Original file line number | Diff line number | Diff line change
@@ -3102,8 +3102,10 @@ def corr(self, method='pearson'):
31023102
-------
31033103
y : DataFrame
31043104
"""
3105-
cols = self._get_numeric_columns()
3106-
mat = self.as_matrix(cols).T
3105+
numeric_df = self._get_numeric_data()
3106+
mat = numeric_df.values.T
3107+
cols = numeric_df.columns
3108+
31073109
corrf = nanops.get_corr_func(method)
31083110
K = len(cols)
31093111
correl = np.empty((K, K), dtype=float)
@@ -3128,8 +3130,9 @@ def cov(self):
31283130
-------
31293131
y : DataFrame
31303132
"""
3131-
cols = self._get_numeric_columns()
3132-
mat = self.as_matrix(cols).T
3133+
numeric_df = self._get_numeric_data()
3134+
mat = numeric_df.values.T
3135+
cols = numeric_df.columns
31333136
baseCov = np.cov(mat)
31343137

31353138
for i, j, ac, bc in self._cov_helper(mat):
@@ -3205,9 +3208,9 @@ def describe(self):
32053208
-------
32063209
DataFrame of summary statistics
32073210
"""
3208-
numeric_columns = self._get_numeric_columns()
3211+
numdata = self._get_numeric_data()
32093212

3210-
if len(numeric_columns) == 0:
3213+
if len(numdata.columns) == 0:
32113214
return DataFrame(dict((k, v.describe())
32123215
for k, v in self.iteritems()),
32133216
columns=self.columns)
@@ -3217,13 +3220,14 @@ def describe(self):
32173220

32183221
destat = []
32193222

3220-
for column in numeric_columns:
3223+
for column in numdata.columns:
32213224
series = self[column]
32223225
destat.append([series.count(), series.mean(), series.std(),
32233226
series.min(), series.quantile(.25), series.median(),
32243227
series.quantile(.75), series.max()])
32253228

3226-
return self._constructor(map(list, zip(*destat)), index=destat_columns, columns=numeric_columns)
3229+
return self._constructor(map(list, zip(*destat)), index=destat_columns,
3230+
columns=numdata.columns)
32273231

32283232
#----------------------------------------------------------------------
32293233
# ndarray-like stats methods
@@ -3252,7 +3256,7 @@ def count(self, axis=0, level=None, numeric_only=False):
32523256
numeric_only=numeric_only)
32533257

32543258
if numeric_only:
3255-
frame = self.ix[:, self._get_numeric_columns()]
3259+
frame = self._get_numeric_data()
32563260
else:
32573261
frame = self
32583262

@@ -3486,26 +3490,6 @@ def _get_agg_axis(self, axis_num):
34863490
else:
34873491
raise Exception('Must have 0<= axis <= 1')
34883492

3489-
def _get_numeric_columns(self):
3490-
from pandas.core.internals import ObjectBlock
3491-
3492-
cols = []
3493-
for col, blk in zip(self.columns, self._data.block_id_vector):
3494-
if not isinstance(self._data.blocks[blk], ObjectBlock):
3495-
cols.append(col)
3496-
3497-
return cols
3498-
3499-
def _get_nonnumeric_columns(self):
3500-
from pandas.core.internals import ObjectBlock
3501-
3502-
cols = []
3503-
for col, blk in zip(self.columns, self._data.block_id_vector):
3504-
if isinstance(self._data.blocks[blk], ObjectBlock):
3505-
cols.append(col)
3506-
3507-
return cols
3508-
35093493
def _get_numeric_data(self):
35103494
if self._is_mixed_type:
35113495
num_data = self._data.get_numeric_data()
@@ -3516,15 +3500,6 @@ def _get_numeric_data(self):
35163500
else:
35173501
return self.ix[:, []]
35183502

3519-
def _get_nonnumeric_data(self):
3520-
if self._is_mixed_type:
3521-
return self.ix[:, self._get_nonnumeric_columns()]
3522-
else:
3523-
if self.values.dtype == np.object_:
3524-
return self
3525-
else:
3526-
return self.ix[:, []]
3527-
35283503
def quantile(self, q=0.5, axis=0):
35293504
"""
35303505
Return values at the given quantile over requested axis, a la

pandas/src/stats.pyx

+5-2
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ def rank_2d_float64(object in_arr, axis=0):
9090
else:
9191
return ranks
9292

93-
def rank_1d_generic(object in_arr):
93+
def rank_1d_generic(object in_arr, bint retry=1):
9494
"""
9595
Fast NaN-friendly version of scipy.stats.rankdata
9696
"""
@@ -120,8 +120,11 @@ def rank_1d_generic(object in_arr):
120120
try:
121121
_as = values.argsort()
122122
except TypeError:
123+
if not retry:
124+
raise
125+
123126
valid_locs = (-mask).nonzero()[0]
124-
ranks.put(valid_locs, rank_1d_generic(values.take(valid_locs)))
127+
ranks.put(valid_locs, rank_1d_generic(values.take(valid_locs), 0))
125128
np.putmask(ranks, mask, np.nan)
126129
return ranks
127130

pandas/tests/test_frame.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -3803,8 +3803,8 @@ def test_get_X_columns(self):
38033803
'd' : [None, None, None],
38043804
'e' : [3.14, 0.577, 2.773]})
38053805

3806-
self.assertEquals(df._get_numeric_columns(), ['a', 'e'])
3807-
# self.assertEquals(df._get_object_columns(), ['c', 'd'])
3806+
self.assert_(np.array_equal(df._get_numeric_data().columns,
3807+
['a', 'e']))
38083808

38093809
def test_get_numeric_data(self):
38103810
df = DataFrame({'a' : 1., 'b' : 2, 'c' : 'foo'},
@@ -4196,6 +4196,12 @@ def test_rank2(self):
41964196
result = df.rank(1, numeric_only=False)
41974197
assert_frame_equal(result, expected)
41984198

4199+
# mixed-type frames
4200+
self.mixed_frame['foo'] = datetime.now()
4201+
result = self.mixed_frame.rank(1)
4202+
expected = self.mixed_frame.rank(1, numeric_only=True)
4203+
assert_frame_equal(result, expected)
4204+
41994205
def test_describe(self):
42004206
desc = self.tsframe.describe()
42014207
desc = self.mixed_frame.describe()

0 commit comments

Comments
 (0)