Skip to content

Commit 4479c82

Browse files
author
TomAugspurger
committed
BUG: don't use Counter and make test unambiguous
1 parent 12a585c commit 4479c82

File tree

2 files changed

+17
-20
lines changed

2 files changed

+17
-20
lines changed

pandas/core/generic.py

+9-13
Original file line numberDiff line numberDiff line change
@@ -3573,22 +3573,18 @@ def describe_numeric_1d(series, percentiles):
35733573
[series.max()])
35743574

35753575
def describe_categorical_1d(data):
3576-
if data.dtype == object:
3577-
names = ['count', 'unique']
3578-
objcounts = data.value_counts()
3579-
result = [data.count(), len(objcounts)]
3580-
if result[1] > 0:
3576+
names = ['count', 'unique']
3577+
objcounts = data.value_counts()
3578+
result = [data.count(), len(objcounts)]
3579+
if result[1] > 0:
3580+
top, freq = objcounts.index[0], objcounts.iloc[0]
3581+
3582+
if data.dtype == object:
35813583
names += ['top', 'freq']
3582-
top, freq = objcounts.index[0], objcounts.iloc[0]
35833584
result += [top, freq]
35843585

3585-
elif issubclass(data.dtype.type, np.datetime64):
3586-
names = ['count', 'unique']
3587-
asint = data.dropna().values.view('i8')
3588-
objcounts = compat.Counter(asint)
3589-
result = [data.count(), len(objcounts)]
3590-
if result[1] > 0:
3591-
top, freq = objcounts.most_common(1)[0]
3586+
elif issubclass(data.dtype.type, np.datetime64):
3587+
asint = data.dropna().values.view('i8')
35923588
names += ['first', 'last', 'top', 'freq']
35933589
result += [lib.Timestamp(asint.min()),
35943590
lib.Timestamp(asint.max()),

pandas/tests/test_generic.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -988,30 +988,31 @@ def test_describe_objects(self):
988988
assert_frame_equal(result, expected)
989989

990990
df = DataFrame({"C1": pd.date_range('2010-01-01', periods=4, freq='D')})
991+
df.loc[4] = pd.Timestamp('2010-01-04')
991992
result = df.describe()
992-
expected = DataFrame({"C1": [4, 4, pd.Timestamp('2010-01-01'),
993+
expected = DataFrame({"C1": [5, 4, pd.Timestamp('2010-01-01'),
993994
pd.Timestamp('2010-01-04'),
994-
pd.Timestamp('2010-01-01'), 1]},
995+
pd.Timestamp('2010-01-04'), 2]},
995996
index=['count', 'unique', 'first', 'last', 'top',
996997
'freq'])
997998
assert_frame_equal(result, expected)
998999

9991000
# mix time and str
1000-
df['C2'] = ['a', 'a', 'b', 'c']
1001+
df['C2'] = ['a', 'a', 'b', 'c', 'a']
10011002
result = df.describe()
10021003
# when mix of datetime / obj the index gets reordered.
1003-
expected['C2'] = [4, 3, np.nan, np.nan, 'a', 2]
1004+
expected['C2'] = [5, 3, np.nan, np.nan, 'a', 3]
10041005
assert_frame_equal(result, expected)
10051006

10061007
# just str
1007-
expected = DataFrame({'C2': [4, 3, 'a', 2]},
1008+
expected = DataFrame({'C2': [5, 3, 'a', 4]},
10081009
index=['count', 'unique', 'top', 'freq'])
10091010
result = df[['C2']].describe()
10101011

10111012
# mix of time, str, numeric
1012-
df['C3'] = [2, 4, 6, 8]
1013+
df['C3'] = [2, 4, 6, 8, 2]
10131014
result = df.describe()
1014-
expected = DataFrame({"C3": [4., 5., 2.5819889, 2., 3.5, 5., 6.5, 8.]},
1015+
expected = DataFrame({"C3": [5., 4.4, 2.607681, 2., 2., 4., 6., 8.]},
10151016
index=['count', 'mean', 'std', 'min', '25%',
10161017
'50%', '75%', 'max'])
10171018
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)