Skip to content

Commit e775d09

Browse files
committed
DOC: DataFrame stat method docstring cleanup/templating, some Series stat level docstring improvement, still need templating there
1 parent e88507a commit e775d09

File tree

2 files changed

+81
-223
lines changed

2 files changed

+81
-223
lines changed

pandas/core/frame.py

+45-186
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
_try_sort, _pfixed, _default_index,
2626
_infer_dtype, _stringify, _maybe_upcast)
2727
from pandas.core.daterange import DateRange
28-
from pandas.core.generic import AxisProperty, NDFrame
28+
from pandas.core.generic import NDFrame
2929
from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index
3030
from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels
3131
from pandas.core.internals import BlockManager, make_block, form_blocks
@@ -60,6 +60,39 @@
6060
result : DataFrame
6161
"""
6262

63+
# Shared numpydoc-style template for the DataFrame statistical reductions.
# It is filled in with %-style named substitution by _add_stat_doc:
#   name      -- statistic name used in the summary line (e.g. 'minimum')
#   na_action -- one-line statement of how NA/null values are handled
#   extras    -- optional extra parameter documentation ('' for none)
#   shortname -- label used for the entry in the Returns section
_stat_doc = """
Return %(name)s over requested axis.
%(na_action)s

Parameters
----------
axis : {0, 1}
    0 for row-wise, 1 for column-wise
skipna : boolean, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA
level : int, default None
    If the axis is a MultiIndex (hierarchical), compute the statistic
    along a particular level, collapsing into a DataFrame
%(extras)s
Returns
-------
%(shortname)s : Series (or DataFrame if level specified)
"""

# Default NA-handling sentence substituted for %(na_action)s.
_doc_exclude_na = "NA/null values are excluded"

# Extra parameter entry for reductions that accept ``numeric_only``; it is
# passed to _add_stat_doc as ``extras``.
_numeric_only_doc = """numeric_only : boolean, default False
    Include only float, int, boolean data
"""


def _add_stat_doc(f, name, shortname, na_action=_doc_exclude_na,
                  extras=''):
    """
    Fill in the shared stat docstring template and attach it to ``f``.

    Parameters
    ----------
    f : function
        Function whose ``__doc__`` is overwritten in place
    name : str
        Statistic name used in the summary line
    shortname : str
        Label used for the entry in the Returns section
    na_action : str, default _doc_exclude_na
        Sentence describing how NA/null values are treated
    extras : str, default ''
        Additional parameter documentation inserted before Returns

    Returns
    -------
    f : function
        The same function object, returned so the helper can also be used
        in an assignment or wrapped as a decorator
    """
    f.__doc__ = _stat_doc % {'name': name,
                             'shortname': shortname,
                             'na_action': na_action,
                             'extras': extras}
    return f
6396

6497
def _arith_method(func, name, default_axis='columns'):
6598
def f(self, other, axis=default_axis, fill_value=None):
@@ -2426,38 +2459,6 @@ def _count_level(self, level, axis=0, numeric_only=False):
24262459
return DataFrame(result, index=index, columns=columns)
24272460

24282461
def sum(self, axis=0, numeric_only=False, skipna=True, level=None):
2429-
"""
2430-
Return sum over requested axis
2431-
2432-
Parameters
2433-
----------
2434-
axis : {0, 1}
2435-
0 for row-wise, 1 for column-wise
2436-
numeric_only : boolean, default False
2437-
Include only float, int, boolean data
2438-
skipna : boolean, default True
2439-
Exclude NA/null values. If an entire row/column is NA, the result
2440-
will be NA
2441-
level : integer, default None
2442-
Choose a level to groupby before applying operation
2443-
2444-
Examples
2445-
--------
2446-
>>> df
2447-
c1 c2
2448-
a 1 0
2449-
b 0 2
2450-
c 3 0
2451-
d 0 4
2452-
2453-
>>> df.sum(axis=0)
2454-
c1 4
2455-
c2 6
2456-
2457-
Returns
2458-
-------
2459-
sum : Series
2460-
"""
24612462
if not level is None:
24622463
sumfunc = lambda x: x.sum(skipna=skipna)
24632464
return self.groupby(level=level).aggregate(sumfunc)
@@ -2484,25 +2485,9 @@ def sum(self, axis=0, numeric_only=False, skipna=True, level=None):
24842485
the_sum[ct_mask] = nan
24852486

24862487
return Series(the_sum, index=axis_labels)
2488+
_add_stat_doc(sum, 'sum', 'sum', extras=_numeric_only_doc)
24872489

24882490
def min(self, axis=0, skipna=True, level=None):
2489-
"""
2490-
Return minimum over requested axis. NA/null values are excluded
2491-
2492-
Parameters
2493-
----------
2494-
axis : {0, 1}
2495-
0 for row-wise, 1 for column-wise
2496-
skipna : boolean, default True
2497-
Exclude NA/null values. If an entire row/column is NA, the result
2498-
will be NA
2499-
level : integer, default None
2500-
Choose a level to groupby before applying operation
2501-
2502-
Returns
2503-
-------
2504-
min : Series
2505-
"""
25062491
values = self.values.copy()
25072492
if skipna and not issubclass(values.dtype.type, np.integer):
25082493
np.putmask(values, -np.isfinite(values), np.inf)
@@ -2512,25 +2497,9 @@ def min(self, axis=0, skipna=True, level=None):
25122497
return self.groupby(level=level).aggregate(minfunc)
25132498

25142499
return Series(values.min(axis), index=self._get_agg_axis(axis))
2500+
_add_stat_doc(min, 'minimum', 'min')
25152501

25162502
def max(self, axis=0, skipna=True, level=None):
2517-
"""
2518-
Return maximum over requested axis. NA/null values are excluded
2519-
2520-
Parameters
2521-
----------
2522-
axis : {0, 1}
2523-
0 for row-wise, 1 for column-wise
2524-
skipna : boolean, default True
2525-
Exclude NA/null values. If an entire row/column is NA, the result
2526-
will be NA
2527-
level : integer, default None
2528-
Choose a level to groupby before applying operation
2529-
2530-
Returns
2531-
-------
2532-
max : Series
2533-
"""
25342503
values = self.values.copy()
25352504
if skipna and not issubclass(values.dtype.type, np.integer):
25362505
np.putmask(values, -np.isfinite(values), -np.inf)
@@ -2540,25 +2509,9 @@ def max(self, axis=0, skipna=True, level=None):
25402509
return self.groupby(level=level).aggregate(maxfunc)
25412510

25422511
return Series(values.max(axis), index=self._get_agg_axis(axis))
2512+
_add_stat_doc(max, 'maximum', 'max')
25432513

25442514
def prod(self, axis=0, skipna=True, level=None):
2545-
"""
2546-
Return product over requested axis. NA/null values are treated as 1
2547-
2548-
Parameters
2549-
----------
2550-
axis : {0, 1}
2551-
0 for row-wise, 1 for column-wise
2552-
skipna : boolean, default True
2553-
Exclude NA/null values. If an entire row/column is NA, the result
2554-
will be NA
2555-
level : integer, default None
2556-
Choose a level to groupby before applying operation
2557-
2558-
Returns
2559-
-------
2560-
product : Series
2561-
"""
25622515
if not level is None:
25632516
prodfunc = lambda x: x.prod(skipna=skipna)
25642517
return self.groupby(level=level).aggregate(prodfunc)
@@ -2572,33 +2525,19 @@ def prod(self, axis=0, skipna=True, level=None):
25722525
result[count == 0] = nan
25732526

25742527
return Series(result, index=self._get_agg_axis(axis))
2528+
_add_stat_doc(prod, 'product', 'product',
2529+
na_action='NA/null values are treated as 1')
25752530
product = prod
25762531

25772532
def mean(self, axis=0, skipna=True, level=None):
    # No inline docstring: the documentation is attached right after this
    # definition by the _add_stat_doc(mean, 'mean', 'mean') call.
    if level is not None:
        # Group on the requested index level and aggregate each group with
        # its own mean.
        # NOTE(review): ``axis`` is not forwarded on this path -- confirm
        # that is intended.
        return self.groupby(level=level).aggregate(
            lambda x: x.mean(skipna=skipna))

    # Mean is computed as sum / count, both restricted to numeric data.
    summed = self.sum(axis, numeric_only=True, skipna=skipna)
    # Cast the counts to float before dividing (presumably to avoid
    # integer division -- confirm).
    count = self.count(axis, numeric_only=True).astype(float)
    return summed / count
2540+
_add_stat_doc(mean, 'mean', 'mean')
26022541

26032542
def quantile(self, q=0.5, axis=0):
26042543
"""
@@ -2632,23 +2571,6 @@ def f(arr):
26322571
return self.apply(f, axis=axis)
26332572

26342573
def median(self, axis=0, skipna=True, level=None):
2635-
"""
2636-
Return median over requested axis, NA/null are exluded
2637-
2638-
Parameters
2639-
----------
2640-
axis : {0, 1}
2641-
0 for row-wise, 1 for column-wise
2642-
skipna : boolean, default True
2643-
Exclude NA/null values. If an entire row/column is NA, the result
2644-
will be NA
2645-
level : integer, default None
2646-
Choose a level to groupby before applying operation
2647-
2648-
Returns
2649-
-------
2650-
Series or TimeSeries
2651-
"""
26522574
if not level is None:
26532575
medianfunc = lambda x: x.median(skipna=skipna)
26542576
return self.groupby(level=level).aggregate(medianfunc)
@@ -2661,25 +2583,9 @@ def median(self, axis=0, skipna=True, level=None):
26612583
return Series(med, index=self.index)
26622584
else:
26632585
raise Exception('Must have 0<= axis <= 1')
2586+
_add_stat_doc(median, 'median', 'median')
26642587

26652588
def mad(self, axis=0, skipna=True, level=None):
2666-
"""
2667-
Return mean absolute deviation over requested axis
2668-
2669-
Parameters
2670-
----------
2671-
axis : {0, 1}
2672-
0 for row-wise, 1 for column-wise
2673-
skipna : boolean, default True
2674-
Exclude NA/null values. If an entire row/column is NA, the result
2675-
will be NA
2676-
level : integer, default None
2677-
Choose a level to groupby before applying operation
2678-
2679-
Returns
2680-
-------
2681-
mad : Series
2682-
"""
26832589
if not level is None:
26842590
madfunc = lambda x: x.mad(skipna=skipna)
26852591
return self.groupby(level=level).aggregate(madfunc)
@@ -2689,25 +2595,9 @@ def mad(self, axis=0, skipna=True, level=None):
26892595
else:
26902596
demeaned = self.sub(self.mean(axis=1), axis=0)
26912597
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
2598+
_add_stat_doc(mad, 'mean absolute deviation', 'mad')
26922599

26932600
def var(self, axis=0, skipna=True, level=None):
2694-
"""
2695-
Return unbiased variance over requested axis
2696-
2697-
Parameters
2698-
----------
2699-
axis : {0, 1}
2700-
0 for row-wise, 1 for column-wise
2701-
skipna : boolean, default True
2702-
Exclude NA/null values. If an entire row/column is NA, the result
2703-
will be NA
2704-
level : integer, default None
2705-
Choose a level to groupby before applying operation
2706-
2707-
Returns
2708-
-------
2709-
var : Series
2710-
"""
27112601
if not level is None:
27122602
varfunc = lambda x: x.var(skipna=skipna)
27132603
return self.groupby(level=level).aggregate(varfunc)
@@ -2726,49 +2616,17 @@ def var(self, axis=0, skipna=True, level=None):
27262616
theVar = (XX - X ** 2 / count) / (count - 1)
27272617

27282618
return Series(theVar, index=axis_labels)
2619+
_add_stat_doc(var, 'unbiased variance', 'var')
27292620

27302621
def std(self, axis=0, skipna=True, level=None):
    # No inline docstring: the documentation is attached right after this
    # definition by the _add_stat_doc(std, ...) call.
    if level is not None:
        # Group on the requested index level and aggregate each group with
        # its own std.
        # NOTE(review): ``axis`` is not forwarded on this path -- confirm
        # that is intended.
        return self.groupby(level=level).aggregate(
            lambda x: x.std(skipna=skipna))

    # Standard deviation is the square root of the result of self.var.
    return np.sqrt(self.var(axis=axis, skipna=skipna))
2627+
_add_stat_doc(std, 'unbiased standard deviation', 'std')
27532628

27542629
def skew(self, axis=0, skipna=True, level=None):
2755-
"""
2756-
Return unbiased skewness over requested axis
2757-
2758-
Parameters
2759-
----------
2760-
axis : {0, 1}
2761-
0 for row-wise, 1 for column-wise
2762-
skipna : boolean, default True
2763-
Exclude NA/null values. If an entire row/column is NA, the result
2764-
will be NA
2765-
level : integer, default None
2766-
Choose a level to groupby before applying operation
2767-
2768-
Returns
2769-
-------
2770-
skew : Series
2771-
"""
27722630
if not level is None:
27732631
skewfunc = lambda x: x.skew(skipna=skipna)
27742632
return self.groupby(level=level).aggregate(skewfunc)
@@ -2795,6 +2653,7 @@ def skew(self, axis=0, skipna=True, level=None):
27952653
result = np.where(B == 0, 0, result)
27962654

27972655
return Series(result, index=axis_labels)
2656+
_add_stat_doc(skew, 'unbiased skewness', 'skew')
27982657

27992658
def _get_agg_data(self, axis, numeric_only=True, copy=True):
28002659
num_cols = self._get_numeric_columns()

0 commit comments

Comments
 (0)