Skip to content

Commit 8f28e2f

Browse files
dnmillerwesm
authored andcommitted
DOC: Adding details on normalization for variance functions.
1 parent 776ce2f commit 8f28e2f

File tree

3 files changed

+31
-4
lines changed

3 files changed

+31
-4
lines changed

doc/source/gotchas.rst

+10
Original file line numberDiff line numberDiff line change
@@ -302,3 +302,13 @@ of the new set of columns rather than the original ones:
302302
:suppress:
303303
304304
os.remove('tmp.csv')
305+
306+
307+
Differences with NumPy
308+
----------------------
309+
For Series and DataFrame objects, ``var`` normalizes by ``N-1`` to produce
310+
unbiased estimates of the population variance, while NumPy's ``var`` normalizes
311+
by ``N``, which measures the variance of the sample. Note that ``cov``
312+
normalizes by ``N-1`` in both pandas and NumPy.
313+
314+

pandas/core/frame.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -4052,6 +4052,9 @@ def cov(self):
40524052
Returns
40534053
-------
40544054
y : DataFrame
4055+
4056+
y contains the covariance matrix of the DataFrame's time series.
4057+
The covariance is normalized by N-1 (unbiased estimator).
40554058
"""
40564059
numeric_df = self._get_numeric_data()
40574060
cols = numeric_df.columns
@@ -4368,7 +4371,10 @@ def mad(self, axis=0, skipna=True, level=None):
43684371

43694372
@Substitution(name='variance', shortname='var',
43704373
na_action=_doc_exclude_na, extras='')
4371-
@Appender(_stat_doc)
4374+
@Appender(_stat_doc +
4375+
"""
4376+
Normalized by N-1 (unbiased estimator).
4377+
""")
43724378
def var(self, axis=0, skipna=True, level=None, ddof=1):
43734379
if level is not None:
43744380
return self._agg_by_level('var', axis=axis, level=level,
@@ -4378,7 +4384,10 @@ def var(self, axis=0, skipna=True, level=None, ddof=1):
43784384

43794385
@Substitution(name='standard deviation', shortname='std',
43804386
na_action=_doc_exclude_na, extras='')
4381-
@Appender(_stat_doc)
4387+
@Appender(_stat_doc +
4388+
"""
4389+
Normalized by N-1 (unbiased estimator).
4390+
""")
43824391
def std(self, axis=0, skipna=True, level=None, ddof=1):
43834392
if level is not None:
43844393
return self._agg_by_level('std', axis=axis, level=level,

pandas/core/series.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -1151,7 +1151,10 @@ def max(self, axis=None, out=None, skipna=True, level=None):
11511151

11521152
@Substitution(name='standard deviation', shortname='stdev',
11531153
na_action=_doc_exclude_na, extras='')
1154-
@Appender(_stat_doc)
1154+
@Appender(_stat_doc +
1155+
"""
1156+
Normalized by N-1 (unbiased estimator).
1157+
""")
11551158
def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
11561159
level=None):
11571160
if level is not None:
@@ -1161,7 +1164,10 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
11611164

11621165
@Substitution(name='variance', shortname='var',
11631166
na_action=_doc_exclude_na, extras='')
1164-
@Appender(_stat_doc)
1167+
@Appender(_stat_doc +
1168+
"""
1169+
Normalized by N-1 (unbiased estimator).
1170+
""")
11651171
def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
11661172
level=None):
11671173
if level is not None:
@@ -1474,6 +1480,8 @@ def cov(self, other):
14741480
Returns
14751481
-------
14761482
covariance : float
1483+
1484+
Normalized by N-1 (unbiased estimator).
14771485
"""
14781486
this, other = self.align(other, join='inner')
14791487
if len(this) == 0:

0 commit comments

Comments
 (0)