From e7762acab496c061adfcc3f609bc5528ad3de2c3 Mon Sep 17 00:00:00 2001 From: Anjali Singhal Date: Wed, 19 Sep 2018 00:42:48 +0200 Subject: [PATCH 1/5] TST: Fixturize series/test_analytics.py --- pandas/tests/series/test_analytics.py | 218 +++++++++++++------------- 1 file changed, 108 insertions(+), 110 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index d5d9e5f4f14de..62f6593f567a8 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -24,10 +24,8 @@ assert_frame_equal, assert_index_equal) import pandas.util.testing as tm import pandas.util._test_decorators as td -from .common import TestData - -class TestSeriesAnalytics(TestData): +class TestSeriesAnalytics(): @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("method, unit", [ @@ -194,8 +192,8 @@ def test_sum_overflow(self, use_bottleneck): result = s.max(skipna=False) assert np.allclose(float(result), v[-1]) - def test_sum(self): - self._check_stat_op('sum', np.sum, check_allna=False) + def test_sum(self, string_series): + self._check_stat_op('sum', np.sum, string_series, check_allna=False) def test_sum_inf(self): s = Series(np.random.randn(10)) @@ -215,67 +213,67 @@ def test_sum_inf(self): res = nanops.nansum(arr, axis=1) assert np.isinf(res).all() - def test_mean(self): - self._check_stat_op('mean', np.mean) + def test_mean(self, string_series): + self._check_stat_op('mean', np.mean, string_series) - def test_median(self): - self._check_stat_op('median', np.median) + def test_median(self, string_series): + self._check_stat_op('median', np.median, string_series) # test with integers, test failure int_ts = Series(np.ones(10, dtype=int), index=lrange(10)) tm.assert_almost_equal(np.median(int_ts), int_ts.median()) - def test_prod(self): - self._check_stat_op('prod', np.prod) + def test_prod(self, string_series): + self._check_stat_op('prod', np.prod, string_series) - def test_min(self): - self._check_stat_op('min', np.min, check_objects=True) + def test_min(self, string_series): + self._check_stat_op('min', np.min, string_series, check_objects=True) - def test_max(self): - self._check_stat_op('max', np.max, check_objects=True) + def test_max(self, string_series): + self._check_stat_op('max', np.max, string_series, check_objects=True) - def test_var_std(self): + def test_var_std(self, datetime_series, string_series): alt = lambda x: np.std(x, ddof=1) - self._check_stat_op('std', alt) + self._check_stat_op('std', alt, string_series) alt = lambda x: np.var(x, ddof=1) - self._check_stat_op('var', alt) + self._check_stat_op('var', alt, string_series) - result = self.ts.std(ddof=4) - expected = np.std(self.ts.values, ddof=4) + result = datetime_series.std(ddof=4) + expected = np.std(datetime_series.values, ddof=4) assert_almost_equal(result, expected) - result = self.ts.var(ddof=4) - expected = np.var(self.ts.values, ddof=4) + result = datetime_series.var(ddof=4) + expected = np.var(datetime_series.values, ddof=4) assert_almost_equal(result, expected) # 1 - element series with ddof=1 - s = self.ts.iloc[[0]] + s = datetime_series.iloc[[0]] result = s.var(ddof=1) assert isna(result) result = s.std(ddof=1) assert isna(result) - def test_sem(self): + def test_sem(self, datetime_series, string_series): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - self._check_stat_op('sem', alt) + self._check_stat_op('sem', alt, string_series) - result = self.ts.sem(ddof=4) - expected = np.std(self.ts.values, - ddof=4) / np.sqrt(len(self.ts.values)) + result = datetime_series.sem(ddof=4) + expected = np.std(datetime_series.values, + ddof=4) / np.sqrt(len(datetime_series.values)) assert_almost_equal(result, expected) # 1 - element series with ddof=1 - s = self.ts.iloc[[0]] + s = datetime_series.iloc[[0]] result = s.sem(ddof=1) assert isna(result) @td.skip_if_no_scipy - def test_skew(self): + def test_skew(self, string_series): from scipy.stats import skew alt = lambda x: skew(x, bias=False) - self._check_stat_op('skew', alt) + self._check_stat_op('skew', alt, string_series) # test corner cases, skew() returns NaN unless there's at least 3 # values @@ -291,10 +289,10 @@ def test_skew(self): assert (df.skew() == 0).all() @td.skip_if_no_scipy - def test_kurt(self): + def test_kurt(self, string_series): from scipy.stats import kurtosis alt = lambda x: kurtosis(x, bias=False) - self._check_stat_op('kurt', alt) + self._check_stat_op('kurt', alt, string_series) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], @@ -353,9 +351,9 @@ def test_describe_with_tz(self, tz_naive_fixture): ) tm.assert_series_equal(result, expected) - def test_argsort(self): - self._check_accum_op('argsort', check_dtype=False) - argsorted = self.ts.argsort() + def test_argsort(self, datetime_series): + self._check_accum_op('argsort', datetime_series, check_dtype=False) + argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) # GH 2967 (introduced bug in 0.11-dev I think) @@ -388,26 +386,26 @@ def test_argsort_stable(self): pytest.raises(AssertionError, tm.assert_numpy_array_equal, qindexer, mindexer) - def test_cumsum(self): - self._check_accum_op('cumsum') + def test_cumsum(self, datetime_series): + self._check_accum_op('cumsum', datetime_series) - def test_cumprod(self): - self._check_accum_op('cumprod') + def test_cumprod(self, datetime_series): + self._check_accum_op('cumprod', datetime_series) - def test_cummin(self): - tm.assert_numpy_array_equal(self.ts.cummin().values, - np.minimum.accumulate(np.array(self.ts))) - ts = self.ts.copy() + def test_cummin(self, datetime_series): + tm.assert_numpy_array_equal(datetime_series.cummin().values, + np.minimum.accumulate(np.array(datetime_series))) + ts = datetime_series.copy() ts[::2] = np.NaN result = ts.cummin()[1::2] expected = np.minimum.accumulate(ts.dropna()) tm.assert_series_equal(result, expected) - def test_cummax(self): - tm.assert_numpy_array_equal(self.ts.cummax().values, - np.maximum.accumulate(np.array(self.ts))) - ts = self.ts.copy() + def test_cummax(self, datetime_series): + tm.assert_numpy_array_equal(datetime_series.cummax().values, + np.maximum.accumulate(np.array(datetime_series))) + ts = datetime_series.copy() ts[::2] = np.NaN result = ts.cummax()[1::2] expected = np.maximum.accumulate(ts.dropna()) @@ -506,14 +504,14 @@ def test_npdiff(self): r = np.diff(s) assert_series_equal(Series([nan, 0, 0, 0, nan]), r) - def _check_stat_op(self, name, alternate, check_objects=False, + def _check_stat_op(self, name, alternate, string_series_, check_objects=False, check_allna=False): with pd.option_context('use_bottleneck', False): f = getattr(Series, name) # add some NaNs - self.series[5:15] = np.NaN + string_series_[5:15] = np.NaN # idxmax, idxmin, min, and max are valid for dates if name not in ['max', 'min']: @@ -521,15 +519,15 @@ def _check_stat_op(self, name, alternate, check_objects=False, pytest.raises(TypeError, f, ds) # skipna or no - assert notna(f(self.series)) - assert isna(f(self.series, skipna=False)) + assert notna(f(string_series_)) + assert isna(f(string_series_, skipna=False)) # check the result is correct - nona = self.series.dropna() + nona = string_series_.dropna() assert_almost_equal(f(nona), alternate(nona.values)) - assert_almost_equal(f(self.series), alternate(nona.values)) + assert_almost_equal(f(string_series_), alternate(nona.values)) - allna = self.series * nan + allna = string_series_ * nan if check_allna: assert np.isnan(f(allna)) @@ -556,21 +554,21 @@ def _check_stat_op(self, name, alternate, check_objects=False, pytest.raises(TypeError, f, Series(list('abc'))) # Invalid axis. - pytest.raises(ValueError, f, self.series, axis=1) + pytest.raises(ValueError, f, string_series_, axis=1) # Unimplemented numeric_only parameter. if 'numeric_only' in compat.signature(f).args: tm.assert_raises_regex(NotImplementedError, name, f, - self.series, numeric_only=True) + string_series_, numeric_only=True) - def _check_accum_op(self, name, check_dtype=True): + def _check_accum_op(self, name, datetime_series_, check_dtype=True): func = getattr(np, name) - tm.assert_numpy_array_equal(func(self.ts).values, - func(np.array(self.ts)), + tm.assert_numpy_array_equal(func(datetime_series_).values, + func(np.array(datetime_series_)), check_dtype=check_dtype) # with missing values - ts = self.ts.copy() + ts = datetime_series_.copy() ts[::2] = np.NaN result = func(ts)[1::2] @@ -607,13 +605,13 @@ def test_numpy_compress(self): tm.assert_raises_regex(ValueError, msg, np.compress, cond, s, out=s) - def test_round(self): - self.ts.index.name = "index_name" - result = self.ts.round(2) - expected = Series(np.round(self.ts.values, 2), - index=self.ts.index, name='ts') + def test_round(self, datetime_series): + datetime_series.index.name = "index_name" + result = datetime_series.round(2) + expected = Series(np.round(datetime_series.values, 2), + index=datetime_series.index, name='ts') assert_series_equal(result, expected) - assert result.name == self.ts.name + assert result.name == datetime_series.name def test_numpy_round(self): # See gh-12600 @@ -717,26 +715,26 @@ def test_modulo(self): assert_series_equal(result, expected) @td.skip_if_no_scipy - def test_corr(self): + def test_corr(self, datetime_series): import scipy.stats as stats # full overlap - tm.assert_almost_equal(self.ts.corr(self.ts), 1) + tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) # partial overlap - tm.assert_almost_equal(self.ts[:15].corr(self.ts[5:]), 1) + tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) - assert isna(self.ts[:15].corr(self.ts[5:], min_periods=12)) + assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) - ts1 = self.ts[:15].reindex(self.ts.index) - ts2 = self.ts[5:].reindex(self.ts.index) + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) assert isna(ts1.corr(ts2, min_periods=12)) # No overlap - assert np.isnan(self.ts[::2].corr(self.ts[1::2])) + assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) # all NA - cp = self.ts[:10].copy() + cp = datetime_series[:10].copy() cp[:] = np.nan assert isna(cp.corr(cp)) @@ -789,35 +787,35 @@ def test_corr_invalid_method(self): with tm.assert_raises_regex(ValueError, msg): s1.corr(s2, method="____") - def test_cov(self): + def test_cov(self, datetime_series): # full overlap - tm.assert_almost_equal(self.ts.cov(self.ts), self.ts.std() ** 2) + tm.assert_almost_equal(datetime_series.cov(datetime_series), datetime_series.std() ** 2) # partial overlap - tm.assert_almost_equal(self.ts[:15].cov(self.ts[5:]), - self.ts[5:15].std() ** 2) + tm.assert_almost_equal(datetime_series[:15].cov(datetime_series[5:]), + datetime_series[5:15].std() ** 2) # No overlap - assert np.isnan(self.ts[::2].cov(self.ts[1::2])) + assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) # all NA - cp = self.ts[:10].copy() + cp = datetime_series[:10].copy() cp[:] = np.nan assert isna(cp.cov(cp)) # min_periods - assert isna(self.ts[:15].cov(self.ts[5:], min_periods=12)) + assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) - ts1 = self.ts[:15].reindex(self.ts.index) - ts2 = self.ts[5:].reindex(self.ts.index) + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) assert isna(ts1.cov(ts2, min_periods=12)) - def test_count(self): - assert self.ts.count() == len(self.ts) + def test_count(self, datetime_series): + assert datetime_series.count() == len(datetime_series) - self.ts[::2] = np.NaN + datetime_series[::2] = np.NaN - assert self.ts.count() == np.isfinite(self.ts).sum() + assert datetime_series.count() == np.isfinite(datetime_series).sum() mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) @@ -920,17 +918,17 @@ def test_matmul(self): pytest.raises(Exception, a.dot, a.values[:3]) pytest.raises(ValueError, a.dot, b.T) - def test_clip(self): - val = self.ts.median() + def test_clip(self, datetime_series): + val = datetime_series.median() - assert self.ts.clip_lower(val).min() == val - assert self.ts.clip_upper(val).max() == val + assert datetime_series.clip_lower(val).min() == val + assert datetime_series.clip_upper(val).max() == val - assert self.ts.clip(lower=val).min() == val - assert self.ts.clip(upper=val).max() == val + assert datetime_series.clip(lower=val).min() == val + assert datetime_series.clip(upper=val).max() == val - result = self.ts.clip(-0.5, 0.5) - expected = np.clip(self.ts, -0.5, 0.5) + result = datetime_series.clip(-0.5, 0.5) + expected = np.clip(datetime_series, -0.5, 0.5) assert_series_equal(result, expected) assert isinstance(expected, Series) @@ -1164,25 +1162,25 @@ def test_timedelta64_analytics(self): expected = Timedelta('1 days') assert result == expected - def test_idxmin(self): + def test_idxmin(self, string_series): # test idxmin # _check_stat_op approach can not be used here because of isna check. # add some NaNs - self.series[5:15] = np.NaN + string_series[5:15] = np.NaN # skipna or no - assert self.series[self.series.idxmin()] == self.series.min() - assert isna(self.series.idxmin(skipna=False)) + assert string_series[string_series.idxmin()] == string_series.min() + assert isna(string_series.idxmin(skipna=False)) # no NaNs - nona = self.series.dropna() + nona = string_series.dropna() assert nona[nona.idxmin()] == nona.min() assert (nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin()) # all NaNs - allna = self.series * nan + allna = string_series * nan assert isna(allna.idxmin()) # datetime64[ns] @@ -1221,25 +1219,25 @@ def test_numpy_argmin_deprecated(self): tm.assert_raises_regex(ValueError, msg, np.argmin, s, out=data) - def test_idxmax(self): + def test_idxmax(self, string_series): # test idxmax # _check_stat_op approach can not be used here because of isna check. # add some NaNs - self.series[5:15] = np.NaN + string_series[5:15] = np.NaN # skipna or no - assert self.series[self.series.idxmax()] == self.series.max() - assert isna(self.series.idxmax(skipna=False)) + assert string_series[string_series.idxmax()] == string_series.max() + assert isna(string_series.idxmax(skipna=False)) # no NaNs - nona = self.series.dropna() + nona = string_series.dropna() assert nona[nona.idxmax()] == nona.max() assert (nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax()) # all NaNs - allna = self.series * nan + allna = string_series * nan assert isna(allna.idxmax()) from pandas import date_range @@ -1458,8 +1456,8 @@ def test_apply_categorical(self): tm.assert_series_equal(result, exp) assert result.dtype == np.object - def test_shift_int(self): - ts = self.ts.astype(int) + def test_shift_int(self, datetime_series): + ts = datetime_series.astype(int) shifted = ts.shift(1) expected = ts.astype(float).shift(1) assert_series_equal(shifted, expected) From 368b7562a2a280a2388a352c8d918aba81170443 Mon Sep 17 00:00:00 2001 From: Anjali Singhal Date: Wed, 19 Sep 2018 00:56:32 +0200 Subject: [PATCH 2/5] Fixing PEP8 issues --- pandas/tests/series/test_analytics.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 62f6593f567a8..9dbe0a719f484 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -25,6 +25,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td + class TestSeriesAnalytics(): @pytest.mark.parametrize("use_bottleneck", [True, False]) @@ -394,7 +395,8 @@ def test_cumprod(self, datetime_series): def test_cummin(self, datetime_series): tm.assert_numpy_array_equal(datetime_series.cummin().values, - np.minimum.accumulate(np.array(datetime_series))) + np.minimum.accumulate( + np.array(datetime_series))) ts = datetime_series.copy() ts[::2] = np.NaN result = ts.cummin()[1::2] @@ -404,7 +406,8 @@ def test_cummin(self, datetime_series): def test_cummax(self, datetime_series): tm.assert_numpy_array_equal(datetime_series.cummax().values, - np.maximum.accumulate(np.array(datetime_series))) + np.maximum.accumulate( + np.array(datetime_series))) ts = datetime_series.copy() ts[::2] = np.NaN result = ts.cummax()[1::2] @@ -504,8 +507,8 @@ def test_npdiff(self): r = np.diff(s) assert_series_equal(Series([nan, 0, 0, 0, nan]), r) - def _check_stat_op(self, name, alternate, string_series_, check_objects=False, - check_allna=False): + def _check_stat_op(self, name, alternate, string_series_, + check_objects=False, check_allna=False): with pd.option_context('use_bottleneck', False): f = getattr(Series, name) @@ -722,9 +725,11 @@ def test_corr(self, datetime_series): tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) # partial overlap - tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) + tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), + 1) - assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) + assert isna(datetime_series[:15].corr(datetime_series[5:], + min_periods=12)) ts1 = datetime_series[:15].reindex(datetime_series.index) ts2 = datetime_series[5:].reindex(datetime_series.index) @@ -789,7 +794,8 @@ def test_corr_invalid_method(self): def test_cov(self, datetime_series): # full overlap - tm.assert_almost_equal(datetime_series.cov(datetime_series), datetime_series.std() ** 2) + tm.assert_almost_equal(datetime_series.cov(datetime_series), + datetime_series.std() ** 2) # partial overlap tm.assert_almost_equal(datetime_series[:15].cov(datetime_series[5:]), @@ -804,7 +810,8 @@ def test_cov(self, datetime_series): assert isna(cp.cov(cp)) # min_periods - assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) + assert isna(datetime_series[:15].cov(datetime_series[5:], + min_periods=12)) ts1 = datetime_series[:15].reindex(datetime_series.index) ts2 = datetime_series[5:].reindex(datetime_series.index) From 3678a62e75bc6b83265195450f4f1283c732c605 Mon Sep 17 00:00:00 2001 From: Anjali Singhal Date: Wed, 19 Sep 2018 01:23:38 +0200 Subject: [PATCH 3/5] Fixing PEP8 issues --- pandas/tests/series/test_analytics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 9dbe0a719f484..3fec2c6a3baba 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -395,8 +395,8 @@ def test_cumprod(self, datetime_series): def test_cummin(self, datetime_series): tm.assert_numpy_array_equal(datetime_series.cummin().values, - np.minimum.accumulate( - np.array(datetime_series))) + np.minimum + .accumulate(np.array(datetime_series))) ts = datetime_series.copy() ts[::2] = np.NaN result = ts.cummin()[1::2] @@ -406,8 +406,8 @@ def test_cummin(self, datetime_series): def test_cummax(self, datetime_series): tm.assert_numpy_array_equal(datetime_series.cummax().values, - np.maximum.accumulate( - np.array(datetime_series))) + np.maximum + .accumulate(np.array(datetime_series))) ts = datetime_series.copy() ts[::2] = np.NaN result = ts.cummax()[1::2] From 795cdc1d5d5cbcc351a4e5aa61d3810d2cb48172 Mon Sep 17 00:00:00 2001 From: Anjali Singhal Date: Wed, 19 Sep 2018 01:32:39 +0200 Subject: [PATCH 4/5] TST: Fixturize series/test_validate.py --- pandas/tests/series/test_validate.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index a0cde5f81d021..8c4b6ee5b1d75 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,14 +1,7 @@ -from pandas.core.series import Series - import pytest import pandas.util.testing as tm -@pytest.fixture -def series(): - return Series([1, 2, 3, 4, 5]) - - class TestSeriesValidate(object): """Tests for error handling related to data types of method arguments.""" @@ -16,7 +9,7 @@ class TestSeriesValidate(object): "sort_values", "sort_index", "rename", "dropna"]) @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) - def test_validate_bool_args(self, series, func, inplace): + def test_validate_bool_args(self, string_series, func, inplace): msg = "For argument \"inplace\" expected type bool" kwargs = dict(inplace=inplace) @@ -24,4 +17,4 @@ def test_validate_bool_args(self, series, func, inplace): kwargs["name"] = "hello" with tm.assert_raises_regex(ValueError, msg): - getattr(series, func)(**kwargs) + getattr(string_series, func)(**kwargs) From 2711fda0ce41a783a432ba832e3c9763af62b2f8 Mon Sep 17 00:00:00 2001 From: Anjali Singhal Date: Sun, 23 Sep 2018 12:48:27 +0200 Subject: [PATCH 5/5] Revert "Fixing PEP8 issues" This reverts commit 3678a62e75bc6b83265195450f4f1283c732c605. --- pandas/tests/series/test_analytics.py | 225 +++++++++++++------------- 1 file changed, 110 insertions(+), 115 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 3fec2c6a3baba..d5d9e5f4f14de 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -24,9 +24,10 @@ assert_frame_equal, assert_index_equal) import pandas.util.testing as tm import pandas.util._test_decorators as td +from .common import TestData -class TestSeriesAnalytics(): +class TestSeriesAnalytics(TestData): @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("method, unit", [ @@ -193,8 +194,8 @@ def test_sum_overflow(self, use_bottleneck): result = s.max(skipna=False) assert np.allclose(float(result), v[-1]) - def test_sum(self, string_series): - self._check_stat_op('sum', np.sum, string_series, check_allna=False) + def test_sum(self): + self._check_stat_op('sum', np.sum, check_allna=False) def test_sum_inf(self): s = Series(np.random.randn(10)) @@ -214,67 +215,67 @@ def test_sum_inf(self): res = nanops.nansum(arr, axis=1) assert np.isinf(res).all() - def test_mean(self, string_series): - self._check_stat_op('mean', np.mean, string_series) + def test_mean(self): + self._check_stat_op('mean', np.mean) - def test_median(self, string_series): - self._check_stat_op('median', np.median, string_series) + def test_median(self): + self._check_stat_op('median', np.median) # test with integers, test failure int_ts = Series(np.ones(10, dtype=int), index=lrange(10)) tm.assert_almost_equal(np.median(int_ts), int_ts.median()) - def test_prod(self, string_series): - self._check_stat_op('prod', np.prod, string_series) + def test_prod(self): + self._check_stat_op('prod', np.prod) - def test_min(self, string_series): - self._check_stat_op('min', np.min, string_series, check_objects=True) + def test_min(self): + self._check_stat_op('min', np.min, check_objects=True) - def test_max(self, string_series): - self._check_stat_op('max', np.max, string_series, check_objects=True) + def test_max(self): + self._check_stat_op('max', np.max, check_objects=True) - def test_var_std(self, datetime_series, string_series): + def test_var_std(self): alt = lambda x: np.std(x, ddof=1) - self._check_stat_op('std', alt, string_series) + self._check_stat_op('std', alt) alt = lambda x: np.var(x, ddof=1) - self._check_stat_op('var', alt, string_series) + self._check_stat_op('var', alt) - result = datetime_series.std(ddof=4) - expected = np.std(datetime_series.values, ddof=4) + result = self.ts.std(ddof=4) + expected = np.std(self.ts.values, ddof=4) assert_almost_equal(result, expected) - result = datetime_series.var(ddof=4) - expected = np.var(datetime_series.values, ddof=4) + result = self.ts.var(ddof=4) + expected = np.var(self.ts.values, ddof=4) assert_almost_equal(result, expected) # 1 - element series with ddof=1 - s = datetime_series.iloc[[0]] + s = self.ts.iloc[[0]] result = s.var(ddof=1) assert isna(result) result = s.std(ddof=1) assert isna(result) - def test_sem(self, datetime_series, string_series): + def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - self._check_stat_op('sem', alt, string_series) + self._check_stat_op('sem', alt) - result = datetime_series.sem(ddof=4) - expected = np.std(datetime_series.values, - ddof=4) / np.sqrt(len(datetime_series.values)) + result = self.ts.sem(ddof=4) + expected = np.std(self.ts.values, + ddof=4) / np.sqrt(len(self.ts.values)) assert_almost_equal(result, expected) # 1 - element series with ddof=1 - s = datetime_series.iloc[[0]] + s = self.ts.iloc[[0]] result = s.sem(ddof=1) assert isna(result) @td.skip_if_no_scipy - def test_skew(self, string_series): + def test_skew(self): from scipy.stats import skew alt = lambda x: skew(x, bias=False) - self._check_stat_op('skew', alt, string_series) + self._check_stat_op('skew', alt) # test corner cases, skew() returns NaN unless there's at least 3 # values @@ -290,10 +291,10 @@ def test_skew(self, string_series): assert (df.skew() == 0).all() @td.skip_if_no_scipy - def test_kurt(self, string_series): + def test_kurt(self): from scipy.stats import kurtosis alt = lambda x: kurtosis(x, bias=False) - self._check_stat_op('kurt', alt, string_series) + self._check_stat_op('kurt', alt) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], @@ -352,9 +353,9 @@ def test_describe_with_tz(self, tz_naive_fixture): ) tm.assert_series_equal(result, expected) - def test_argsort(self, datetime_series): - self._check_accum_op('argsort', datetime_series, check_dtype=False) - argsorted = datetime_series.argsort() + def test_argsort(self): + self._check_accum_op('argsort', check_dtype=False) + argsorted = self.ts.argsort() assert issubclass(argsorted.dtype.type, np.integer) # GH 2967 (introduced bug in 0.11-dev I think) @@ -387,28 +388,26 @@ def test_argsort_stable(self): pytest.raises(AssertionError, tm.assert_numpy_array_equal, qindexer, mindexer) - def test_cumsum(self, datetime_series): - self._check_accum_op('cumsum', datetime_series) + def test_cumsum(self): + self._check_accum_op('cumsum') - def test_cumprod(self, datetime_series): - self._check_accum_op('cumprod', datetime_series) + def test_cumprod(self): + self._check_accum_op('cumprod') - def test_cummin(self, datetime_series): - tm.assert_numpy_array_equal(datetime_series.cummin().values, - np.minimum - .accumulate(np.array(datetime_series))) - ts = datetime_series.copy() + def test_cummin(self): + tm.assert_numpy_array_equal(self.ts.cummin().values, + np.minimum.accumulate(np.array(self.ts))) + ts = self.ts.copy() ts[::2] = np.NaN result = ts.cummin()[1::2] expected = np.minimum.accumulate(ts.dropna()) tm.assert_series_equal(result, expected) - def test_cummax(self, datetime_series): - tm.assert_numpy_array_equal(datetime_series.cummax().values, - np.maximum - .accumulate(np.array(datetime_series))) - ts = datetime_series.copy() + def test_cummax(self): + tm.assert_numpy_array_equal(self.ts.cummax().values, + np.maximum.accumulate(np.array(self.ts))) + ts = self.ts.copy() ts[::2] = np.NaN result = ts.cummax()[1::2] expected = np.maximum.accumulate(ts.dropna()) @@ -507,14 +506,14 @@ def test_npdiff(self): r = np.diff(s) assert_series_equal(Series([nan, 0, 0, 0, nan]), r) - def _check_stat_op(self, name, alternate, string_series_, - check_objects=False, check_allna=False): + def _check_stat_op(self, name, alternate, check_objects=False, + check_allna=False): with pd.option_context('use_bottleneck', False): f = getattr(Series, name) # add some NaNs - string_series_[5:15] = np.NaN + self.series[5:15] = np.NaN # idxmax, idxmin, min, and max are valid for dates if name not in ['max', 'min']: @@ -522,15 +521,15 @@ def _check_stat_op(self, name, alternate, string_series_, pytest.raises(TypeError, f, ds) # skipna or no - assert notna(f(string_series_)) - assert isna(f(string_series_, skipna=False)) + assert notna(f(self.series)) + assert isna(f(self.series, skipna=False)) # check the result is correct - nona = string_series_.dropna() + nona = self.series.dropna() assert_almost_equal(f(nona), alternate(nona.values)) - assert_almost_equal(f(string_series_), alternate(nona.values)) + assert_almost_equal(f(self.series), alternate(nona.values)) - allna = string_series_ * nan + allna = self.series * nan if check_allna: assert np.isnan(f(allna)) @@ -557,21 +556,21 @@ def _check_stat_op(self, name, alternate, string_series_, pytest.raises(TypeError, f, Series(list('abc'))) # Invalid axis. - pytest.raises(ValueError, f, string_series_, axis=1) + pytest.raises(ValueError, f, self.series, axis=1) # Unimplemented numeric_only parameter. if 'numeric_only' in compat.signature(f).args: tm.assert_raises_regex(NotImplementedError, name, f, - string_series_, numeric_only=True) + self.series, numeric_only=True) - def _check_accum_op(self, name, datetime_series_, check_dtype=True): + def _check_accum_op(self, name, check_dtype=True): func = getattr(np, name) - tm.assert_numpy_array_equal(func(datetime_series_).values, - func(np.array(datetime_series_)), + tm.assert_numpy_array_equal(func(self.ts).values, + func(np.array(self.ts)), check_dtype=check_dtype) # with missing values - ts = datetime_series_.copy() + ts = self.ts.copy() ts[::2] = np.NaN result = func(ts)[1::2] @@ -608,13 +607,13 @@ def test_numpy_compress(self): tm.assert_raises_regex(ValueError, msg, np.compress, cond, s, out=s) - def test_round(self, datetime_series): - datetime_series.index.name = "index_name" - result = datetime_series.round(2) - expected = Series(np.round(datetime_series.values, 2), - index=datetime_series.index, name='ts') + def test_round(self): + self.ts.index.name = "index_name" + result = self.ts.round(2) + expected = Series(np.round(self.ts.values, 2), + index=self.ts.index, name='ts') assert_series_equal(result, expected) - assert result.name == datetime_series.name + assert result.name == self.ts.name def test_numpy_round(self): # See gh-12600 @@ -718,28 +717,26 @@ def test_modulo(self): assert_series_equal(result, expected) @td.skip_if_no_scipy - def test_corr(self, datetime_series): + def test_corr(self): import scipy.stats as stats # full overlap - tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) + tm.assert_almost_equal(self.ts.corr(self.ts), 1) # partial overlap - tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), - 1) + tm.assert_almost_equal(self.ts[:15].corr(self.ts[5:]), 1) - assert isna(datetime_series[:15].corr(datetime_series[5:], - min_periods=12)) + assert isna(self.ts[:15].corr(self.ts[5:], min_periods=12)) - ts1 = datetime_series[:15].reindex(datetime_series.index) - ts2 = datetime_series[5:].reindex(datetime_series.index) + ts1 = self.ts[:15].reindex(self.ts.index) + ts2 = self.ts[5:].reindex(self.ts.index) assert isna(ts1.corr(ts2, min_periods=12)) # No overlap - assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) + assert np.isnan(self.ts[::2].corr(self.ts[1::2])) # all NA - cp = datetime_series[:10].copy() + cp = self.ts[:10].copy() cp[:] = np.nan assert isna(cp.corr(cp)) @@ -792,37 +789,35 @@ def test_corr_invalid_method(self): with tm.assert_raises_regex(ValueError, msg): s1.corr(s2, method="____") - def test_cov(self, datetime_series): + def test_cov(self): # full overlap - tm.assert_almost_equal(datetime_series.cov(datetime_series), - datetime_series.std() ** 2) + tm.assert_almost_equal(self.ts.cov(self.ts), self.ts.std() ** 2) # partial overlap - tm.assert_almost_equal(datetime_series[:15].cov(datetime_series[5:]), - datetime_series[5:15].std() ** 2) + tm.assert_almost_equal(self.ts[:15].cov(self.ts[5:]), + self.ts[5:15].std() ** 2) # No overlap - assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) + assert np.isnan(self.ts[::2].cov(self.ts[1::2])) # all NA - cp = datetime_series[:10].copy() + cp = self.ts[:10].copy() cp[:] = np.nan assert isna(cp.cov(cp)) # min_periods - assert isna(datetime_series[:15].cov(datetime_series[5:], - min_periods=12)) + assert isna(self.ts[:15].cov(self.ts[5:], min_periods=12)) - ts1 = datetime_series[:15].reindex(datetime_series.index) - ts2 = datetime_series[5:].reindex(datetime_series.index) + ts1 = self.ts[:15].reindex(self.ts.index) + ts2 = self.ts[5:].reindex(self.ts.index) assert isna(ts1.cov(ts2, min_periods=12)) - def test_count(self, datetime_series): - assert datetime_series.count() == len(datetime_series) + def test_count(self): + assert self.ts.count() == len(self.ts) - datetime_series[::2] = np.NaN + self.ts[::2] = np.NaN - assert datetime_series.count() == np.isfinite(datetime_series).sum() + assert self.ts.count() == np.isfinite(self.ts).sum() mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) @@ -925,17 +920,17 @@ def test_matmul(self): pytest.raises(Exception, a.dot, a.values[:3]) pytest.raises(ValueError, a.dot, b.T) - def test_clip(self, datetime_series): - val = datetime_series.median() + def test_clip(self): + val = self.ts.median() - assert datetime_series.clip_lower(val).min() == val - assert datetime_series.clip_upper(val).max() == val + assert self.ts.clip_lower(val).min() == val + assert self.ts.clip_upper(val).max() == val - assert datetime_series.clip(lower=val).min() == val - assert datetime_series.clip(upper=val).max() == val + assert self.ts.clip(lower=val).min() == val + assert self.ts.clip(upper=val).max() == val - result = datetime_series.clip(-0.5, 0.5) - expected = np.clip(datetime_series, -0.5, 0.5) + result = self.ts.clip(-0.5, 0.5) + expected = np.clip(self.ts, -0.5, 0.5) assert_series_equal(result, expected) assert isinstance(expected, Series) @@ -1169,25 +1164,25 @@ def test_timedelta64_analytics(self): expected = Timedelta('1 days') assert result == expected - def test_idxmin(self, string_series): + def test_idxmin(self): # test idxmin # _check_stat_op approach can not be used here because of isna check. # add some NaNs - string_series[5:15] = np.NaN + self.series[5:15] = np.NaN # skipna or no - assert string_series[string_series.idxmin()] == string_series.min() - assert isna(string_series.idxmin(skipna=False)) + assert self.series[self.series.idxmin()] == self.series.min() + assert isna(self.series.idxmin(skipna=False)) # no NaNs - nona = string_series.dropna() + nona = self.series.dropna() assert nona[nona.idxmin()] == nona.min() assert (nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin()) # all NaNs - allna = string_series * nan + allna = self.series * nan assert isna(allna.idxmin()) # datetime64[ns] @@ -1226,25 +1221,25 @@ def test_numpy_argmin_deprecated(self): tm.assert_raises_regex(ValueError, msg, np.argmin, s, out=data) - def test_idxmax(self, string_series): + def test_idxmax(self): # test idxmax # _check_stat_op approach can not be used here because of isna check. # add some NaNs - string_series[5:15] = np.NaN + self.series[5:15] = np.NaN # skipna or no - assert string_series[string_series.idxmax()] == string_series.max() - assert isna(string_series.idxmax(skipna=False)) + assert self.series[self.series.idxmax()] == self.series.max() + assert isna(self.series.idxmax(skipna=False)) # no NaNs - nona = string_series.dropna() + nona = self.series.dropna() assert nona[nona.idxmax()] == nona.max() assert (nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax()) # all NaNs - allna = string_series * nan + allna = self.series * nan assert isna(allna.idxmax()) from pandas import date_range @@ -1463,8 +1458,8 @@ def test_apply_categorical(self): tm.assert_series_equal(result, exp) assert result.dtype == np.object - def test_shift_int(self, datetime_series): - ts = datetime_series.astype(int) + def test_shift_int(self): + ts = self.ts.astype(int) shifted = ts.shift(1) expected = ts.astype(float).shift(1) assert_series_equal(shifted, expected)