From f699cd27ad8a3f89712a5950b4b88da0a90f3685 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 18 Sep 2018 01:39:27 +0200 Subject: [PATCH 01/13] Fixturize tests/frame/test_arithmetic --- pandas/tests/frame/conftest.py | 18 +-- pandas/tests/frame/test_arithmetic.py | 193 ++++++++++---------------- 2 files changed, 83 insertions(+), 128 deletions(-) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index fdedb93835d75..4a4ce4540b9d5 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -70,9 +70,10 @@ def mixed_float_frame(): Columns are ['A', 'B', 'C', 'D']. """ df = DataFrame(tm.getSeriesData()) - df.A = df.A.astype('float16') + df.A = df.A.astype('float32') df.B = df.B.astype('float32') - df.C = df.C.astype('float64') + df.C = df.C.astype('float16') + df.D = df.D.astype('float64') return df @@ -84,9 +85,10 @@ def mixed_float_frame2(): Columns are ['A', 'B', 'C', 'D']. """ df = DataFrame(tm.getSeriesData()) - df.D = df.D.astype('float16') + df.D = df.D.astype('float32') df.C = df.C.astype('float32') - df.B = df.B.astype('float64') + df.B = df.B.astype('float16') + df.D = df.D.astype('float64') return df @@ -99,10 +101,10 @@ def mixed_int_frame(): """ df = DataFrame({k: v.astype(int) for k, v in compat.iteritems(tm.getSeriesData())}) - df.A = df.A.astype('uint8') - df.B = df.B.astype('int32') - df.C = df.C.astype('int64') - df.D = np.ones(len(df.D), dtype='uint64') + df.A = df.A.astype('int32') + df.B = np.ones(len(df.B), dtype='uint64') + df.C = df.C.astype('uint8') + df.D = df.C.astype('int64') return df diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 9c61f13b944ea..1da208b2ec627 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -4,8 +4,7 @@ import pytest import numpy as np -from pandas.compat import range, PY3 -import pandas.io.formats.printing as printing +from pandas.compat import range import pandas as pd import pandas.util.testing as tm @@ -127,132 +126,86 @@ def test_df_add_flex_filled_mixed_dtypes(self): 'B': ser * 2}) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame(self): - seriesd = tm.getSeriesData() - frame = pd.DataFrame(seriesd).copy() - - mixed_float = pd.DataFrame({'A': frame['A'].copy().astype('float32'), - 'B': frame['B'].copy().astype('float32'), - 'C': frame['C'].copy().astype('float16'), - 'D': frame['D'].copy().astype('float64')}) - - intframe = pd.DataFrame({k: v.astype(int) - for k, v in seriesd.items()}) - mixed_int = pd.DataFrame({'A': intframe['A'].copy().astype('int32'), - 'B': np.ones(len(intframe), dtype='uint64'), - 'C': intframe['C'].copy().astype('uint8'), - 'D': intframe['D'].copy().astype('int64')}) - - # force these all to int64 to avoid platform testing issues - intframe = pd.DataFrame({c: s for c, s in intframe.items()}, - dtype=np.int64) - - ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] - if not PY3: - aliases = {} - else: - aliases = {'div': 'truediv'} - - for op in ops: - try: - alias = aliases.get(op, op) - f = getattr(operator, alias) - result = getattr(frame, op)(2 * frame) - exp = f(frame, 2 * frame) - tm.assert_frame_equal(result, exp) - - # vs mix float - result = getattr(mixed_float, op)(2 * mixed_float) - exp = f(mixed_float, 2 * mixed_float) - tm.assert_frame_equal(result, exp) - _check_mixed_float(result, dtype=dict(C=None)) - - # vs mix int - if op in ['add', 'sub', 'mul']: - result = getattr(mixed_int, op)(2 + mixed_int) - exp = f(mixed_int, 2 + mixed_int) - - # no overflow in the uint - dtype = None - if op in ['sub']: - dtype = dict(B='uint64', C=None) - elif op in ['add', 'mul']: - dtype = dict(C=None) - tm.assert_frame_equal(result, exp) - _check_mixed_int(result, dtype=dtype) - - # rops - r_f = lambda x, y: f(y, x) - result = getattr(frame, 'r' + op)(2 * frame) - exp = r_f(frame, 2 * frame) - tm.assert_frame_equal(result, exp) - - # vs mix float - result = getattr(mixed_float, op)(2 * mixed_float) - exp = f(mixed_float, 2 * mixed_float) - tm.assert_frame_equal(result, exp) - _check_mixed_float(result, dtype=dict(C=None)) - - result = getattr(intframe, op)(2 * intframe) - exp = f(intframe, 2 * intframe) - tm.assert_frame_equal(result, exp) - - # vs mix int - if op in ['add', 'sub', 'mul']: - result = getattr(mixed_int, op)(2 + mixed_int) - exp = f(mixed_int, 2 + mixed_int) - - # no overflow in the uint - dtype = None - if op in ['sub']: - dtype = dict(B='uint64', C=None) - elif op in ['add', 'mul']: - dtype = dict(C=None) - tm.assert_frame_equal(result, exp) - _check_mixed_int(result, dtype=dtype) - except: - printing.pprint_thing("Failing operation %r" % op) - raise - - # ndim >= 3 - ndim_5 = np.ones(frame.shape + (3, 4, 5)) + def test_arith_flex_frame(self, all_arithmetic_operators, float_frame, + mixed_float_frame): + # one instance of parametrized fixture + op = all_arithmetic_operators + + def f(x, y): + if op.startswith('__r'): + # get op without "r" and invert it + return getattr(operator, op.replace('__r', '__'))(y, x) + return getattr(operator, op)(x, y) + + result = getattr(float_frame, op)(2 * float_frame) + exp = f(float_frame, 2 * float_frame) + tm.assert_frame_equal(result, exp) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + exp = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, exp) + _check_mixed_float(result, dtype=dict(C=None)) + + @pytest.mark.parametrize('op', ['__add__', '__sub__', '__mul__']) + def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, + mixed_float_frame): + f = getattr(operator, op) + + # vs mix int + result = getattr(mixed_int_frame, op)(2 + mixed_int_frame) + exp = f(mixed_int_frame, 2 + mixed_int_frame) + + # no overflow in the uint + dtype = None + if op in ['__sub__']: + dtype = dict(B='uint64', C=None) + elif op in ['__add__', '__mul__']: + dtype = dict(C=None) + tm.assert_frame_equal(result, exp) + _check_mixed_int(result, dtype=dtype) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + exp = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, exp) + _check_mixed_float(result, dtype=dict(C=None)) + + # vs plain int + result = getattr(int_frame, op)(2 * int_frame) + exp = f(int_frame, 2 * int_frame) + tm.assert_frame_equal(result, exp) + + def test_arith_flex_frame_corner(self, all_arithmetic_operators, + float_frame): + # one instance of parametrized fixture + op = all_arithmetic_operators + + # Check that arrays with dim >= 3 raise + for dim in range(3, 6): + arr = np.ones((1,) * dim) msg = "Unable to coerce to Series/DataFrame" with tm.assert_raises_regex(ValueError, msg): - f(frame, ndim_5) + getattr(float_frame, op)(arr) - with tm.assert_raises_regex(ValueError, msg): - getattr(frame, op)(ndim_5) - - # res_add = frame.add(frame) - # res_sub = frame.sub(frame) - # res_mul = frame.mul(frame) - # res_div = frame.div(2 * frame) - - # tm.assert_frame_equal(res_add, frame + frame) - # tm.assert_frame_equal(res_sub, frame - frame) - # tm.assert_frame_equal(res_mul, frame * frame) - # tm.assert_frame_equal(res_div, frame / (2 * frame)) - - const_add = frame.add(1) - tm.assert_frame_equal(const_add, frame + 1) + const_add = float_frame.add(1) + tm.assert_frame_equal(const_add, float_frame + 1) # corner cases - result = frame.add(frame[:0]) - tm.assert_frame_equal(result, frame * np.nan) + result = float_frame.add(float_frame[:0]) + tm.assert_frame_equal(result, float_frame * np.nan) + + result = float_frame[:0].add(float_frame) + tm.assert_frame_equal(result, float_frame * np.nan) - result = frame[:0].add(frame) - tm.assert_frame_equal(result, frame * np.nan) with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - frame.add(frame.iloc[0], fill_value=3) + float_frame.add(float_frame.iloc[0], fill_value=3) + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - frame.add(frame.iloc[0], axis='index', fill_value=3) - - def test_arith_flex_series(self): - arr = np.array([[1., 2., 3.], - [4., 5., 6.], - [7., 8., 9.]]) - df = pd.DataFrame(arr, columns=['one', 'two', 'three'], - index=['a', 'b', 'c']) + float_frame.add(float_frame.iloc[0], axis='index', fill_value=3) + + def test_arith_flex_series(self, simple_frame): + df = simple_frame row = df.xs('a') col = df['two'] From e903579801254bb819bc0c211110ba2bf8c67204 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 22:11:34 +0200 Subject: [PATCH 02/13] Fixturize tests/frame/test_analytics --- pandas/tests/frame/conftest.py | 29 ++ pandas/tests/frame/test_analytics.py | 724 ++++++++++++++------------- 2 files changed, 395 insertions(+), 358 deletions(-) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 4a4ce4540b9d5..348331fc0ccdf 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -17,6 +17,20 @@ def float_frame(): return DataFrame(tm.getSeriesData()) +@pytest.fixture +def float_frame_with_na(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(tm.getSeriesData()) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + @pytest.fixture def float_frame2(): """ @@ -27,6 +41,21 @@ def float_frame2(): return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) +@pytest.fixture +def bool_frame_with_na(): + """ + Fixture for DataFrame of booleans with index of unique strings + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + df = DataFrame(tm.getSeriesData()) > 0 + df = df.astype(object) + # set some NAs + df.loc[5:10] = np.nan + df.loc[15:20, -2:] = np.nan + return df + + @pytest.fixture def int_frame(): """ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 52a52a1fd8752..f26217969c7ad 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -23,54 +23,194 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.tests.frame.common import TestData -class TestDataFrameAnalytics(TestData): +def assert_stat_op_calc(opname, alternative, main_frame, has_skipna=True, + check_dtype=True, check_dates=False, + check_less_precise=False, skipna_alternative=None): + + f = getattr(main_frame, opname) + + if check_dates: + df = DataFrame({'b': date_range('1/1/2001', periods=2)}) + result = getattr(df, opname)() + assert isinstance(result, Series) + + df['a'] = lrange(len(df)) + result = getattr(df, opname)() + assert isinstance(result, Series) + assert len(result) + + if has_skipna: + def wrapper(x): + return alternative(x.values) + + skipna_wrapper = tm._make_skipna_wrapper(alternative, + skipna_alternative) + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, main_frame.apply(wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise) + # HACK: win32 + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + check_dtype=False, + check_less_precise=check_less_precise) + else: + skipna_wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise) + if opname in ['sum', 'prod']: + expected = main_frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal(result1, expected, check_dtype=False, + check_less_precise=check_less_precise) + + # check dtypes + if check_dtype: + lcd_dtype = main_frame.values.dtype + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype + + # bad axis + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + + # all NA case + if has_skipna: + all_na = main_frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname in ['sum', 'prod']: + unit = 1 if opname == 'prod' else 0 # result for empty sum/prod + expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) + + +# underscores added to distinguish argument names from fixture names +def assert_stat_op_api(opname, float_frame_, float_string_frame_, + has_numeric_only=False): + + # make sure works on mixed-type frame + getattr(float_string_frame_, opname)(axis=0) + getattr(float_string_frame_, opname)(axis=1) + + if has_numeric_only: + getattr(float_string_frame_, opname)(axis=0, numeric_only=True) + getattr(float_string_frame_, opname)(axis=1, numeric_only=True) + getattr(float_frame_, opname)(axis=0, numeric_only=False) + getattr(float_frame_, opname)(axis=1, numeric_only=False) + + +def assert_bool_op_calc(opname, alternative, main_frame, has_skipna=True): + + f = getattr(main_frame, opname) + + if has_skipna: + def skipna_wrapper(x): + nona = x.dropna().values + return alternative(nona) + + def wrapper(x): + return alternative(x.values) + + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal(result0, main_frame.apply(wrapper)) + tm.assert_series_equal(result1, main_frame.apply(wrapper, axis=1), + check_dtype=False) # HACK: win32 + else: + skipna_wrapper = alternative + wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal(result0, main_frame.apply(skipna_wrapper)) + tm.assert_series_equal(result1, main_frame.apply(skipna_wrapper, axis=1), + check_dtype=False) + + # bad axis + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + + # all NA case + if has_skipna: + all_na = main_frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname == 'any': + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + + +# underscores added to distinguish argument names from fixture names +def assert_bool_op_api(opname, bool_frame_with_na_, float_string_frame_, + has_bool_only=False): + # make sure op works on mixed-type frame + mixed = float_string_frame_ + mixed['_bool_'] = np.random.randn(len(mixed)) > 0.5 + getattr(mixed, opname)(axis=0) + getattr(mixed, opname)(axis=1) + + class NonzeroFail(object): + + def __nonzero__(self): + raise ValueError + + mixed['_nonzero_fail_'] = NonzeroFail() + + if has_bool_only: + getattr(mixed, opname)(axis=0, bool_only=True) + getattr(mixed, opname)(axis=1, bool_only=True) + getattr(bool_frame_with_na_, opname)(axis=0, bool_only=False) + getattr(bool_frame_with_na_, opname)(axis=1, bool_only=False) + + +class TestDataFrameAnalytics(): # ---------------------------------------------------------------------= # Correlation and covariance @td.skip_if_no_scipy - def test_corr_pearson(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_pearson(self, float_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan - self._check_method('pearson') + self._check_method(float_frame, 'pearson') @td.skip_if_no_scipy - def test_corr_kendall(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_kendall(self, float_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan - self._check_method('kendall') + self._check_method(float_frame, 'kendall') @td.skip_if_no_scipy - def test_corr_spearman(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_spearman(self, float_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan - self._check_method('spearman') + self._check_method(float_frame, 'spearman') - def _check_method(self, method='pearson', check_minp=False): - if not check_minp: - correls = self.frame.corr(method=method) - exp = self.frame['A'].corr(self.frame['C'], method=method) - tm.assert_almost_equal(correls['A']['C'], exp) - else: - result = self.frame.corr(min_periods=len(self.frame) - 8) - expected = self.frame.corr() - expected.loc['A', 'B'] = expected.loc['B', 'A'] = nan - tm.assert_frame_equal(result, expected) + def _check_method(self, frame, method='pearson'): + correls = frame.corr(method=method) + expected = frame['A'].corr(frame['C'], method=method) + tm.assert_almost_equal(correls['A']['C'], expected) @td.skip_if_no_scipy - def test_corr_non_numeric(self): - self.frame['A'][:5] = nan - self.frame['B'][5:10] = nan + def test_corr_non_numeric(self, float_frame, float_string_frame): + float_frame['A'][:5] = nan + float_frame['B'][5:10] = nan # exclude non-numeric types - result = self.mixed_frame.corr() - expected = self.mixed_frame.loc[:, ['A', 'B', 'C', 'D']].corr() + result = float_string_frame.corr() + expected = float_string_frame.loc[:, ['A', 'B', 'C', 'D']].corr() tm.assert_frame_equal(result, expected) @td.skip_if_no_scipy @@ -138,36 +278,36 @@ def test_corr_invalid_method(self): with tm.assert_raises_regex(ValueError, msg): df.corr(method="____") - def test_cov(self): + def test_cov(self, float_frame, float_string_frame): # min_periods no NAs (corner case) - expected = self.frame.cov() - result = self.frame.cov(min_periods=len(self.frame)) + expected = float_frame.cov() + result = float_frame.cov(min_periods=len(float_frame)) tm.assert_frame_equal(expected, result) - result = self.frame.cov(min_periods=len(self.frame) + 1) + result = float_frame.cov(min_periods=len(float_frame) + 1) assert isna(result.values).all() # with NAs - frame = self.frame.copy() + frame = float_frame.copy() frame['A'][:5] = nan frame['B'][5:10] = nan - result = self.frame.cov(min_periods=len(self.frame) - 8) - expected = self.frame.cov() + result = float_frame.cov(min_periods=len(float_frame) - 8) + expected = float_frame.cov() expected.loc['A', 'B'] = np.nan expected.loc['B', 'A'] = np.nan # regular - self.frame['A'][:5] = nan - self.frame['B'][:10] = nan - cov = self.frame.cov() + float_frame['A'][:5] = nan + float_frame['B'][:10] = nan + cov = float_frame.cov() tm.assert_almost_equal(cov['A']['C'], - self.frame['A'].cov(self.frame['C'])) + float_frame['A'].cov(float_frame['C'])) # exclude non-numeric types - result = self.mixed_frame.cov() - expected = self.mixed_frame.loc[:, ['A', 'B', 'C', 'D']].cov() + result = float_string_frame.cov() + expected = float_string_frame.loc[:, ['A', 'B', 'C', 'D']].cov() tm.assert_frame_equal(result, expected) # Single column frame @@ -182,11 +322,11 @@ def test_cov(self): index=df.columns, columns=df.columns) tm.assert_frame_equal(result, expected) - def test_corrwith(self): - a = self.tsframe + def test_corrwith(self, datetime_frame): + a = datetime_frame noise = Series(randn(len(a)), index=a.index) - b = self.tsframe.add(noise, axis=0) + b = datetime_frame.add(noise, axis=0) # make sure order does not matter b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) @@ -231,9 +371,9 @@ def test_corrwith_with_objects(self): expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) tm.assert_series_equal(result, expected) - def test_corrwith_series(self): - result = self.tsframe.corrwith(self.tsframe['A']) - expected = self.tsframe.apply(self.tsframe['A'].corr) + def test_corrwith_series(self, datetime_frame): + result = datetime_frame.corrwith(datetime_frame['A']) + expected = datetime_frame.apply(datetime_frame['A'].corr) tm.assert_series_equal(result, expected) @@ -460,13 +600,12 @@ def test_reduce_mixed_frame(self): np.array([2, 150, 'abcde'], dtype=object)) tm.assert_series_equal(test, df.T.sum(axis=1)) - def test_count(self): + def test_count(self, float_frame_with_na, float_frame, float_string_frame): f = lambda s: notna(s).sum() - self._check_stat_op('count', f, - has_skipna=False, - has_numeric_only=True, - check_dtype=False, - check_dates=True) + assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, + check_dtype=False, check_dates=True) + assert_stat_op_api('count', float_frame, float_string_frame, + has_numeric_only=True) # corner case frame = DataFrame() @@ -492,10 +631,13 @@ def test_count(self): expected = Series(0, index=[]) tm.assert_series_equal(result, expected) - def test_nunique(self): + def test_nunique(self, float_frame_with_na, float_frame, + float_string_frame): f = lambda s: len(algorithms.unique1d(s.dropna())) - self._check_stat_op('nunique', f, has_skipna=False, - check_dtype=False, check_dates=True) + assert_stat_op_calc('nunique', f, float_frame_with_na, + has_skipna=False, check_dtype=False, + check_dates=True) + assert_stat_op_api('nunique', float_frame, float_string_frame) df = DataFrame({'A': [1, 1, 1], 'B': [1, 2, 3], @@ -507,19 +649,18 @@ def test_nunique(self): tm.assert_series_equal(df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2})) - def test_sum(self): - self._check_stat_op('sum', np.sum, has_numeric_only=True, + def test_sum(self, float_frame_with_na, mixed_float_frame, + float_frame, float_string_frame): + assert_stat_op_api('sum', float_frame, float_string_frame, + has_numeric_only=True) + assert_stat_op_calc('sum', np.sum, float_frame_with_na, skipna_alternative=np.nansum) - # mixed types (with upcasting happening) - self._check_stat_op('sum', np.sum, - frame=self.mixed_float.astype('float32'), - has_numeric_only=True, check_dtype=False, - check_less_precise=True) + assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), + check_dtype=False, check_less_precise=True) - @pytest.mark.parametrize( - "method", ['sum', 'mean', 'prod', 'var', - 'std', 'skew', 'min', 'max']) + @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', + 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): # GH #676 data = { @@ -529,8 +670,7 @@ def test_stat_operators_attempt_obj_array(self, method): 'c': [0.00031111847529610595, 0.0014902627951905339, -0.00094099200035979691] } - df1 = DataFrame(data, index=['foo', 'bar', 'baz'], - dtype='O') + df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O') df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object) @@ -543,41 +683,51 @@ def test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self): - self._check_stat_op('mean', np.mean, check_dates=True) + def test_mean(self, float_frame_with_na, float_frame, float_string_frame): + assert_stat_op_calc('mean', np.mean, float_frame_with_na, + check_dates=True) + assert_stat_op_api('mean', float_frame, float_string_frame) - def test_product(self): - self._check_stat_op('product', np.prod) + def test_product(self, float_frame_with_na, float_frame, + float_string_frame): + assert_stat_op_calc('product', np.prod, float_frame_with_na) + assert_stat_op_api('product', float_frame, float_string_frame) # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self): + def test_median(self, float_frame_with_na, float_frame, + float_string_frame): def wrapper(x): if isna(x).any(): return np.nan return np.median(x) - self._check_stat_op('median', wrapper, check_dates=True) + assert_stat_op_calc('median', wrapper, float_frame_with_na, + check_dates=True) + assert_stat_op_api('median', float_frame, float_string_frame) - def test_min(self): + def test_min(self, float_frame_with_na, int_frame, + float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - self._check_stat_op('min', np.min, check_dates=True) - self._check_stat_op('min', np.min, frame=self.intframe) + assert_stat_op_calc('min', np.min, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('min', np.min, int_frame) + assert_stat_op_api('min', float_frame, float_string_frame) - def test_cummin(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cummin(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cummin = self.tsframe.cummin() - expected = self.tsframe.apply(Series.cummin) + cummin = datetime_frame.cummin() + expected = datetime_frame.apply(Series.cummin) tm.assert_frame_equal(cummin, expected) # axis = 1 - cummin = self.tsframe.cummin(axis=1) - expected = self.tsframe.apply(Series.cummin, axis=1) + cummin = datetime_frame.cummin(axis=1) + expected = datetime_frame.apply(Series.cummin, axis=1) tm.assert_frame_equal(cummin, expected) # it works @@ -585,22 +735,22 @@ def test_cummin(self): result = df.cummin() # noqa # fix issue - cummin_xs = self.tsframe.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(self.tsframe) + cummin_xs = datetime_frame.cummin(axis=1) + assert np.shape(cummin_xs) == np.shape(datetime_frame) - def test_cummax(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cummax(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cummax = self.tsframe.cummax() - expected = self.tsframe.apply(Series.cummax) + cummax = datetime_frame.cummax() + expected = datetime_frame.apply(Series.cummax) tm.assert_frame_equal(cummax, expected) # axis = 1 - cummax = self.tsframe.cummax(axis=1) - expected = self.tsframe.apply(Series.cummax, axis=1) + cummax = datetime_frame.cummax(axis=1) + expected = datetime_frame.apply(Series.cummax, axis=1) tm.assert_frame_equal(cummax, expected) # it works @@ -608,32 +758,39 @@ def test_cummax(self): result = df.cummax() # noqa # fix issue - cummax_xs = self.tsframe.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(self.tsframe) + cummax_xs = datetime_frame.cummax(axis=1) + assert np.shape(cummax_xs) == np.shape(datetime_frame) - def test_max(self): + def test_max(self, float_frame_with_na, int_frame, + float_frame, float_string_frame): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore", RuntimeWarning) - self._check_stat_op('max', np.max, check_dates=True) - self._check_stat_op('max', np.max, frame=self.intframe) + assert_stat_op_calc('max', np.max, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('max', np.max, int_frame) + assert_stat_op_api('max', float_frame, float_string_frame) - def test_mad(self): + def test_mad(self, float_frame_with_na, float_frame, float_string_frame): f = lambda x: np.abs(x - x.mean()).mean() - self._check_stat_op('mad', f) + assert_stat_op_calc('mad', f, float_frame_with_na) + assert_stat_op_api('mad', float_frame, float_string_frame) - def test_var_std(self): + def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, + float_string_frame): alt = lambda x: np.var(x, ddof=1) - self._check_stat_op('var', alt) + assert_stat_op_calc('var', alt, float_frame_with_na) + assert_stat_op_api('var', float_frame, float_string_frame) alt = lambda x: np.std(x, ddof=1) - self._check_stat_op('std', alt) + assert_stat_op_calc('std', alt, float_frame_with_na) + assert_stat_op_api('std', float_frame, float_string_frame) - result = self.tsframe.std(ddof=4) - expected = self.tsframe.apply(lambda x: x.std(ddof=4)) + result = datetime_frame.std(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4)) tm.assert_almost_equal(result, expected) - result = self.tsframe.var(ddof=4) - expected = self.tsframe.apply(lambda x: x.var(ddof=4)) + result = datetime_frame.var(ddof=4) + expected = datetime_frame.apply(lambda x: x.var(ddof=4)) tm.assert_almost_equal(result, expected) arr = np.repeat(np.random.random((1, 1000)), 1000, 0) @@ -685,19 +842,19 @@ def test_mixed_ops(self, op): result = getattr(df, op)() assert len(result) == 2 - def test_cumsum(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cumsum(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cumsum = self.tsframe.cumsum() - expected = self.tsframe.apply(Series.cumsum) + cumsum = datetime_frame.cumsum() + expected = datetime_frame.apply(Series.cumsum) tm.assert_frame_equal(cumsum, expected) # axis = 1 - cumsum = self.tsframe.cumsum(axis=1) - expected = self.tsframe.apply(Series.cumsum, axis=1) + cumsum = datetime_frame.cumsum(axis=1) + expected = datetime_frame.apply(Series.cumsum, axis=1) tm.assert_frame_equal(cumsum, expected) # works @@ -705,44 +862,46 @@ def test_cumsum(self): result = df.cumsum() # noqa # fix issue - cumsum_xs = self.tsframe.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(self.tsframe) + cumsum_xs = datetime_frame.cumsum(axis=1) + assert np.shape(cumsum_xs) == np.shape(datetime_frame) - def test_cumprod(self): - self.tsframe.loc[5:10, 0] = nan - self.tsframe.loc[10:15, 1] = nan - self.tsframe.loc[15:, 2] = nan + def test_cumprod(self, datetime_frame): + datetime_frame.loc[5:10, 0] = nan + datetime_frame.loc[10:15, 1] = nan + datetime_frame.loc[15:, 2] = nan # axis = 0 - cumprod = self.tsframe.cumprod() - expected = self.tsframe.apply(Series.cumprod) + cumprod = datetime_frame.cumprod() + expected = datetime_frame.apply(Series.cumprod) tm.assert_frame_equal(cumprod, expected) # axis = 1 - cumprod = self.tsframe.cumprod(axis=1) - expected = self.tsframe.apply(Series.cumprod, axis=1) + cumprod = datetime_frame.cumprod(axis=1) + expected = datetime_frame.apply(Series.cumprod, axis=1) tm.assert_frame_equal(cumprod, expected) # fix issue - cumprod_xs = self.tsframe.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(self.tsframe) + cumprod_xs = datetime_frame.cumprod(axis=1) + assert np.shape(cumprod_xs) == np.shape(datetime_frame) # ints - df = self.tsframe.fillna(0).astype(int) + df = datetime_frame.fillna(0).astype(int) df.cumprod(0) df.cumprod(1) # ints32 - df = self.tsframe.fillna(0).astype(np.int32) + df = datetime_frame.fillna(0).astype(np.int32) df.cumprod(0) df.cumprod(1) - def test_sem(self): + def test_sem(self, float_frame_with_na, datetime_frame, + float_frame, float_string_frame): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - self._check_stat_op('sem', alt) + assert_stat_op_calc('sem', alt, float_frame_with_na) + assert_stat_op_api('sem', float_frame, float_string_frame) - result = self.tsframe.sem(ddof=4) - expected = self.tsframe.apply( + result = datetime_frame.sem(ddof=4) + expected = datetime_frame.apply( lambda x: x.std(ddof=4) / np.sqrt(len(x))) tm.assert_almost_equal(result, expected) @@ -755,7 +914,7 @@ def test_sem(self): assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self): + def test_skew(self, float_frame_with_na, float_frame, float_string_frame): from scipy.stats import skew def alt(x): @@ -763,10 +922,11 @@ def alt(x): return np.nan return skew(x, bias=False) - self._check_stat_op('skew', alt) + assert_stat_op_calc('skew', alt, float_frame_with_na) + assert_stat_op_api('skew', float_frame, float_string_frame) @td.skip_if_no_scipy - def test_kurt(self): + def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): from scipy.stats import kurtosis def alt(x): @@ -774,7 +934,8 @@ def alt(x): return np.nan return kurtosis(x, bias=False) - self._check_stat_op('kurt', alt) + assert_stat_op_calc('kurt', alt, float_frame_with_na) + assert_stat_op_api('kurt', float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], labels=[[0, 0, 0, 0, 0, 0], @@ -788,92 +949,6 @@ def alt(x): assert kurt.name is None assert kurt2.name == 'bar' - def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, - has_numeric_only=False, check_dtype=True, - check_dates=False, check_less_precise=False, - skipna_alternative=None): - if frame is None: - frame = self.frame - # set some NAs - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan - - f = getattr(frame, name) - - if check_dates: - df = DataFrame({'b': date_range('1/1/2001', periods=2)}) - _f = getattr(df, name) - result = _f() - assert isinstance(result, Series) - - df['a'] = lrange(len(df)) - result = getattr(df, name)() - assert isinstance(result, Series) - assert len(result) - - if has_skipna: - def wrapper(x): - return alternative(x.values) - - skipna_wrapper = tm._make_skipna_wrapper(alternative, - skipna_alternative) - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise) - # HACK: win32 - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), - check_dtype=False, - check_less_precise=check_less_precise) - else: - skipna_wrapper = alternative - wrapper = alternative - - result0 = f(axis=0) - result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise) - if name in ['sum', 'prod']: - exp = frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal(result1, exp, check_dtype=False, - check_less_precise=check_less_precise) - - # check dtypes - if check_dtype: - lcd_dtype = frame.values.dtype - assert lcd_dtype == result0.dtype - assert lcd_dtype == result1.dtype - - # result = f(axis=1) - # comp = frame.apply(alternative, axis=1).reindex(result.index) - # assert_series_equal(result, comp) - - # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) - # make sure works on mixed-type frame - getattr(self.mixed_frame, name)(axis=0) - getattr(self.mixed_frame, name)(axis=1) - - if has_numeric_only: - getattr(self.mixed_frame, name)(axis=0, numeric_only=True) - getattr(self.mixed_frame, name)(axis=1, numeric_only=True) - getattr(self.frame, name)(axis=0, numeric_only=False) - getattr(self.frame, name)(axis=1, numeric_only=False) - - # all NA case - if has_skipna: - all_na = self.frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name in ['sum', 'prod']: - unit = int(name == 'prod') - expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) - tm.assert_series_equal(r0, expected) - expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) - tm.assert_series_equal(r1, expected) - @pytest.mark.parametrize("dropna, expected", [ (True, {'A': [12], 'B': [10.0], @@ -1022,9 +1097,9 @@ def test_operators_timedelta64(self): assert df['off1'].dtype == 'timedelta64[ns]' assert df['off2'].dtype == 'timedelta64[ns]' - def test_sum_corner(self): - axis0 = self.empty.sum(0) - axis1 = self.empty.sum(1) + def test_sum_corner(self, empty_frame): + axis0 = empty_frame.sum(0) + axis1 = empty_frame.sum(1) assert isinstance(axis0, Series) assert isinstance(axis1, Series) assert len(axis0) == 0 @@ -1090,59 +1165,61 @@ def test_sum_nanops_timedelta(self): expected = pd.Series([0, 0, np.nan], dtype='m8[ns]', index=idx) tm.assert_series_equal(result, expected) - def test_sum_object(self): - values = self.frame.values.astype(int) - frame = DataFrame(values, index=self.frame.index, - columns=self.frame.columns) + def test_sum_object(self, float_frame): + values = float_frame.values.astype(int) + frame = DataFrame(values, index=float_frame.index, + columns=float_frame.columns) deltas = frame * timedelta(1) deltas.sum() - def test_sum_bool(self): + def test_sum_bool(self, float_frame): # ensure this works, bug report - bools = np.isnan(self.frame) + bools = np.isnan(float_frame) bools.sum(1) bools.sum(0) - def test_mean_corner(self): + def test_mean_corner(self, float_frame, float_string_frame): # unit test when have object data - the_mean = self.mixed_frame.mean(axis=0) - the_sum = self.mixed_frame.sum(axis=0, numeric_only=True) + the_mean = float_string_frame.mean(axis=0) + the_sum = float_string_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) - assert len(the_mean.index) < len(self.mixed_frame.columns) + assert len(the_mean.index) < len(float_string_frame.columns) # xs sum mixed type, just want to know it works... - the_mean = self.mixed_frame.mean(axis=1) - the_sum = self.mixed_frame.sum(axis=1, numeric_only=True) + the_mean = float_string_frame.mean(axis=1) + the_sum = float_string_frame.sum(axis=1, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) # take mean of boolean column - self.frame['bool'] = self.frame['A'] > 0 - means = self.frame.mean(0) - assert means['bool'] == self.frame['bool'].values.mean() + float_frame['bool'] = float_frame['A'] > 0 + means = float_frame.mean(0) + assert means['bool'] == float_frame['bool'].values.mean() - def test_stats_mixed_type(self): + def test_stats_mixed_type(self, float_string_frame): # don't blow up - self.mixed_frame.std(1) - self.mixed_frame.var(1) - self.mixed_frame.mean(1) - self.mixed_frame.skew(1) + float_string_frame.std(1) + float_string_frame.var(1) + float_string_frame.mean(1) + float_string_frame.skew(1) + # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self): + def test_median_corner(self, int_frame, float_frame, float_string_frame): def wrapper(x): if isna(x).any(): return np.nan return np.median(x) - self._check_stat_op('median', wrapper, frame=self.intframe, - check_dtype=False, check_dates=True) + assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, + check_dates=True) + assert_stat_op_api('median', float_frame, float_string_frame) # Miscellanea - def test_count_objects(self): - dm = DataFrame(self.mixed_frame._series) - df = DataFrame(self.mixed_frame._series) + def test_count_objects(self, float_string_frame): + dm = DataFrame(float_string_frame._series) + df = DataFrame(float_string_frame._series) tm.assert_series_equal(dm.count(), df.count()) tm.assert_series_equal(dm.count(1), df.count(1)) @@ -1160,13 +1237,13 @@ def test_sum_bools(self): # Index of max / min - def test_idxmin(self): - frame = self.frame + def test_idxmin(self, float_frame, int_frame): + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: - for df in [frame, self.intframe]: + for df in [frame, int_frame]: result = df.idxmin(axis=axis, skipna=skipna) expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) @@ -1174,13 +1251,13 @@ def test_idxmin(self): pytest.raises(ValueError, frame.idxmin, axis=2) - def test_idxmax(self): - frame = self.frame + def test_idxmax(self, float_frame, int_frame): + frame = float_frame frame.loc[5:10] = np.nan frame.loc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: - for df in [frame, self.intframe]: + for df in [frame, int_frame]: result = df.idxmax(axis=axis, skipna=skipna) expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) @@ -1191,9 +1268,12 @@ def test_idxmax(self): # ---------------------------------------------------------------------- # Logical reductions - def test_any_all(self): - self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True) - self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True) + @pytest.mark.parametrize('opname', ['any', 'all']) + def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + assert_bool_op_calc(opname, getattr(np, opname), bool_frame_with_na, + has_skipna=True) + assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, + has_bool_only=True) def test_any_all_extra(self): df = DataFrame({ @@ -1325,79 +1405,6 @@ def test_any_all_level_axis_none_raises(self, method): with tm.assert_raises_regex(ValueError, xpr): getattr(df, method)(axis=None, level='out') - def _check_bool_op(self, name, alternative, frame=None, has_skipna=True, - has_bool_only=False): - if frame is None: - frame = self.frame > 0 - # set some NAs - frame = DataFrame(frame.values.astype(object), frame.index, - frame.columns) - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan - - f = getattr(frame, name) - - if has_skipna: - def skipna_wrapper(x): - nona = x.dropna().values - return alternative(nona) - - def wrapper(x): - return alternative(x.values) - - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) - tm.assert_series_equal(result0, frame.apply(wrapper)) - tm.assert_series_equal(result1, frame.apply(wrapper, axis=1), - check_dtype=False) # HACK: win32 - else: - skipna_wrapper = alternative - wrapper = alternative - - result0 = f(axis=0) - result1 = f(axis=1) - tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) - tm.assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1), - check_dtype=False) - - # result = f(axis=1) - # comp = frame.apply(alternative, axis=1).reindex(result.index) - # assert_series_equal(result, comp) - - # bad axis - pytest.raises(ValueError, f, axis=2) - - # make sure works on mixed-type frame - mixed = self.mixed_frame - mixed['_bool_'] = np.random.randn(len(mixed)) > 0 - getattr(mixed, name)(axis=0) - getattr(mixed, name)(axis=1) - - class NonzeroFail(object): - - def __nonzero__(self): - raise ValueError - - mixed['_nonzero_fail_'] = NonzeroFail() - - if has_bool_only: - getattr(mixed, name)(axis=0, bool_only=True) - getattr(mixed, name)(axis=1, bool_only=True) - getattr(frame, name)(axis=0, bool_only=False) - getattr(frame, name)(axis=1, bool_only=False) - - # all NA case - if has_skipna: - all_na = frame * np.NaN - r0 = getattr(all_na, name)(axis=0) - r1 = getattr(all_na, name)(axis=1) - if name == 'any': - assert not r0.any() - assert not r1.any() - else: - assert r0.all() - assert r1.all() - # ---------------------------------------------------------------------- # Isin @@ -1746,34 +1753,34 @@ def test_pct_change(self): # Clip - def test_clip(self): - median = self.frame.median().median() - original = self.frame.copy() + def test_clip(self, float_frame): + median = float_frame.median().median() + original = float_frame.copy() - capped = self.frame.clip_upper(median) + capped = float_frame.clip_upper(median) assert not (capped.values > median).any() - floored = self.frame.clip_lower(median) + floored = float_frame.clip_lower(median) assert not (floored.values < median).any() - double = self.frame.clip(upper=median, lower=median) + double = float_frame.clip(upper=median, lower=median) assert not (double.values != median).any() - # Verify that self.frame was not changed inplace - assert (self.frame.values == original.values).all() + # Verify that float_frame was not changed inplace + assert (float_frame.values == original.values).all() - def test_inplace_clip(self): + def test_inplace_clip(self, float_frame): # GH #15388 - median = self.frame.median().median() - frame_copy = self.frame.copy() + median = float_frame.median().median() + frame_copy = float_frame.copy() frame_copy.clip_upper(median, inplace=True) assert not (frame_copy.values > median).any() - frame_copy = self.frame.copy() + frame_copy = float_frame.copy() frame_copy.clip_lower(median, inplace=True) assert not (frame_copy.values < median).any() - frame_copy = self.frame.copy() + frame_copy = float_frame.copy() frame_copy.clip(upper=median, lower=median, inplace=True) assert not (frame_copy.values != median).any() @@ -1839,9 +1846,10 @@ def test_clip_against_series(self, inplace): (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) ]) - def test_clip_against_list_like(self, inplace, lower, axis, res): + def test_clip_against_list_like(self, simple_frame, + inplace, lower, axis, res): # GH #15390 - original = self.simple.copy(deep=True) + original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) @@ -1869,12 +1877,12 @@ def test_clip_against_frame(self, axis): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) - def test_clip_with_na_args(self): + def test_clip_with_na_args(self, float_frame): """Should process np.nan argument as None """ # GH # 17276 - tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) - tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), - self.frame) + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) + tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), + float_frame) # GH #19992 df = DataFrame({'col_0': [1, 2, 3], 'col_1': [4, 5, 6], @@ -1919,8 +1927,8 @@ def test_dot(self): row = a.iloc[0].values result = a.dot(row) - exp = a.dot(a.iloc[0]) - tm.assert_series_equal(result, exp) + expected = a.dot(a.iloc[0]) + tm.assert_series_equal(result, expected) with tm.assert_raises_regex(ValueError, 'Dot product shape mismatch'): From be4605d60a72ba393473e04cf8699d802e37bd5e Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 23:05:20 +0200 Subject: [PATCH 03/13] Clean res/exp in frame/test_apply.py --- pandas/tests/frame/test_apply.py | 51 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index e27115cfc255b..54262163d8e16 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -95,9 +95,9 @@ def test_apply_empty(self, float_frame, empty_frame): assert_series_equal(result, expected) # 2476 - xp = DataFrame(index=['a']) - rs = xp.apply(lambda x: x['a'], axis=1) - assert_frame_equal(xp, rs) + expected = DataFrame(index=['a']) + result = expected.apply(lambda x: x['a'], axis=1) + assert_frame_equal(expected, result) def test_apply_with_reduce_empty(self, empty_frame): # reduce with an empty DataFrame @@ -126,12 +126,13 @@ def test_apply_deprecate_reduce(self, empty_frame): def test_apply_standard_nonunique(self): df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) - rs = df.apply(lambda s: s[0], axis=1) - xp = Series([1, 4, 7], ['a', 'a', 'c']) - assert_series_equal(rs, xp) - rs = df.T.apply(lambda s: s[0], axis=0) - assert_series_equal(rs, xp) + result = df.apply(lambda s: s[0], axis=1) + expected = Series([1, 4, 7], ['a', 'a', 'c']) + assert_series_equal(result, expected) + + result = df.T.apply(lambda s: s[0], axis=0) + assert_series_equal(result, expected) @pytest.mark.parametrize('func', ['sum', 'mean', 'min', 'max', 'std']) @pytest.mark.parametrize('args,kwds', [ @@ -265,13 +266,13 @@ def _check(df, f): is_reduction = not isinstance(test_res, np.ndarray) def _checkit(axis=0, raw=False): - res = df.apply(f, axis=axis, raw=raw) + result = df.apply(f, axis=axis, raw=raw) if is_reduction: agg_axis = df._get_agg_axis(axis) - assert isinstance(res, Series) - assert res.index is agg_axis + assert isinstance(result, Series) + assert result.index is agg_axis else: - assert isinstance(res, DataFrame) + assert isinstance(result, DataFrame) _checkit() _checkit(axis=1) @@ -298,16 +299,16 @@ def subtract_and_divide(x, sub, divide=1): return (x - sub) / divide result = float_frame.apply(add_some, howmuch=2) - exp = float_frame.apply(lambda x: x + 2) - assert_frame_equal(result, exp) + expected = float_frame.apply(lambda x: x + 2) + assert_frame_equal(result, expected) result = float_frame.apply(agg_and_add, howmuch=2) - exp = float_frame.apply(lambda x: x.mean() + 2) - assert_series_equal(result, exp) + expected = float_frame.apply(lambda x: x.mean() + 2) + assert_series_equal(result, expected) - res = float_frame.apply(subtract_and_divide, args=(2,), divide=2) - exp = float_frame.apply(lambda x: (x - 2.) / 2.) - assert_frame_equal(res, exp) + result = float_frame.apply(subtract_and_divide, args=(2,), divide=2) + expected = float_frame.apply(lambda x: (x - 2.) / 2.) + assert_frame_equal(result, expected) def test_apply_yield_list(self, float_frame): result = float_frame.apply(list) @@ -529,12 +530,12 @@ def test_applymap_box(self): 'd': [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')]}) - res = df.applymap(lambda x: '{0}'.format(x.__class__.__name__)) - exp = pd.DataFrame({'a': ['Timestamp', 'Timestamp'], - 'b': ['Timestamp', 'Timestamp'], - 'c': ['Timedelta', 'Timedelta'], - 'd': ['Period', 'Period']}) - tm.assert_frame_equal(res, exp) + result = df.applymap(lambda x: '{0}'.format(x.__class__.__name__)) + expected = pd.DataFrame({'a': ['Timestamp', 'Timestamp'], + 'b': ['Timestamp', 'Timestamp'], + 'c': ['Timedelta', 'Timedelta'], + 'd': ['Period', 'Period']}) + tm.assert_frame_equal(result, expected) def test_frame_apply_dont_convert_datetime64(self): from pandas.tseries.offsets import BDay From a399489b7d8668efb8568eba96b59e18d70874ef Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 23:07:24 +0200 Subject: [PATCH 04/13] Unify GH references in frame/test_apply.py --- pandas/tests/frame/test_apply.py | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 54262163d8e16..ca3469f34fee6 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -59,7 +59,7 @@ def test_apply(self, float_frame): [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) pytest.raises(ValueError, df.apply, lambda x: x, 2) - # see gh-9573 + # GH 9573 df = DataFrame({'c0': ['A', 'A', 'B', 'B'], 'c1': ['C', 'C', 'D', 'D']}) df = df.apply(lambda ts: ts.astype('category')) @@ -94,7 +94,7 @@ def test_apply_empty(self, float_frame, empty_frame): expected = Series(np.nan, index=float_frame.index) assert_series_equal(result, expected) - # 2476 + # GH 2476 expected = DataFrame(index=['a']) result = expected.apply(lambda x: x['a'], axis=1) assert_frame_equal(expected, result) @@ -468,11 +468,11 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) - # gh-465: function returning tuples + # GH 465: function returning tuples result = float_frame.applymap(lambda x: (x, x)) assert isinstance(result['A'][0], tuple) - # gh-2909: object conversion to float in constructor? + # GH 2909: object conversion to float in constructor? df = DataFrame(data=[1, 'a']) result = df.applymap(lambda x: x) assert result.dtypes[0] == object @@ -481,7 +481,7 @@ def test_applymap(self, float_frame): result = df.applymap(lambda x: x) assert result.dtypes[0] == object - # see gh-2786 + # GH 2786 df = DataFrame(np.random.random((3, 4))) df2 = df.copy() cols = ['a', 'a', 'a', 'a'] @@ -499,7 +499,7 @@ def test_applymap(self, float_frame): for f in ['datetime', 'timedelta']: assert result.loc[0, f] == str(df.loc[0, f]) - # see gh-8222 + # GH 8222 empty_frames = [pd.DataFrame(), pd.DataFrame(columns=list('ABC')), pd.DataFrame(index=list('ABC')), @@ -510,7 +510,7 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(result, frame) def test_applymap_box_timestamps(self): - # #2689, #2627 + # GH 2689, GH 2627 ser = pd.Series(date_range('1/1/2000', periods=10)) def func(x): @@ -547,7 +547,7 @@ def test_frame_apply_dont_convert_datetime64(self): assert df.x1.dtype == 'M8[ns]' def test_apply_non_numpy_dtype(self): - # See gh-12244 + # GH 12244 df = DataFrame({'dt': pd.date_range( "2015-01-01", periods=3, tz='Europe/Brussels')}) result = df.apply(lambda x: x) @@ -577,7 +577,7 @@ class TestInferOutputShape(object): # us to infer the output def test_infer_row_shape(self): - # gh-17437 + # GH 17437 # if row shape is changing, infer it df = pd.DataFrame(np.random.rand(10, 2)) result = df.apply(np.fft.fft, axis=0) @@ -587,7 +587,7 @@ def test_infer_row_shape(self): assert result.shape == (6, 2) def test_with_dictlike_columns(self): - # gh 17602 + # GH 17602 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1) @@ -605,7 +605,7 @@ def test_with_dictlike_columns(self): expected = Series([{'s': 3}, {'s': 3}]) assert_series_equal(result, expected) - # gh-18775 + # GH 18775 df = DataFrame() df["author"] = ["X", "Y", "Z"] df["publisher"] = ["BBC", "NBC", "N24"] @@ -617,7 +617,7 @@ def test_with_dictlike_columns(self): assert_series_equal(result, expected) def test_with_dictlike_columns_with_infer(self): - # gh 17602 + # GH 17602 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1, result_type='expand') @@ -631,7 +631,7 @@ def test_with_dictlike_columns_with_infer(self): assert_frame_equal(result, expected) def test_with_listlike_columns(self): - # gh-17348 + # GH 17348 df = DataFrame({'a': Series(np.random.randn(4)), 'b': ['a', 'list', 'of', 'words'], 'ts': date_range('2016-10-01', periods=4, freq='H')}) @@ -644,7 +644,7 @@ def test_with_listlike_columns(self): expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()]) assert_series_equal(result, expected) - # gh-18919 + # GH 18919 df = DataFrame({'x': Series([['a', 'b'], ['q']]), 'y': Series([['z'], ['q', 't']])}) df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')]) @@ -656,7 +656,7 @@ def test_with_listlike_columns(self): assert_series_equal(result, expected) def test_infer_output_shape_columns(self): - # gh-18573 + # GH 18573 df = DataFrame({'number': [1., 2.], 'string': ['foo', 'bar'], @@ -667,7 +667,7 @@ def test_infer_output_shape_columns(self): assert_series_equal(result, expected) def test_infer_output_shape_listlike_columns(self): - # gh-16353 + # GH 16353 df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) @@ -679,7 +679,7 @@ def test_infer_output_shape_listlike_columns(self): expected = Series([[1, 2] for t in df.itertuples()]) assert_series_equal(result, expected) - # gh-17970 + # GH 17970 df = DataFrame({"a": [1, 2, 3]}, index=list('abc')) result = df.apply(lambda row: np.ones(1), axis=1) @@ -692,7 +692,7 @@ def test_infer_output_shape_listlike_columns(self): index=df.index) assert_series_equal(result, expected) - # gh-17892 + # GH 17892 df = pd.DataFrame({'a': [pd.Timestamp('2010-02-01'), pd.Timestamp('2010-02-04'), pd.Timestamp('2010-02-05'), @@ -900,7 +900,7 @@ def f(): 'abs', 'shift', 'pct_change', 'cumsum', 'rank', ]) def test_transform_method_name(self, method): - # https://github.com/pandas-dev/pandas/issues/19760 + # GH 19760 df = pd.DataFrame({"A": [-1, 2]}) result = df.transform(method) expected = operator.methodcaller(method)(df) @@ -924,7 +924,7 @@ def test_demo(self): tm.assert_frame_equal(result.reindex_like(expected), expected) def test_agg_multiple_mixed_no_warning(self): - # https://github.com/pandas-dev/pandas/issues/20909 + # GH 20909 mdf = pd.DataFrame({'A': [1, 2, 3], 'B': [1., 2., 3.], 'C': ['foo', 'bar', 'baz'], @@ -1107,7 +1107,7 @@ def test_non_callable_aggregates(self): ]), )) def test_agg_cython_table(self, df, func, expected, axis): - # GH21224 + # GH 21224 # test reducing functions in # pandas.core.base.SelectionMixin._cython_table result = df.agg(func, axis=axis) @@ -1126,7 +1126,7 @@ def test_agg_cython_table(self, df, func, expected, axis): ]), )) def test_agg_cython_table_transform(self, df, func, expected, axis): - # GH21224 + # GH 21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) result = df.agg(func, axis=axis) @@ -1138,7 +1138,7 @@ def test_agg_cython_table_transform(self, df, func, expected, axis): ]), ) def test_agg_cython_table_raises(self, df, func, expected, axis): - # GH21224 + # GH 21224 with pytest.raises(expected): df.agg(func, axis=axis) @@ -1157,7 +1157,7 @@ def indices(draw, max_length=5): @given(index=indices(5), num_columns=integers(0, 5)) def test_frequency_is_original(self, index, num_columns): - # GH22150 + # GH 22150 original = index.copy() df = DataFrame(True, index=index, columns=range(num_columns)) df.apply(lambda x: x) From b403a735c3969a3d63bfe4fbc9531a7b1f5d4eb7 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 23:02:39 +0200 Subject: [PATCH 05/13] Clean up res/exp in frame/test_api.py --- pandas/tests/frame/test_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 35f2f566ef85e..7880f8d4405a1 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -366,9 +366,9 @@ def test_more_values(self, float_string_frame): def test_repr_with_mi_nat(self, float_string_frame): df = self.klass({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) - res = repr(df) - exp = ' X\nNaT a 1\n2013-01-01 b 2' - assert res == exp + result = repr(df) + expected = ' X\nNaT a 1\n2013-01-01 b 2' + assert result == expected def test_iteritems_names(self, float_string_frame): for k, v in compat.iteritems(float_string_frame): From fa0fc2f70aa1eddee5235fc2ed3d78797bd19775 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 20:58:17 +0200 Subject: [PATCH 06/13] Unify GH references in frame/test_api.py --- pandas/tests/frame/test_api.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 7880f8d4405a1..4b4296e4abc16 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -192,7 +192,7 @@ def test_iteritems(self): assert isinstance(v, self.klass._constructor_sliced) def test_items(self): - # issue #17213, #13918 + # GH 17213, GH 13918 cols = ['a', 'b', 'c'] df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) for c, (k, v) in zip(cols, df.items()): @@ -213,7 +213,7 @@ def test_iterrows(self, float_frame, float_string_frame): self._assert_series_equal(v, exp) def test_iterrows_iso8601(self): - # GH19671 + # GH 19671 if self.klass == SparseDataFrame: pytest.xfail(reason='SparseBlock datetime type not implemented.') @@ -354,7 +354,7 @@ def test_axis_aliases(self, float_frame): assert_series_equal(result, expected) def test_class_axis(self): - # https://github.com/pandas-dev/pandas/issues/18147 + # GH 18147 # no exception and no empty docstring assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) @@ -418,7 +418,7 @@ def test_values(self, float_frame): assert (float_frame.values[:, 0] == 5).all() def test_as_matrix_deprecated(self, float_frame): - # GH18458 + # GH 18458 with tm.assert_produces_warning(FutureWarning): cols = float_frame.columns.tolist() result = float_frame.as_matrix(columns=cols) @@ -439,7 +439,7 @@ def test_transpose_get_view(self, float_frame): assert (float_frame.values[5:10] == 5).all() def test_inplace_return_self(self): - # re #1893 + # GH 1893 data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'], 'b': [0, 0, 1, 1], @@ -503,7 +503,7 @@ def _check_f(base, f): _check_f(d.copy(), f) def test_tab_complete_warning(self, ip): - # https://github.com/pandas-dev/pandas/issues/16409 + # GH 16409 pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.completer import provisionalcompleter From f56bfde5b1f67a3becc4cf5146caf347b1a6f4c3 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 21:02:52 +0200 Subject: [PATCH 07/13] Clean res/exp in frame/test_arithmetic.py --- pandas/tests/frame/test_arithmetic.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 1da208b2ec627..c9f45f8e0f6a9 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -138,13 +138,13 @@ def f(x, y): return getattr(operator, op)(x, y) result = getattr(float_frame, op)(2 * float_frame) - exp = f(float_frame, 2 * float_frame) - tm.assert_frame_equal(result, exp) + expected = f(float_frame, 2 * float_frame) + tm.assert_frame_equal(result, expected) # vs mix float result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) - exp = f(mixed_float_frame, 2 * mixed_float_frame) - tm.assert_frame_equal(result, exp) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) _check_mixed_float(result, dtype=dict(C=None)) @pytest.mark.parametrize('op', ['__add__', '__sub__', '__mul__']) @@ -154,7 +154,7 @@ def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, # vs mix int result = getattr(mixed_int_frame, op)(2 + mixed_int_frame) - exp = f(mixed_int_frame, 2 + mixed_int_frame) + expected = f(mixed_int_frame, 2 + mixed_int_frame) # no overflow in the uint dtype = None @@ -162,19 +162,19 @@ def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame, dtype = dict(B='uint64', C=None) elif op in ['__add__', '__mul__']: dtype = dict(C=None) - tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, expected) _check_mixed_int(result, dtype=dtype) # vs mix float result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) - exp = f(mixed_float_frame, 2 * mixed_float_frame) - tm.assert_frame_equal(result, exp) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) _check_mixed_float(result, dtype=dict(C=None)) # vs plain int result = getattr(int_frame, op)(2 * int_frame) - exp = f(int_frame, 2 * int_frame) - tm.assert_frame_equal(result, exp) + expected = f(int_frame, 2 * int_frame) + tm.assert_frame_equal(result, expected) def test_arith_flex_frame_corner(self, all_arithmetic_operators, float_frame): From f29a8396556fab977d35c30e946223d303f25ce8 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 23:01:07 +0200 Subject: [PATCH 08/13] Unify GH references for frame/test_arithmetic.py --- pandas/tests/frame/test_arithmetic.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index c9f45f8e0f6a9..f955206062a5a 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -17,7 +17,7 @@ class TestFrameComparisons(object): def test_flex_comparison_nat(self): - # GH#15697, GH#22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, # and _definitely_ not be NaN df = pd.DataFrame([pd.NaT]) @@ -35,7 +35,7 @@ def test_flex_comparison_nat(self): assert result.iloc[0, 0].item() is True def test_mixed_comparison(self): - # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, + # GH 13128, GH 22163 != datetime64 vs non-dt64 should be False, # not raise TypeError # (this appears to be fixed before #22163, not sure when) df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]]) @@ -48,7 +48,7 @@ def test_mixed_comparison(self): assert result.all().all() def test_df_boolean_comparison_error(self): - # GH#4576 + # GH 4576 # boolean comparisons with a tuple/list give unexpected results df = pd.DataFrame(np.arange(6).reshape((3, 2))) @@ -77,7 +77,7 @@ def test_df_string_comparison(self): @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types(self, opname): - # GH#15077, non-empty DataFrame + # GH 15077, non-empty DataFrame df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 @@ -86,7 +86,7 @@ def test_df_flex_cmp_constant_return_types(self, opname): @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types_empty(self, opname): - # GH#15077 empty DataFrame + # GH 15077 empty DataFrame df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 @@ -100,7 +100,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): class TestFrameFlexArithmetic(object): def test_df_add_td64_columnwise(self): - # GH#22534 Check that column-wise addition broadcasts correctly + # GH 22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) tdi = pd.timedelta_range('1', periods=10) tser = pd.Series(tdi) @@ -112,7 +112,7 @@ def test_df_add_td64_columnwise(self): tm.assert_frame_equal(result, expected) def test_df_add_flex_filled_mixed_dtypes(self): - # GH#19611 + # GH 19611 dti = pd.date_range('2016-01-01', periods=3) ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]') df = pd.DataFrame({'A': dti, 'B': ser}) @@ -224,7 +224,7 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(df.div(row), df / row) tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - # broadcasting issue in GH#7325 + # broadcasting issue in GH 7325 df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='int64') expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0], axis='index') @@ -236,7 +236,7 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(result, expected) def test_arith_flex_zero_len_raises(self): - # GH#19522 passing fill_value to frame flex arith methods should + # GH 19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases ser_len0 = pd.Series([]) df_len0 = pd.DataFrame([], columns=['A', 'B']) @@ -251,7 +251,7 @@ def test_arith_flex_zero_len_raises(self): class TestFrameArithmetic(object): def test_df_bool_mul_int(self): - # GH#22047, GH#22163 multiplication by 1 should result in int dtype, + # GH 22047, GH 22163 multiplication by 1 should result in int dtype, # not object dtype df = pd.DataFrame([[False, True], [False, False]]) result = df * 1 From 780dd841dea4a66a7f2eeee3190ee4018bfaf849 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 21:13:42 +0200 Subject: [PATCH 09/13] Clean res/exp for frame/test_analytics.py --- pandas/tests/frame/test_analytics.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f26217969c7ad..9fe416aa6e20e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -479,8 +479,8 @@ def test_describe_categorical(self): cat = Series(Categorical(["a", "b", "c", "c"])) df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) - res = df3.describe() - tm.assert_numpy_array_equal(res["cat"].values, res["s"].values) + result = df3.describe() + tm.assert_numpy_array_equal(result["cat"].values, result["s"].values) def test_describe_categorical_columns(self): # GH 11558 @@ -551,8 +551,8 @@ def test_describe_timedelta_values(self): index=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']) - res = df.describe() - tm.assert_frame_equal(res, expected) + result = df.describe() + tm.assert_frame_equal(result, expected) exp_repr = (" t1 t2\n" "count 5 5\n" @@ -563,7 +563,7 @@ def test_describe_timedelta_values(self): "50% 3 days 00:00:00 0 days 03:00:00\n" "75% 4 days 00:00:00 0 days 04:00:00\n" "max 5 days 00:00:00 0 days 05:00:00") - assert repr(res) == exp_repr + assert repr(result) == exp_repr def test_describe_tz_values(self, tz_naive_fixture): # GH 21332 @@ -584,8 +584,8 @@ def test_describe_tz_values(self, tz_naive_fixture): 'last', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] ) - res = df.describe(include='all') - tm.assert_frame_equal(res, expected) + result = df.describe(include='all') + tm.assert_frame_equal(result, expected) def test_reduce_mixed_frame(self): # GH 6806 @@ -1202,7 +1202,6 @@ def test_stats_mixed_type(self, float_string_frame): float_string_frame.mean(1) float_string_frame.skew(1) - # TODO: Ensure warning isn't emitted in the first place @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") def test_median_corner(self, int_frame, float_frame, float_string_frame): From 0e70a304d0d6594a48b1fe23f360d6c885aba2fc Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 22:57:12 +0200 Subject: [PATCH 10/13] Unify GH reference for frame/test_analytics.py --- pandas/tests/frame/test_analytics.py | 52 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 9fe416aa6e20e..827a817534dd9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -271,8 +271,8 @@ def test_corr_cov_independent_index_column(self): assert result.index.equals(result.columns) def test_corr_invalid_method(self): - # GH PR #22298 - df = pd.DataFrame(np.random.normal(size=(10, 2))) + # GH 22298 + df = pd. DataFrame(np.random.normal(size=(10, 2))) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") with tm.assert_raises_regex(ValueError, msg): @@ -615,7 +615,7 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): ct2 = frame.count(0) assert isinstance(ct2, Series) - # GH #423 + # GH 423 df = DataFrame(index=lrange(10)) result = df.count(1) expected = Series(0, index=df.index) @@ -662,7 +662,7 @@ def test_sum(self, float_frame_with_na, mixed_float_frame, @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): - # GH #676 + # GH 676 data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013], @@ -804,7 +804,7 @@ def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, @pytest.mark.parametrize( "meth", ['sem', 'var', 'std']) def test_numeric_only_flag(self, meth): - # GH #9201 + # GH 9201 df1 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) # set one entry to a number in str format df1.loc[0, 'foo'] = '100' @@ -1369,12 +1369,12 @@ def test_any_all_extra(self): (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), # # Mix - # GH-21484 + # GH 21484 # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), ]) def test_any_all_np_func(self, func, data, expected): - # https://github.com/pandas-dev/pandas/issues/19976 + # GH 19976 data = DataFrame(data) result = func(data) assert isinstance(result, np.bool_) @@ -1386,7 +1386,7 @@ def test_any_all_np_func(self, func, data, expected): assert result.item() is expected def test_any_all_object(self): - # https://github.com/pandas-dev/pandas/issues/19976 + # GH 19976 result = np.all(DataFrame(columns=['a', 'b'])).item() assert result is True @@ -1408,7 +1408,7 @@ def test_any_all_level_axis_none_raises(self, method): # Isin def test_isin(self): - # GH #4211 + # GH 4211 df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) @@ -1420,7 +1420,7 @@ def test_isin(self): @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) def test_isin_empty(self, empty): - # see gh-16991 + # GH 16991 df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']}) expected = DataFrame(False, df.index, df.columns) @@ -1446,7 +1446,7 @@ def test_isin_dict(self): tm.assert_frame_equal(result, expected) def test_isin_with_string_scalar(self): - # GH4763 + # GH 4763 df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) @@ -1472,7 +1472,7 @@ def test_isin_df(self): tm.assert_frame_equal(result, expected) def test_isin_tuples(self): - # GH16394 + # GH 16394 df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) df['C'] = list(zip(df['A'], df['B'])) result = df['C'].isin([(1, 'a')]) @@ -1682,7 +1682,7 @@ def test_round(self): expected_rounded['col1']) def test_numpy_round(self): - # See gh-12600 + # GH 12600 df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) out = np.round(df, decimals=0) expected = DataFrame([[2., 1.], [0., 7.]]) @@ -1693,7 +1693,7 @@ def test_numpy_round(self): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): - # GH11885 + # GH 11885 df = DataFrame({'col1': [1.1, 2.2, 3.3, 4.4], 'col2': ['1', 'a', 'c', 'f'], 'col3': date_range('20111111', periods=4)}) @@ -1708,7 +1708,7 @@ def test_round_mixed_type(self): tm.assert_frame_equal(df.round({'col3': 1}), df) def test_round_issue(self): - # GH11611 + # GH 11611 df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'], index=['first', 'second', 'third']) @@ -1725,7 +1725,7 @@ def test_built_in_round(self): pytest.skip("build in round cannot be overridden " "prior to Python 3") - # GH11763 + # GH 11763 # Here's the test frame we'll be working with df = DataFrame( {'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]}) @@ -1769,7 +1769,7 @@ def test_clip(self, float_frame): assert (float_frame.values == original.values).all() def test_inplace_clip(self, float_frame): - # GH #15388 + # GH 15388 median = float_frame.median().median() frame_copy = float_frame.copy() @@ -1785,7 +1785,7 @@ def test_inplace_clip(self, float_frame): assert not (frame_copy.values != median).any() def test_dataframe_clip(self): - # GH #2747 + # GH 2747 df = DataFrame(np.random.randn(1000, 2)) for lb, ub in [(-1, 1), (1, -1)]: @@ -1812,7 +1812,7 @@ def test_clip_mixed_numeric(self): @pytest.mark.parametrize("inplace", [True, False]) def test_clip_against_series(self, inplace): - # GH #6966 + # GH 6966 df = DataFrame(np.random.randn(1000, 2)) lb = Series(np.random.randn(1000)) @@ -1847,7 +1847,7 @@ def test_clip_against_series(self, inplace): ]) def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): - # GH #15390 + # GH 15390 original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], @@ -1878,12 +1878,12 @@ def test_clip_against_frame(self, axis): def test_clip_with_na_args(self, float_frame): """Should process np.nan argument as None """ - # GH # 17276 + # GH 17276 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) - # GH #19992 + # GH 19992 df = DataFrame({'col_0': [1, 2, 3], 'col_1': [4, 5, 6], 'col_2': [7, 8, 9]}) @@ -1956,7 +1956,7 @@ def test_dot(self): _np_version_under1p12, reason="unpredictable return types under numpy < 1.12") def test_matmul(self): - # matmul test is for GH #10259 + # matmul test is for GH 10259 a = DataFrame(np.random.randn(3, 4), index=['a', 'b', 'c'], columns=['p', 'q', 'r', 's']) b = DataFrame(np.random.randn(4, 2), index=['p', 'q', 'r', 's'], @@ -2070,7 +2070,7 @@ class TestNLargestNSmallest(object): ['b', 'c', 'c']]) @pytest.mark.parametrize('n', range(1, 11)) def test_n(self, df_strings, nselect_method, n, order): - # GH10393 + # GH 10393 df = df_strings if 'b' in order: @@ -2103,7 +2103,7 @@ def test_n_all_dtypes(self, df_main_dtypes): df.nlargest(2, list(set(df) - {'category_string', 'string'})) def test_n_identical_values(self): - # GH15297 + # GH 15297 df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) result = df.nlargest(3, 'a') @@ -2137,7 +2137,7 @@ def test_n_duplicate_index(self, df_duplicates, n, order): tm.assert_frame_equal(result, expected) def test_duplicate_keep_all_ties(self): - # see gh-16818 + # GH 16818 df = pd.DataFrame({'a': [5, 4, 4, 2, 3, 3, 3, 3], 'b': [10, 9, 8, 7, 5, 50, 10, 20]}) result = df.nlargest(4, 'a', keep='all') From 855a1866a240629cde8cb9a31306c4d8fdc92317 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 23:43:02 +0200 Subject: [PATCH 11/13] Change GH reference formatting (review jreback) --- pandas/tests/frame/test_analytics.py | 82 +++++++++++++-------------- pandas/tests/frame/test_api.py | 14 ++--- pandas/tests/frame/test_apply.py | 60 ++++++++++---------- pandas/tests/frame/test_arithmetic.py | 20 +++---- 4 files changed, 88 insertions(+), 88 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 827a817534dd9..6f1713eb72348 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -262,7 +262,7 @@ def test_corr_int_and_boolean(self): tm.assert_frame_equal(result, expected) def test_corr_cov_independent_index_column(self): - # GH 14617 + # gh-14617 df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) for method in ['cov', 'corr']: @@ -271,7 +271,7 @@ def test_corr_cov_independent_index_column(self): assert result.index.equals(result.columns) def test_corr_invalid_method(self): - # GH 22298 + # gh-22298 df = pd. DataFrame(np.random.normal(size=(10, 2))) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") @@ -387,7 +387,7 @@ def test_corrwith_matches_corrcoef(self): assert c1 < 1 def test_corrwith_mixed_dtypes(self): - # GH 18570 + # gh-18570 df = pd.DataFrame({'a': [1, 4, 3, 2], 'b': [4, 6, 7, 3], 'c': ['a', 'b', 'c', 'd']}) s = pd.Series([0, 6, 7, 3]) @@ -420,7 +420,7 @@ def test_bool_describe_in_mixed_frame(self): tm.assert_frame_equal(result, expected) def test_describe_bool_frame(self): - # GH 13891 + # gh-13891 df = pd.DataFrame({ 'bool_data_1': [False, False, True, True], 'bool_data_2': [False, True, True, True] @@ -483,7 +483,7 @@ def test_describe_categorical(self): tm.assert_numpy_array_equal(result["cat"].values, result["s"].values) def test_describe_categorical_columns(self): - # GH 11558 + # gh-11558 columns = pd.CategoricalIndex(['int1', 'int2', 'obj'], ordered=True, name='XXX') df = DataFrame({'int1': [10, 20, 30, 40, 50], @@ -529,7 +529,7 @@ def test_describe_datetime_columns(self): assert result.columns.tz == expected.columns.tz def test_describe_timedelta_values(self): - # GH 6145 + # gh-6145 t1 = pd.timedelta_range('1 days', freq='D', periods=5) t2 = pd.timedelta_range('1 hours', freq='H', periods=5) df = pd.DataFrame({'t1': t1, 't2': t2}) @@ -566,7 +566,7 @@ def test_describe_timedelta_values(self): assert repr(result) == exp_repr def test_describe_tz_values(self, tz_naive_fixture): - # GH 21332 + # gh-21332 tz = tz_naive_fixture s1 = Series(range(5)) start = Timestamp(2018, 1, 1) @@ -588,7 +588,7 @@ def test_describe_tz_values(self, tz_naive_fixture): tm.assert_frame_equal(result, expected) def test_reduce_mixed_frame(self): - # GH 6806 + # gh-6806 df = DataFrame({ 'bool_data': [True, True, False, False, False], 'int_data': [10, 20, 30, 40, 50], @@ -615,7 +615,7 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): ct2 = frame.count(0) assert isinstance(ct2, Series) - # GH 423 + # gh-423 df = DataFrame(index=lrange(10)) result = df.count(1) expected = Series(0, index=df.index) @@ -662,7 +662,7 @@ def test_sum(self, float_frame_with_na, mixed_float_frame, @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): - # GH 676 + # gh-676 data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013], @@ -804,7 +804,7 @@ def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, @pytest.mark.parametrize( "meth", ['sem', 'var', 'std']) def test_numeric_only_flag(self, meth): - # GH 9201 + # gh-9201 df1 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) # set one entry to a number in str format df1.loc[0, 'foo'] = '100' @@ -830,7 +830,7 @@ def test_numeric_only_flag(self, meth): @pytest.mark.parametrize('op', ['mean', 'std', 'var', 'skew', 'kurt', 'sem']) def test_mixed_ops(self, op): - # GH 16116 + # gh-16116 df = DataFrame({'int': [1, 2, 3, 4], 'float': [1., 2., 3., 4.], 'str': ['a', 'b', 'c', 'd']}) @@ -1086,7 +1086,7 @@ def test_operators_timedelta64(self): timedelta(days=-1)], index=['A', 'B']) tm.assert_series_equal(result, expected) - # GH 3106 + # gh-3106 df = DataFrame({'time': date_range('20130102', periods=5), 'time2': date_range('20130105', periods=5)}) df['off1'] = df['time2'] - df['time'] @@ -1369,12 +1369,12 @@ def test_any_all_extra(self): (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), # # Mix - # GH 21484 + # gh-21484 # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), ]) def test_any_all_np_func(self, func, data, expected): - # GH 19976 + # gh-19976 data = DataFrame(data) result = func(data) assert isinstance(result, np.bool_) @@ -1386,7 +1386,7 @@ def test_any_all_np_func(self, func, data, expected): assert result.item() is expected def test_any_all_object(self): - # GH 19976 + # gh-19976 result = np.all(DataFrame(columns=['a', 'b'])).item() assert result is True @@ -1408,7 +1408,7 @@ def test_any_all_level_axis_none_raises(self, method): # Isin def test_isin(self): - # GH 4211 + # gh-4211 df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) @@ -1420,7 +1420,7 @@ def test_isin(self): @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) def test_isin_empty(self, empty): - # GH 16991 + # gh-16991 df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']}) expected = DataFrame(False, df.index, df.columns) @@ -1446,7 +1446,7 @@ def test_isin_dict(self): tm.assert_frame_equal(result, expected) def test_isin_with_string_scalar(self): - # GH 4763 + # gh-4763 df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) @@ -1472,7 +1472,7 @@ def test_isin_df(self): tm.assert_frame_equal(result, expected) def test_isin_tuples(self): - # GH 16394 + # gh-16394 df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) df['C'] = list(zip(df['A'], df['B'])) result = df['C'].isin([(1, 'a')]) @@ -1542,7 +1542,7 @@ def test_isin_multiIndex(self): tm.assert_frame_equal(result, expected) def test_isin_empty_datetimelike(self): - # GH 15473 + # gh-15473 df1_ts = DataFrame({'date': pd.to_datetime(['2014-01-01', '2014-01-02'])}) df1_td = DataFrame({'date': @@ -1564,7 +1564,7 @@ def test_isin_empty_datetimelike(self): # Rounding def test_round(self): - # GH 2665 + # gh-2665 # Test that rounding an empty DataFrame does nothing df = DataFrame() @@ -1667,7 +1667,7 @@ def test_round(self): tm.assert_series_equal(df['col1'].round(1), expected_rounded['col1']) # named columns - # GH 11986 + # gh-11986 decimals = 2 expected_rounded = DataFrame( {'col1': [1.12, 2.12, 3.12], 'col2': [1.23, 2.23, 3.23]}) @@ -1682,7 +1682,7 @@ def test_round(self): expected_rounded['col1']) def test_numpy_round(self): - # GH 12600 + # gh-12600 df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) out = np.round(df, decimals=0) expected = DataFrame([[2., 1.], [0., 7.]]) @@ -1693,7 +1693,7 @@ def test_numpy_round(self): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): - # GH 11885 + # gh-11885 df = DataFrame({'col1': [1.1, 2.2, 3.3, 4.4], 'col2': ['1', 'a', 'c', 'f'], 'col3': date_range('20111111', periods=4)}) @@ -1708,7 +1708,7 @@ def test_round_mixed_type(self): tm.assert_frame_equal(df.round({'col3': 1}), df) def test_round_issue(self): - # GH 11611 + # gh-11611 df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'], index=['first', 'second', 'third']) @@ -1725,7 +1725,7 @@ def test_built_in_round(self): pytest.skip("build in round cannot be overridden " "prior to Python 3") - # GH 11763 + # gh-11763 # Here's the test frame we'll be working with df = DataFrame( {'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]}) @@ -1736,7 +1736,7 @@ def test_built_in_round(self): tm.assert_frame_equal(round(df), expected_rounded) def test_pct_change(self): - # GH 11150 + # gh-11150 pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( 0, 40, 10)]).astype(np.float64) pnl.iat[1, 0] = np.nan @@ -1769,7 +1769,7 @@ def test_clip(self, float_frame): assert (float_frame.values == original.values).all() def test_inplace_clip(self, float_frame): - # GH 15388 + # gh-15388 median = float_frame.median().median() frame_copy = float_frame.copy() @@ -1785,7 +1785,7 @@ def test_inplace_clip(self, float_frame): assert not (frame_copy.values != median).any() def test_dataframe_clip(self): - # GH 2747 + # gh-2747 df = DataFrame(np.random.randn(1000, 2)) for lb, ub in [(-1, 1), (1, -1)]: @@ -1812,7 +1812,7 @@ def test_clip_mixed_numeric(self): @pytest.mark.parametrize("inplace", [True, False]) def test_clip_against_series(self, inplace): - # GH 6966 + # gh-6966 df = DataFrame(np.random.randn(1000, 2)) lb = Series(np.random.randn(1000)) @@ -1847,7 +1847,7 @@ def test_clip_against_series(self, inplace): ]) def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): - # GH 15390 + # gh-15390 original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], @@ -1878,12 +1878,12 @@ def test_clip_against_frame(self, axis): def test_clip_with_na_args(self, float_frame): """Should process np.nan argument as None """ - # GH 17276 + # gh-17276 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) - # GH 19992 + # gh-19992 df = DataFrame({'col_0': [1, 2, 3], 'col_1': [4, 5, 6], 'col_2': [7, 8, 9]}) @@ -1956,7 +1956,7 @@ def test_dot(self): _np_version_under1p12, reason="unpredictable return types under numpy < 1.12") def test_matmul(self): - # matmul test is for GH 10259 + # matmul test is for gh-10259 a = DataFrame(np.random.randn(3, 4), index=['a', 'b', 'c'], columns=['p', 'q', 'r', 's']) b = DataFrame(np.random.randn(4, 2), index=['p', 'q', 'r', 's'], @@ -2070,7 +2070,7 @@ class TestNLargestNSmallest(object): ['b', 'c', 'c']]) @pytest.mark.parametrize('n', range(1, 11)) def test_n(self, df_strings, nselect_method, n, order): - # GH 10393 + # gh-10393 df = df_strings if 'b' in order: @@ -2103,7 +2103,7 @@ def test_n_all_dtypes(self, df_main_dtypes): df.nlargest(2, list(set(df) - {'category_string', 'string'})) def test_n_identical_values(self): - # GH 15297 + # gh-15297 df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) result = df.nlargest(3, 'a') @@ -2125,7 +2125,7 @@ def test_n_identical_values(self): ['c', 'b']]) @pytest.mark.parametrize('n', range(1, 6)) def test_n_duplicate_index(self, df_duplicates, n, order): - # GH 13412 + # gh-13412 df = df_duplicates result = df.nsmallest(n, order) @@ -2137,7 +2137,7 @@ def test_n_duplicate_index(self, df_duplicates, n, order): tm.assert_frame_equal(result, expected) def test_duplicate_keep_all_ties(self): - # GH 16818 + # gh-16818 df = pd.DataFrame({'a': [5, 4, 4, 2, 3, 3, 3, 3], 'b': [10, 9, 8, 7, 5, 50, 10, 20]}) result = df.nlargest(4, 'a', keep='all') @@ -2154,7 +2154,7 @@ def test_duplicate_keep_all_ties(self): def test_series_broadcasting(self): # smoke test for numpy warnings - # GH 16378, GH 16306 + # gh-16378, gh-16306 df = DataFrame([1.0, 1.0, 1.0]) df_nan = DataFrame({'A': [np.nan, 2.0, np.nan]}) s = Series([1, 1, 1]) @@ -2166,7 +2166,7 @@ def test_series_broadcasting(self): getattr(df, op)(s_nan, axis=0) def test_series_nat_conversion(self): - # GH 18521 + # gh-18521 # Check rank does not mutate DataFrame df = DataFrame(np.random.randn(10, 3), dtype='float64') expected = df.copy() diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 4b4296e4abc16..87ea3f63f5a16 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -192,7 +192,7 @@ def test_iteritems(self): assert isinstance(v, self.klass._constructor_sliced) def test_items(self): - # GH 17213, GH 13918 + # gh-17213, gh-13918 cols = ['a', 'b', 'c'] df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) for c, (k, v) in zip(cols, df.items()): @@ -213,7 +213,7 @@ def test_iterrows(self, float_frame, float_string_frame): self._assert_series_equal(v, exp) def test_iterrows_iso8601(self): - # GH 19671 + # gh-19671 if self.klass == SparseDataFrame: pytest.xfail(reason='SparseBlock datetime type not implemented.') @@ -266,7 +266,7 @@ def test_itertuples(self, float_frame): def test_sequence_like_with_categorical(self): - # GH 7839 + # gh-7839 # make sure can iterate df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) @@ -354,7 +354,7 @@ def test_axis_aliases(self, float_frame): assert_series_equal(result, expected) def test_class_axis(self): - # GH 18147 + # gh-18147 # no exception and no empty docstring assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) @@ -418,7 +418,7 @@ def test_values(self, float_frame): assert (float_frame.values[:, 0] == 5).all() def test_as_matrix_deprecated(self, float_frame): - # GH 18458 + # gh-18458 with tm.assert_produces_warning(FutureWarning): cols = float_frame.columns.tolist() result = float_frame.as_matrix(columns=cols) @@ -439,7 +439,7 @@ def test_transpose_get_view(self, float_frame): assert (float_frame.values[5:10] == 5).all() def test_inplace_return_self(self): - # GH 1893 + # gh-1893 data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'], 'b': [0, 0, 1, 1], @@ -503,7 +503,7 @@ def _check_f(base, f): _check_f(d.copy(), f) def test_tab_complete_warning(self, ip): - # GH 16409 + # gh-16409 pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.completer import provisionalcompleter diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index ca3469f34fee6..4f5bdc999b7bc 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -59,7 +59,7 @@ def test_apply(self, float_frame): [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) pytest.raises(ValueError, df.apply, lambda x: x, 2) - # GH 9573 + # gh-9573 df = DataFrame({'c0': ['A', 'A', 'B', 'B'], 'c1': ['C', 'C', 'D', 'D']}) df = df.apply(lambda ts: ts.astype('category')) @@ -70,7 +70,7 @@ def test_apply(self, float_frame): def test_apply_mixed_datetimelike(self): # mixed datetimelike - # GH 7778 + # gh-7778 df = DataFrame({'A': date_range('20130101', periods=3), 'B': pd.to_timedelta(np.arange(3), unit='s')}) result = df.apply(lambda x: x, axis=1) @@ -94,7 +94,7 @@ def test_apply_empty(self, float_frame, empty_frame): expected = Series(np.nan, index=float_frame.index) assert_series_equal(result, expected) - # GH 2476 + # gh-2476 expected = DataFrame(index=['a']) result = expected.apply(lambda x: x['a'], axis=1) assert_frame_equal(expected, result) @@ -371,7 +371,7 @@ def transform2(row): def test_apply_bug(self): - # GH 6125 + # gh-6125 positions = pd.DataFrame([[1, 'ABC0', 50], [1, 'YUM0', 20], [1, 'DEF0', 20], [2, 'ABC1', 50], [2, 'YUM1', 20], [2, 'DEF1', 20]], @@ -446,7 +446,7 @@ def test_apply_multi_index(self, float_frame): def test_apply_dict(self): - # GH 8735 + # gh-8735 A = DataFrame([['foo', 'bar'], ['spam', 'eggs']]) A_dicts = Series([dict([(0, 'foo'), (1, 'spam')]), dict([(0, 'bar'), (1, 'eggs')])]) @@ -468,11 +468,11 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) - # GH 465: function returning tuples + # gh-465: function returning tuples result = float_frame.applymap(lambda x: (x, x)) assert isinstance(result['A'][0], tuple) - # GH 2909: object conversion to float in constructor? + # gh-2909: object conversion to float in constructor? df = DataFrame(data=[1, 'a']) result = df.applymap(lambda x: x) assert result.dtypes[0] == object @@ -481,7 +481,7 @@ def test_applymap(self, float_frame): result = df.applymap(lambda x: x) assert result.dtypes[0] == object - # GH 2786 + # gh-2786 df = DataFrame(np.random.random((3, 4))) df2 = df.copy() cols = ['a', 'a', 'a', 'a'] @@ -499,7 +499,7 @@ def test_applymap(self, float_frame): for f in ['datetime', 'timedelta']: assert result.loc[0, f] == str(df.loc[0, f]) - # GH 8222 + # gh-8222 empty_frames = [pd.DataFrame(), pd.DataFrame(columns=list('ABC')), pd.DataFrame(index=list('ABC')), @@ -510,7 +510,7 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(result, frame) def test_applymap_box_timestamps(self): - # GH 2689, GH 2627 + # gh-2689, gh-2627 ser = pd.Series(date_range('1/1/2000', periods=10)) def func(x): @@ -547,7 +547,7 @@ def test_frame_apply_dont_convert_datetime64(self): assert df.x1.dtype == 'M8[ns]' def test_apply_non_numpy_dtype(self): - # GH 12244 + # gh-12244 df = DataFrame({'dt': pd.date_range( "2015-01-01", periods=3, tz='Europe/Brussels')}) result = df.apply(lambda x: x) @@ -563,7 +563,7 @@ def test_apply_non_numpy_dtype(self): assert_frame_equal(result, df) def test_apply_dup_names_multi_agg(self): - # GH 21063 + # gh-21063 df = pd.DataFrame([[0, 1], [2, 3]], columns=['a', 'a']) expected = pd.DataFrame([[0, 1]], columns=['a', 'a'], index=['min']) result = df.agg(['min']) @@ -577,7 +577,7 @@ class TestInferOutputShape(object): # us to infer the output def test_infer_row_shape(self): - # GH 17437 + # gh-17437 # if row shape is changing, infer it df = pd.DataFrame(np.random.rand(10, 2)) result = df.apply(np.fft.fft, axis=0) @@ -587,7 +587,7 @@ def test_infer_row_shape(self): assert result.shape == (6, 2) def test_with_dictlike_columns(self): - # GH 17602 + # gh-17602 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1) @@ -605,7 +605,7 @@ def test_with_dictlike_columns(self): expected = Series([{'s': 3}, {'s': 3}]) assert_series_equal(result, expected) - # GH 18775 + # gh-18775 df = DataFrame() df["author"] = ["X", "Y", "Z"] df["publisher"] = ["BBC", "NBC", "N24"] @@ -617,7 +617,7 @@ def test_with_dictlike_columns(self): assert_series_equal(result, expected) def test_with_dictlike_columns_with_infer(self): - # GH 17602 + # gh-17602 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1, result_type='expand') @@ -631,7 +631,7 @@ def test_with_dictlike_columns_with_infer(self): assert_frame_equal(result, expected) def test_with_listlike_columns(self): - # GH 17348 + # gh-17348 df = DataFrame({'a': Series(np.random.randn(4)), 'b': ['a', 'list', 'of', 'words'], 'ts': date_range('2016-10-01', periods=4, freq='H')}) @@ -644,7 +644,7 @@ def test_with_listlike_columns(self): expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()]) assert_series_equal(result, expected) - # GH 18919 + # gh-18919 df = DataFrame({'x': Series([['a', 'b'], ['q']]), 'y': Series([['z'], ['q', 't']])}) df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')]) @@ -656,7 +656,7 @@ def test_with_listlike_columns(self): assert_series_equal(result, expected) def test_infer_output_shape_columns(self): - # GH 18573 + # gh-18573 df = DataFrame({'number': [1., 2.], 'string': ['foo', 'bar'], @@ -667,7 +667,7 @@ def test_infer_output_shape_columns(self): assert_series_equal(result, expected) def test_infer_output_shape_listlike_columns(self): - # GH 16353 + # gh-16353 df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) @@ -679,7 +679,7 @@ def test_infer_output_shape_listlike_columns(self): expected = Series([[1, 2] for t in df.itertuples()]) assert_series_equal(result, expected) - # GH 17970 + # gh-17970 df = DataFrame({"a": [1, 2, 3]}, index=list('abc')) result = df.apply(lambda row: np.ones(1), axis=1) @@ -692,7 +692,7 @@ def test_infer_output_shape_listlike_columns(self): index=df.index) assert_series_equal(result, expected) - # GH 17892 + # gh-17892 df = pd.DataFrame({'a': [pd.Timestamp('2010-02-01'), pd.Timestamp('2010-02-04'), pd.Timestamp('2010-02-05'), @@ -900,7 +900,7 @@ def f(): 'abs', 'shift', 'pct_change', 'cumsum', 'rank', ]) def test_transform_method_name(self, method): - # GH 19760 + # gh-19760 df = pd.DataFrame({"A": [-1, 2]}) result = df.transform(method) expected = operator.methodcaller(method)(df) @@ -924,7 +924,7 @@ def test_demo(self): tm.assert_frame_equal(result.reindex_like(expected), expected) def test_agg_multiple_mixed_no_warning(self): - # GH 20909 + # gh-20909 mdf = pd.DataFrame({'A': [1, 2, 3], 'B': [1., 2., 3.], 'C': ['foo', 'bar', 'baz'], @@ -1007,7 +1007,7 @@ def test_agg_reduce(self, axis, float_frame): def test_nuiscance_columns(self): - # GH 15015 + # gh-15015 df = DataFrame({'A': [1, 2, 3], 'B': [1., 2., 3.], 'C': ['foo', 'bar', 'baz'], @@ -1035,7 +1035,7 @@ def test_nuiscance_columns(self): def test_non_callable_aggregates(self): - # GH 16405 + # gh-16405 # 'size' is a property of frame/series # validate that this is working df = DataFrame({'A': [None, 2, 3], @@ -1107,7 +1107,7 @@ def test_non_callable_aggregates(self): ]), )) def test_agg_cython_table(self, df, func, expected, axis): - # GH 21224 + # gh-21224 # test reducing functions in # pandas.core.base.SelectionMixin._cython_table result = df.agg(func, axis=axis) @@ -1126,7 +1126,7 @@ def test_agg_cython_table(self, df, func, expected, axis): ]), )) def test_agg_cython_table_transform(self, df, func, expected, axis): - # GH 21224 + # gh-21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) result = df.agg(func, axis=axis) @@ -1138,7 +1138,7 @@ def test_agg_cython_table_transform(self, df, func, expected, axis): ]), ) def test_agg_cython_table_raises(self, df, func, expected, axis): - # GH 21224 + # gh-21224 with pytest.raises(expected): df.agg(func, axis=axis) @@ -1157,7 +1157,7 @@ def indices(draw, max_length=5): @given(index=indices(5), num_columns=integers(0, 5)) def test_frequency_is_original(self, index, num_columns): - # GH 22150 + # gh-22150 original = index.copy() df = DataFrame(True, index=index, columns=range(num_columns)) df.apply(lambda x: x) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f955206062a5a..7584fd92b9f96 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -17,7 +17,7 @@ class TestFrameComparisons(object): def test_flex_comparison_nat(self): - # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # gh-15697, gh-22163 df.eq(pd.NaT) should behave like df == pd.NaT, # and _definitely_ not be NaN df = pd.DataFrame([pd.NaT]) @@ -35,7 +35,7 @@ def test_flex_comparison_nat(self): assert result.iloc[0, 0].item() is True def test_mixed_comparison(self): - # GH 13128, GH 22163 != datetime64 vs non-dt64 should be False, + # gh-13128, gh-22163 != datetime64 vs non-dt64 should be False, # not raise TypeError # (this appears to be fixed before #22163, not sure when) df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]]) @@ -48,7 +48,7 @@ def test_mixed_comparison(self): assert result.all().all() def test_df_boolean_comparison_error(self): - # GH 4576 + # gh-4576 # boolean comparisons with a tuple/list give unexpected results df = pd.DataFrame(np.arange(6).reshape((3, 2))) @@ -77,7 +77,7 @@ def test_df_string_comparison(self): @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types(self, opname): - # GH 15077, non-empty DataFrame + # gh-15077, non-empty DataFrame df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 @@ -86,7 +86,7 @@ def test_df_flex_cmp_constant_return_types(self, opname): @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types_empty(self, opname): - # GH 15077 empty DataFrame + # gh-15077 empty DataFrame df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 @@ -100,7 +100,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): class TestFrameFlexArithmetic(object): def test_df_add_td64_columnwise(self): - # GH 22534 Check that column-wise addition broadcasts correctly + # gh-22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) tdi = pd.timedelta_range('1', periods=10) tser = pd.Series(tdi) @@ -112,7 +112,7 @@ def test_df_add_td64_columnwise(self): tm.assert_frame_equal(result, expected) def test_df_add_flex_filled_mixed_dtypes(self): - # GH 19611 + # gh-19611 dti = pd.date_range('2016-01-01', periods=3) ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]') df = pd.DataFrame({'A': dti, 'B': ser}) @@ -224,7 +224,7 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(df.div(row), df / row) tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - # broadcasting issue in GH 7325 + # broadcasting issue in gh-7325 df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='int64') expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0], axis='index') @@ -236,7 +236,7 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(result, expected) def test_arith_flex_zero_len_raises(self): - # GH 19522 passing fill_value to frame flex arith methods should + # gh-19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases ser_len0 = pd.Series([]) df_len0 = pd.DataFrame([], columns=['A', 'B']) @@ -251,7 +251,7 @@ def test_arith_flex_zero_len_raises(self): class TestFrameArithmetic(object): def test_df_bool_mul_int(self): - # GH 22047, GH 22163 multiplication by 1 should result in int dtype, + # gh-22047, gh-22163 multiplication by 1 should result in int dtype, # not object dtype df = pd.DataFrame([[False, True], [False, False]]) result = df * 1 From 733b889ee5eb77aa3764298db5d261b3b4f2c767 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 23 Sep 2018 23:47:21 +0200 Subject: [PATCH 12/13] Revert "Change GH reference formatting (review jreback)" This reverts commit 855a1866a240629cde8cb9a31306c4d8fdc92317. --- pandas/tests/frame/test_analytics.py | 82 +++++++++++++-------------- pandas/tests/frame/test_api.py | 14 ++--- pandas/tests/frame/test_apply.py | 60 ++++++++++---------- pandas/tests/frame/test_arithmetic.py | 20 +++---- 4 files changed, 88 insertions(+), 88 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6f1713eb72348..827a817534dd9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -262,7 +262,7 @@ def test_corr_int_and_boolean(self): tm.assert_frame_equal(result, expected) def test_corr_cov_independent_index_column(self): - # gh-14617 + # GH 14617 df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) for method in ['cov', 'corr']: @@ -271,7 +271,7 @@ def test_corr_cov_independent_index_column(self): assert result.index.equals(result.columns) def test_corr_invalid_method(self): - # gh-22298 + # GH 22298 df = pd. DataFrame(np.random.normal(size=(10, 2))) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") @@ -387,7 +387,7 @@ def test_corrwith_matches_corrcoef(self): assert c1 < 1 def test_corrwith_mixed_dtypes(self): - # gh-18570 + # GH 18570 df = pd.DataFrame({'a': [1, 4, 3, 2], 'b': [4, 6, 7, 3], 'c': ['a', 'b', 'c', 'd']}) s = pd.Series([0, 6, 7, 3]) @@ -420,7 +420,7 @@ def test_bool_describe_in_mixed_frame(self): tm.assert_frame_equal(result, expected) def test_describe_bool_frame(self): - # gh-13891 + # GH 13891 df = pd.DataFrame({ 'bool_data_1': [False, False, True, True], 'bool_data_2': [False, True, True, True] @@ -483,7 +483,7 @@ def test_describe_categorical(self): tm.assert_numpy_array_equal(result["cat"].values, result["s"].values) def test_describe_categorical_columns(self): - # gh-11558 + # GH 11558 columns = pd.CategoricalIndex(['int1', 'int2', 'obj'], ordered=True, name='XXX') df = DataFrame({'int1': [10, 20, 30, 40, 50], @@ -529,7 +529,7 @@ def test_describe_datetime_columns(self): assert result.columns.tz == expected.columns.tz def test_describe_timedelta_values(self): - # gh-6145 + # GH 6145 t1 = pd.timedelta_range('1 days', freq='D', periods=5) t2 = pd.timedelta_range('1 hours', freq='H', periods=5) df = pd.DataFrame({'t1': t1, 't2': t2}) @@ -566,7 +566,7 @@ def test_describe_timedelta_values(self): assert repr(result) == exp_repr def test_describe_tz_values(self, tz_naive_fixture): - # gh-21332 + # GH 21332 tz = tz_naive_fixture s1 = Series(range(5)) start = Timestamp(2018, 1, 1) @@ -588,7 +588,7 @@ def test_describe_tz_values(self, tz_naive_fixture): tm.assert_frame_equal(result, expected) def test_reduce_mixed_frame(self): - # gh-6806 + # GH 6806 df = DataFrame({ 'bool_data': [True, True, False, False, False], 'int_data': [10, 20, 30, 40, 50], @@ -615,7 +615,7 @@ def test_count(self, float_frame_with_na, float_frame, float_string_frame): ct2 = frame.count(0) assert isinstance(ct2, Series) - # gh-423 + # GH 423 df = DataFrame(index=lrange(10)) result = df.count(1) expected = Series(0, index=df.index) @@ -662,7 +662,7 @@ def test_sum(self, float_frame_with_na, mixed_float_frame, @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): - # gh-676 + # GH 676 data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013], @@ -804,7 +804,7 @@ def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, @pytest.mark.parametrize( "meth", ['sem', 'var', 'std']) def test_numeric_only_flag(self, meth): - # gh-9201 + # GH 9201 df1 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) # set one entry to a number in str format df1.loc[0, 'foo'] = '100' @@ -830,7 +830,7 @@ def test_numeric_only_flag(self, meth): @pytest.mark.parametrize('op', ['mean', 'std', 'var', 'skew', 'kurt', 'sem']) def test_mixed_ops(self, op): - # gh-16116 + # GH 16116 df = DataFrame({'int': [1, 2, 3, 4], 'float': [1., 2., 3., 4.], 'str': ['a', 'b', 'c', 'd']}) @@ -1086,7 +1086,7 @@ def test_operators_timedelta64(self): timedelta(days=-1)], index=['A', 'B']) tm.assert_series_equal(result, expected) - # gh-3106 + # GH 3106 df = DataFrame({'time': date_range('20130102', periods=5), 'time2': date_range('20130105', periods=5)}) df['off1'] = df['time2'] - df['time'] @@ -1369,12 +1369,12 @@ def test_any_all_extra(self): (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), # # Mix - # gh-21484 + # GH 21484 # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), ]) def test_any_all_np_func(self, func, data, expected): - # gh-19976 + # GH 19976 data = DataFrame(data) result = func(data) assert isinstance(result, np.bool_) @@ -1386,7 +1386,7 @@ def test_any_all_np_func(self, func, data, expected): assert result.item() is expected def test_any_all_object(self): - # gh-19976 + # GH 19976 result = np.all(DataFrame(columns=['a', 'b'])).item() assert result is True @@ -1408,7 +1408,7 @@ def test_any_all_level_axis_none_raises(self, method): # Isin def test_isin(self): - # gh-4211 + # GH 4211 df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) @@ -1420,7 +1420,7 @@ def test_isin(self): @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) def test_isin_empty(self, empty): - # gh-16991 + # GH 16991 df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']}) expected = DataFrame(False, df.index, df.columns) @@ -1446,7 +1446,7 @@ def test_isin_dict(self): tm.assert_frame_equal(result, expected) def test_isin_with_string_scalar(self): - # gh-4763 + # GH 4763 df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) @@ -1472,7 +1472,7 @@ def test_isin_df(self): tm.assert_frame_equal(result, expected) def test_isin_tuples(self): - # gh-16394 + # GH 16394 df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) df['C'] = list(zip(df['A'], df['B'])) result = df['C'].isin([(1, 'a')]) @@ -1542,7 +1542,7 @@ def test_isin_multiIndex(self): tm.assert_frame_equal(result, expected) def test_isin_empty_datetimelike(self): - # gh-15473 + # GH 15473 df1_ts = DataFrame({'date': pd.to_datetime(['2014-01-01', '2014-01-02'])}) df1_td = DataFrame({'date': @@ -1564,7 +1564,7 @@ def test_isin_empty_datetimelike(self): # Rounding def test_round(self): - # gh-2665 + # GH 2665 # Test that rounding an empty DataFrame does nothing df = DataFrame() @@ -1667,7 +1667,7 @@ def test_round(self): tm.assert_series_equal(df['col1'].round(1), expected_rounded['col1']) # named columns - # gh-11986 + # GH 11986 decimals = 2 expected_rounded = DataFrame( {'col1': [1.12, 2.12, 3.12], 'col2': [1.23, 2.23, 3.23]}) @@ -1682,7 +1682,7 @@ def test_round(self): expected_rounded['col1']) def test_numpy_round(self): - # gh-12600 + # GH 12600 df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) out = np.round(df, decimals=0) expected = DataFrame([[2., 1.], [0., 7.]]) @@ -1693,7 +1693,7 @@ def test_numpy_round(self): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): - # gh-11885 + # GH 11885 df = DataFrame({'col1': [1.1, 2.2, 3.3, 4.4], 'col2': ['1', 'a', 'c', 'f'], 'col3': date_range('20111111', periods=4)}) @@ -1708,7 +1708,7 @@ def test_round_mixed_type(self): tm.assert_frame_equal(df.round({'col3': 1}), df) def test_round_issue(self): - # gh-11611 + # GH 11611 df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'], index=['first', 'second', 'third']) @@ -1725,7 +1725,7 @@ def test_built_in_round(self): pytest.skip("build in round cannot be overridden " "prior to Python 3") - # gh-11763 + # GH 11763 # Here's the test frame we'll be working with df = DataFrame( {'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]}) @@ -1736,7 +1736,7 @@ def test_built_in_round(self): tm.assert_frame_equal(round(df), expected_rounded) def test_pct_change(self): - # gh-11150 + # GH 11150 pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( 0, 40, 10)]).astype(np.float64) pnl.iat[1, 0] = np.nan @@ -1769,7 +1769,7 @@ def test_clip(self, float_frame): assert (float_frame.values == original.values).all() def test_inplace_clip(self, float_frame): - # gh-15388 + # GH 15388 median = float_frame.median().median() frame_copy = float_frame.copy() @@ -1785,7 +1785,7 @@ def test_inplace_clip(self, float_frame): assert not (frame_copy.values != median).any() def test_dataframe_clip(self): - # gh-2747 + # GH 2747 df = DataFrame(np.random.randn(1000, 2)) for lb, ub in [(-1, 1), (1, -1)]: @@ -1812,7 +1812,7 @@ def test_clip_mixed_numeric(self): @pytest.mark.parametrize("inplace", [True, False]) def test_clip_against_series(self, inplace): - # gh-6966 + # GH 6966 df = DataFrame(np.random.randn(1000, 2)) lb = Series(np.random.randn(1000)) @@ -1847,7 +1847,7 @@ def test_clip_against_series(self, inplace): ]) def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): - # gh-15390 + # GH 15390 original = simple_frame.copy(deep=True) result = original.clip(lower=lower, upper=[5, 6, 7], @@ -1878,12 +1878,12 @@ def test_clip_against_frame(self, axis): def test_clip_with_na_args(self, float_frame): """Should process np.nan argument as None """ - # gh-17276 + # GH 17276 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) - # gh-19992 + # GH 19992 df = DataFrame({'col_0': [1, 2, 3], 'col_1': [4, 5, 6], 'col_2': [7, 8, 9]}) @@ -1956,7 +1956,7 @@ def test_dot(self): _np_version_under1p12, reason="unpredictable return types under numpy < 1.12") def test_matmul(self): - # matmul test is for gh-10259 + # matmul test is for GH 10259 a = DataFrame(np.random.randn(3, 4), index=['a', 'b', 'c'], columns=['p', 'q', 'r', 's']) b = DataFrame(np.random.randn(4, 2), index=['p', 'q', 'r', 's'], @@ -2070,7 +2070,7 @@ class TestNLargestNSmallest(object): ['b', 'c', 'c']]) @pytest.mark.parametrize('n', range(1, 11)) def test_n(self, df_strings, nselect_method, n, order): - # gh-10393 + # GH 10393 df = df_strings if 'b' in order: @@ -2103,7 +2103,7 @@ def test_n_all_dtypes(self, df_main_dtypes): df.nlargest(2, list(set(df) - {'category_string', 'string'})) def test_n_identical_values(self): - # gh-15297 + # GH 15297 df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) result = df.nlargest(3, 'a') @@ -2125,7 +2125,7 @@ def test_n_identical_values(self): ['c', 'b']]) @pytest.mark.parametrize('n', range(1, 6)) def test_n_duplicate_index(self, df_duplicates, n, order): - # gh-13412 + # GH 13412 df = df_duplicates result = df.nsmallest(n, order) @@ -2137,7 +2137,7 @@ def test_n_duplicate_index(self, df_duplicates, n, order): tm.assert_frame_equal(result, expected) def test_duplicate_keep_all_ties(self): - # gh-16818 + # GH 16818 df = pd.DataFrame({'a': [5, 4, 4, 2, 3, 3, 3, 3], 'b': [10, 9, 8, 7, 5, 50, 10, 20]}) result = df.nlargest(4, 'a', keep='all') @@ -2154,7 +2154,7 @@ def test_duplicate_keep_all_ties(self): def test_series_broadcasting(self): # smoke test for numpy warnings - # gh-16378, gh-16306 + # GH 16378, GH 16306 df = DataFrame([1.0, 1.0, 1.0]) df_nan = DataFrame({'A': [np.nan, 2.0, np.nan]}) s = Series([1, 1, 1]) @@ -2166,7 +2166,7 @@ def test_series_broadcasting(self): getattr(df, op)(s_nan, axis=0) def test_series_nat_conversion(self): - # gh-18521 + # GH 18521 # Check rank does not mutate DataFrame df = DataFrame(np.random.randn(10, 3), dtype='float64') expected = df.copy() diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 87ea3f63f5a16..4b4296e4abc16 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -192,7 +192,7 @@ def test_iteritems(self): assert isinstance(v, self.klass._constructor_sliced) def test_items(self): - # gh-17213, gh-13918 + # GH 17213, GH 13918 cols = ['a', 'b', 'c'] df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) for c, (k, v) in zip(cols, df.items()): @@ -213,7 +213,7 @@ def test_iterrows(self, float_frame, float_string_frame): self._assert_series_equal(v, exp) def test_iterrows_iso8601(self): - # gh-19671 + # GH 19671 if self.klass == SparseDataFrame: pytest.xfail(reason='SparseBlock datetime type not implemented.') @@ -266,7 +266,7 @@ def test_itertuples(self, float_frame): def test_sequence_like_with_categorical(self): - # gh-7839 + # GH 7839 # make sure can iterate df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) @@ -354,7 +354,7 @@ def test_axis_aliases(self, float_frame): assert_series_equal(result, expected) def test_class_axis(self): - # gh-18147 + # GH 18147 # no exception and no empty docstring assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) @@ -418,7 +418,7 @@ def test_values(self, float_frame): assert (float_frame.values[:, 0] == 5).all() def test_as_matrix_deprecated(self, float_frame): - # gh-18458 + # GH 18458 with tm.assert_produces_warning(FutureWarning): cols = float_frame.columns.tolist() result = float_frame.as_matrix(columns=cols) @@ -439,7 +439,7 @@ def test_transpose_get_view(self, float_frame): assert (float_frame.values[5:10] == 5).all() def test_inplace_return_self(self): - # gh-1893 + # GH 1893 data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'], 'b': [0, 0, 1, 1], @@ -503,7 +503,7 @@ def _check_f(base, f): _check_f(d.copy(), f) def test_tab_complete_warning(self, ip): - # gh-16409 + # GH 16409 pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.completer import provisionalcompleter diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 4f5bdc999b7bc..ca3469f34fee6 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -59,7 +59,7 @@ def test_apply(self, float_frame): [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) pytest.raises(ValueError, df.apply, lambda x: x, 2) - # gh-9573 + # GH 9573 df = DataFrame({'c0': ['A', 'A', 'B', 'B'], 'c1': ['C', 'C', 'D', 'D']}) df = df.apply(lambda ts: ts.astype('category')) @@ -70,7 +70,7 @@ def test_apply(self, float_frame): def test_apply_mixed_datetimelike(self): # mixed datetimelike - # gh-7778 + # GH 7778 df = DataFrame({'A': date_range('20130101', periods=3), 'B': pd.to_timedelta(np.arange(3), unit='s')}) result = df.apply(lambda x: x, axis=1) @@ -94,7 +94,7 @@ def test_apply_empty(self, float_frame, empty_frame): expected = Series(np.nan, index=float_frame.index) assert_series_equal(result, expected) - # gh-2476 + # GH 2476 expected = DataFrame(index=['a']) result = expected.apply(lambda x: x['a'], axis=1) assert_frame_equal(expected, result) @@ -371,7 +371,7 @@ def transform2(row): def test_apply_bug(self): - # gh-6125 + # GH 6125 positions = pd.DataFrame([[1, 'ABC0', 50], [1, 'YUM0', 20], [1, 'DEF0', 20], [2, 'ABC1', 50], [2, 'YUM1', 20], [2, 'DEF1', 20]], @@ -446,7 +446,7 @@ def test_apply_multi_index(self, float_frame): def test_apply_dict(self): - # gh-8735 + # GH 8735 A = DataFrame([['foo', 'bar'], ['spam', 'eggs']]) A_dicts = Series([dict([(0, 'foo'), (1, 'spam')]), dict([(0, 'bar'), (1, 'eggs')])]) @@ -468,11 +468,11 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) - # gh-465: function returning tuples + # GH 465: function returning tuples result = float_frame.applymap(lambda x: (x, x)) assert isinstance(result['A'][0], tuple) - # gh-2909: object conversion to float in constructor? + # GH 2909: object conversion to float in constructor? df = DataFrame(data=[1, 'a']) result = df.applymap(lambda x: x) assert result.dtypes[0] == object @@ -481,7 +481,7 @@ def test_applymap(self, float_frame): result = df.applymap(lambda x: x) assert result.dtypes[0] == object - # gh-2786 + # GH 2786 df = DataFrame(np.random.random((3, 4))) df2 = df.copy() cols = ['a', 'a', 'a', 'a'] @@ -499,7 +499,7 @@ def test_applymap(self, float_frame): for f in ['datetime', 'timedelta']: assert result.loc[0, f] == str(df.loc[0, f]) - # gh-8222 + # GH 8222 empty_frames = [pd.DataFrame(), pd.DataFrame(columns=list('ABC')), pd.DataFrame(index=list('ABC')), @@ -510,7 +510,7 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(result, frame) def test_applymap_box_timestamps(self): - # gh-2689, gh-2627 + # GH 2689, GH 2627 ser = pd.Series(date_range('1/1/2000', periods=10)) def func(x): @@ -547,7 +547,7 @@ def test_frame_apply_dont_convert_datetime64(self): assert df.x1.dtype == 'M8[ns]' def test_apply_non_numpy_dtype(self): - # gh-12244 + # GH 12244 df = DataFrame({'dt': pd.date_range( "2015-01-01", periods=3, tz='Europe/Brussels')}) result = df.apply(lambda x: x) @@ -563,7 +563,7 @@ def test_apply_non_numpy_dtype(self): assert_frame_equal(result, df) def test_apply_dup_names_multi_agg(self): - # gh-21063 + # GH 21063 df = pd.DataFrame([[0, 1], [2, 3]], columns=['a', 'a']) expected = pd.DataFrame([[0, 1]], columns=['a', 'a'], index=['min']) result = df.agg(['min']) @@ -577,7 +577,7 @@ class TestInferOutputShape(object): # us to infer the output def test_infer_row_shape(self): - # gh-17437 + # GH 17437 # if row shape is changing, infer it df = pd.DataFrame(np.random.rand(10, 2)) result = df.apply(np.fft.fft, axis=0) @@ -587,7 +587,7 @@ def test_infer_row_shape(self): assert result.shape == (6, 2) def test_with_dictlike_columns(self): - # gh-17602 + # GH 17602 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1) @@ -605,7 +605,7 @@ def test_with_dictlike_columns(self): expected = Series([{'s': 3}, {'s': 3}]) assert_series_equal(result, expected) - # gh-18775 + # GH 18775 df = DataFrame() df["author"] = ["X", "Y", "Z"] df["publisher"] = ["BBC", "NBC", "N24"] @@ -617,7 +617,7 @@ def test_with_dictlike_columns(self): assert_series_equal(result, expected) def test_with_dictlike_columns_with_infer(self): - # gh-17602 + # GH 17602 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1, result_type='expand') @@ -631,7 +631,7 @@ def test_with_dictlike_columns_with_infer(self): assert_frame_equal(result, expected) def test_with_listlike_columns(self): - # gh-17348 + # GH 17348 df = DataFrame({'a': Series(np.random.randn(4)), 'b': ['a', 'list', 'of', 'words'], 'ts': date_range('2016-10-01', periods=4, freq='H')}) @@ -644,7 +644,7 @@ def test_with_listlike_columns(self): expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()]) assert_series_equal(result, expected) - # gh-18919 + # GH 18919 df = DataFrame({'x': Series([['a', 'b'], ['q']]), 'y': Series([['z'], ['q', 't']])}) df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')]) @@ -656,7 +656,7 @@ def test_with_listlike_columns(self): assert_series_equal(result, expected) def test_infer_output_shape_columns(self): - # gh-18573 + # GH 18573 df = DataFrame({'number': [1., 2.], 'string': ['foo', 'bar'], @@ -667,7 +667,7 @@ def test_infer_output_shape_columns(self): assert_series_equal(result, expected) def test_infer_output_shape_listlike_columns(self): - # gh-16353 + # GH 16353 df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) @@ -679,7 +679,7 @@ def test_infer_output_shape_listlike_columns(self): expected = Series([[1, 2] for t in df.itertuples()]) assert_series_equal(result, expected) - # gh-17970 + # GH 17970 df = DataFrame({"a": [1, 2, 3]}, index=list('abc')) result = df.apply(lambda row: np.ones(1), axis=1) @@ -692,7 +692,7 @@ def test_infer_output_shape_listlike_columns(self): index=df.index) assert_series_equal(result, expected) - # gh-17892 + # GH 17892 df = pd.DataFrame({'a': [pd.Timestamp('2010-02-01'), pd.Timestamp('2010-02-04'), pd.Timestamp('2010-02-05'), @@ -900,7 +900,7 @@ def f(): 'abs', 'shift', 'pct_change', 'cumsum', 'rank', ]) def test_transform_method_name(self, method): - # gh-19760 + # GH 19760 df = pd.DataFrame({"A": [-1, 2]}) result = df.transform(method) expected = operator.methodcaller(method)(df) @@ -924,7 +924,7 @@ def test_demo(self): tm.assert_frame_equal(result.reindex_like(expected), expected) def test_agg_multiple_mixed_no_warning(self): - # gh-20909 + # GH 20909 mdf = pd.DataFrame({'A': [1, 2, 3], 'B': [1., 2., 3.], 'C': ['foo', 'bar', 'baz'], @@ -1007,7 +1007,7 @@ def test_agg_reduce(self, axis, float_frame): def test_nuiscance_columns(self): - # gh-15015 + # GH 15015 df = DataFrame({'A': [1, 2, 3], 'B': [1., 2., 3.], 'C': ['foo', 'bar', 'baz'], @@ -1035,7 +1035,7 @@ def test_nuiscance_columns(self): def test_non_callable_aggregates(self): - # gh-16405 + # GH 16405 # 'size' is a property of frame/series # validate that this is working df = DataFrame({'A': [None, 2, 3], @@ -1107,7 +1107,7 @@ def test_non_callable_aggregates(self): ]), )) def test_agg_cython_table(self, df, func, expected, axis): - # gh-21224 + # GH 21224 # test reducing functions in # pandas.core.base.SelectionMixin._cython_table result = df.agg(func, axis=axis) @@ -1126,7 +1126,7 @@ def test_agg_cython_table(self, df, func, expected, axis): ]), )) def test_agg_cython_table_transform(self, df, func, expected, axis): - # gh-21224 + # GH 21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) result = df.agg(func, axis=axis) @@ -1138,7 +1138,7 @@ def test_agg_cython_table_transform(self, df, func, expected, axis): ]), ) def test_agg_cython_table_raises(self, df, func, expected, axis): - # gh-21224 + # GH 21224 with pytest.raises(expected): df.agg(func, axis=axis) @@ -1157,7 +1157,7 @@ def indices(draw, max_length=5): @given(index=indices(5), num_columns=integers(0, 5)) def test_frequency_is_original(self, index, num_columns): - # gh-22150 + # GH 22150 original = index.copy() df = DataFrame(True, index=index, columns=range(num_columns)) df.apply(lambda x: x) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 7584fd92b9f96..f955206062a5a 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -17,7 +17,7 @@ class TestFrameComparisons(object): def test_flex_comparison_nat(self): - # gh-15697, gh-22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, # and _definitely_ not be NaN df = pd.DataFrame([pd.NaT]) @@ -35,7 +35,7 @@ def test_flex_comparison_nat(self): assert result.iloc[0, 0].item() is True def test_mixed_comparison(self): - # gh-13128, gh-22163 != datetime64 vs non-dt64 should be False, + # GH 13128, GH 22163 != datetime64 vs non-dt64 should be False, # not raise TypeError # (this appears to be fixed before #22163, not sure when) df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]]) @@ -48,7 +48,7 @@ def test_mixed_comparison(self): assert result.all().all() def test_df_boolean_comparison_error(self): - # gh-4576 + # GH 4576 # boolean comparisons with a tuple/list give unexpected results df = pd.DataFrame(np.arange(6).reshape((3, 2))) @@ -77,7 +77,7 @@ def test_df_string_comparison(self): @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types(self, opname): - # gh-15077, non-empty DataFrame + # GH 15077, non-empty DataFrame df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 @@ -86,7 +86,7 @@ def test_df_flex_cmp_constant_return_types(self, opname): @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types_empty(self, opname): - # gh-15077 empty DataFrame + # GH 15077 empty DataFrame df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 @@ -100,7 +100,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): class TestFrameFlexArithmetic(object): def test_df_add_td64_columnwise(self): - # gh-22534 Check that column-wise addition broadcasts correctly + # GH 22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) tdi = pd.timedelta_range('1', periods=10) tser = pd.Series(tdi) @@ -112,7 +112,7 @@ def test_df_add_td64_columnwise(self): tm.assert_frame_equal(result, expected) def test_df_add_flex_filled_mixed_dtypes(self): - # gh-19611 + # GH 19611 dti = pd.date_range('2016-01-01', periods=3) ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]') df = pd.DataFrame({'A': dti, 'B': ser}) @@ -224,7 +224,7 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(df.div(row), df / row) tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - # broadcasting issue in gh-7325 + # broadcasting issue in GH 7325 df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='int64') expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0], axis='index') @@ -236,7 +236,7 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(result, expected) def test_arith_flex_zero_len_raises(self): - # gh-19522 passing fill_value to frame flex arith methods should + # GH 19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases ser_len0 = pd.Series([]) df_len0 = pd.DataFrame([], columns=['A', 'B']) @@ -251,7 +251,7 @@ def test_arith_flex_zero_len_raises(self): class TestFrameArithmetic(object): def test_df_bool_mul_int(self): - # gh-22047, gh-22163 multiplication by 1 should result in int dtype, + # GH 22047, GH 22163 multiplication by 1 should result in int dtype, # not object dtype df = pd.DataFrame([[False, True], [False, False]]) result = df * 1 From ca369425c24f17f846f257f1e0bf6e4cf60f82af Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 8 Oct 2018 01:12:45 +0200 Subject: [PATCH 13/13] Fix typo --- pandas/tests/frame/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index d5486ee41a42b..66bbc1f1a649b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -341,7 +341,7 @@ def test_corr_cov_independent_index_column(self): def test_corr_invalid_method(self): # GH 22298 - df = pd. DataFrame(np.random.normal(size=(10, 2))) + df = pd.DataFrame(np.random.normal(size=(10, 2))) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") with tm.assert_raises_regex(ValueError, msg):