From 6eb9a54be83cf0762d1f31329f9f8075e71594b5 Mon Sep 17 00:00:00 2001 From: hack-c Date: Fri, 8 Jan 2016 14:33:51 -0500 Subject: [PATCH 01/11] ENH DOC added some new doc examples to str_cat and catch an error to prompt more clearly for a sep keyword --- pandas/core/strings.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 37c8e8b1d8829..b3ced4dc4a6aa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -45,6 +45,15 @@ def str_cat(arr, others=None, sep=None, na_rep=None): Examples -------- + When ``na_rep`` is `None` (default behavior), NaN value(s) + in the Series propagate and return value will be NaN. + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ') + nan + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?') + 'a b ? c' + If ``others`` is specified, corresponding values are concatenated with the separator. Result will be a Series of strings. @@ -110,11 +119,13 @@ def str_cat(arr, others=None, sep=None, na_rep=None): def _length_check(others): n = None for x in others: - if n is None: - n = len(x) - elif len(x) != n: - raise ValueError('All arrays must be same length') - + try: + if n is None: + n = len(x) + elif len(x) != n: + raise ValueError('All arrays must be same length') + except TypeError: + raise ValueError("Did you mean to supply a `sep` keyword?") return n From d15c0f46325c6c626180ef2472bffb45287175ca Mon Sep 17 00:00:00 2001 From: hack-c Date: Fri, 8 Jan 2016 14:33:51 -0500 Subject: [PATCH 02/11] ENH DOC added some new doc examples to str_cat and catch an error to prompt more clearly for a sep keyword --- pandas/core/strings.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 37c8e8b1d8829..b3ced4dc4a6aa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -45,6 +45,15 @@ def str_cat(arr, others=None, sep=None, 
na_rep=None): Examples -------- + When ``na_rep`` is `None` (default behavior), NaN value(s) + in the Series propagate and return value will be NaN. + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ') + nan + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?') + 'a b ? c' + If ``others`` is specified, corresponding values are concatenated with the separator. Result will be a Series of strings. @@ -110,11 +119,13 @@ def str_cat(arr, others=None, sep=None, na_rep=None): def _length_check(others): n = None for x in others: - if n is None: - n = len(x) - elif len(x) != n: - raise ValueError('All arrays must be same length') - + try: + if n is None: + n = len(x) + elif len(x) != n: + raise ValueError('All arrays must be same length') + except TypeError: + raise ValueError("Did you mean to supply a `sep` keyword?") return n From 311dd03ed281820ef375c9f76430d86f3056cfa7 Mon Sep 17 00:00:00 2001 From: hack-c Date: Fri, 8 Jan 2016 15:00:06 -0500 Subject: [PATCH 03/11] TST added test to make sure improved error fires --- pandas/tests/test_strings.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 0013a6579718a..2a959499b8bd5 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -126,6 +126,8 @@ def test_cat(self): exp = ['aa', NA, 'bb', 'bd', 'cfoo', NA] tm.assert_almost_equal(result, exp) + + def test_count(self): values = ['foo', 'foofoo', NA, 'foooofooofommmfoo'] @@ -2057,6 +2059,17 @@ def test_method_on_bytes(self): 'S2').astype(object)) tm.assert_series_equal(result, expected) + def test_str_cat_raises_intuitive_error(self): + s = Series(['a','b','c','d']) + message = "Did you mean to supply a `sep` keyword?" 
+ with tm.assertRaisesRegexp(ValueError, message): + s.str.cat('|') + with tm.assertRaisesRegexp(ValueError, message): + s.str.cat(' ') + + + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 1109211d0fcdd67aec5800ad202325c91f08206b Mon Sep 17 00:00:00 2001 From: hack-c Date: Wed, 20 Jan 2016 17:38:52 -0500 Subject: [PATCH 04/11] add comment with issue # --- pandas/tests/test_strings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 2a959499b8bd5..a0b9016d5a496 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2060,6 +2060,7 @@ def test_method_on_bytes(self): tm.assert_series_equal(result, expected) def test_str_cat_raises_intuitive_error(self): + # https://github.com/pydata/pandas/issues/11334 s = Series(['a','b','c','d']) message = "Did you mean to supply a `sep` keyword?" with tm.assertRaisesRegexp(ValueError, message): From 79c5a7f590d819f2f968efa358a9a5a9833d52c8 Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Fri, 22 Jan 2016 10:52:23 -0500 Subject: [PATCH 05/11] CLN: cleaned RangeIndex._min_fitting_element Added test cases for `_min_fitting_element` and `_max_fitting_element` (unused) that would fail in master because of precision Author: Ka Wo Chen Closes #12113 from kawochen/CLN-RI-max-fitting and squashes the following commits: 3cbc7f5 [Ka Wo Chen] CLN: cleaned RangeIndex._min_fitting_element --- pandas/core/index.py | 16 ++++++---------- pandas/tests/test_index.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 558da897b241e..ad5ed86236e50 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -3,8 +3,6 @@ import warnings import operator from functools import partial -from math import ceil, floor - from sys import getsizeof import numpy as np @@ -4267,16 +4265,14 @@ def intersection(self, other): return 
new_index def _min_fitting_element(self, lower_limit): - """Returns the value of the smallest element greater than the limit""" - round = ceil if self._step > 0 else floor - no_steps = round((float(lower_limit) - self._start) / self._step) - return self._start + self._step * no_steps + """Returns the smallest element greater than or equal to the limit""" + no_steps = -(-(lower_limit - self._start) // abs(self._step)) + return self._start + abs(self._step) * no_steps def _max_fitting_element(self, upper_limit): - """Returns the value of the largest element smaller than the limit""" - round = floor if self._step > 0 else ceil - no_steps = round((float(upper_limit) - self._start) / self._step) - return self._start + self._step * no_steps + """Returns the largest element smaller than or equal to the limit""" + no_steps = (upper_limit - self._start) // abs(self._step) + return self._start + abs(self._step) * no_steps def _extended_gcd(self, a, b): """ diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 68150bfbca3f9..af42c2751bf46 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -4266,6 +4266,11 @@ def test_min_fitting_element(self): result = RangeIndex(5, 0, -1)._min_fitting_element(1) self.assertEqual(1, result) + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) + self.assertEqual(big_num, result) + def test_max_fitting_element(self): result = RangeIndex(0, 20, 2)._max_fitting_element(17) self.assertEqual(16, result) @@ -4279,6 +4284,11 @@ def test_max_fitting_element(self): result = RangeIndex(5, 0, -1)._max_fitting_element(4) self.assertEqual(4, result) + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num) + self.assertEqual(big_num, result) + def test_pickle_compat_construction(self): # RangeIndex() is a valid constructor pass From 2842a43800f326b790bd8d6cbecddbbab1ddfae2 Mon Sep 17 00:00:00 2001 From: 
Wes McKinney Date: Fri, 22 Jan 2016 10:56:08 -0500 Subject: [PATCH 06/11] CLN: fix flake8 warnings in pandas/stats Author: Wes McKinney Closes #12114 from wesm/style/stats and squashes the following commits: a3f8508 [Wes McKinney] CLN: fix flake8 warnings in pandas/stats --- pandas/stats/api.py | 2 + pandas/stats/common.py | 8 +- pandas/stats/fama_macbeth.py | 9 +- pandas/stats/interface.py | 6 +- pandas/stats/misc.py | 7 +- pandas/stats/moments.py | 126 +++++++++++++----------- pandas/stats/ols.py | 13 ++- pandas/stats/plm.py | 9 +- pandas/stats/tests/common.py | 2 + pandas/stats/tests/test_fama_macbeth.py | 3 + pandas/stats/tests/test_math.py | 7 +- pandas/stats/tests/test_ols.py | 12 ++- pandas/stats/tests/test_var.py | 4 + pandas/stats/var.py | 3 + 14 files changed, 131 insertions(+), 80 deletions(-) diff --git a/pandas/stats/api.py b/pandas/stats/api.py index 3732f9ed39524..fd81b875faa91 100644 --- a/pandas/stats/api.py +++ b/pandas/stats/api.py @@ -4,6 +4,8 @@ # pylint: disable-msg=W0611,W0614,W0401 +# flake8: noqa + from pandas.stats.moments import * from pandas.stats.interface import ols from pandas.stats.fama_macbeth import fama_macbeth diff --git a/pandas/stats/common.py b/pandas/stats/common.py index c30b3e7a4bf61..be3b842e93cc8 100644 --- a/pandas/stats/common.py +++ b/pandas/stats/common.py @@ -5,9 +5,10 @@ 2: 'expanding' } # also allow 'rolling' as key -_WINDOW_TYPES.update((v, v) for k,v in list(_WINDOW_TYPES.items())) +_WINDOW_TYPES.update((v, v) for k, v in list(_WINDOW_TYPES.items())) _ADDITIONAL_CLUSTER_TYPES = set(("entity", "time")) + def _get_cluster_type(cluster_type): # this was previous behavior if cluster_type is None: @@ -20,15 +21,18 @@ def _get_cluster_type(cluster_type): return final_type raise ValueError('Unrecognized cluster type: %s' % cluster_type) + def _get_window_type(window_type): # e.g., 0, 1, 2 final_type = _WINDOW_TYPES.get(window_type) # e.g., 'full_sample' - final_type = final_type or 
_WINDOW_TYPES.get(str(window_type).lower().replace(" ", "_")) + final_type = final_type or _WINDOW_TYPES.get( + str(window_type).lower().replace(" ", "_")) if final_type is None: raise ValueError('Unrecognized window type: %s' % window_type) return final_type + def banner(text, width=80): """ diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index 01e68be273226..caad53df2c7fe 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -7,6 +7,7 @@ import pandas.stats.common as common from pandas.util.decorators import cache_readonly +# flake8: noqa def fama_macbeth(**kwargs): """Runs Fama-MacBeth regression. @@ -28,6 +29,7 @@ def fama_macbeth(**kwargs): class FamaMacBeth(StringMixin): + def __init__(self, y, x, intercept=True, nw_lags=None, nw_lags_beta=None, entity_effects=False, time_effects=False, x_effects=None, @@ -39,7 +41,7 @@ def __init__(self, y, x, intercept=True, nw_lags=None, FutureWarning, stacklevel=4) if dropped_dummies is None: - dropped_dummies = {} + dropped_dummies = {} self._nw_lags_beta = nw_lags_beta from pandas.stats.plm import MovingPanelOLS @@ -99,7 +101,7 @@ def _results(self): def _coef_table(self): buffer = StringIO() buffer.write('%13s %13s %13s %13s %13s %13s\n' % - ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')) + ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')) template = '%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n' for i, name in enumerate(self._cols): @@ -148,12 +150,13 @@ def summary(self): class MovingFamaMacBeth(FamaMacBeth): + def __init__(self, y, x, window_type='rolling', window=10, intercept=True, nw_lags=None, nw_lags_beta=None, entity_effects=False, time_effects=False, x_effects=None, cluster=None, dropped_dummies=None, verbose=False): if dropped_dummies is None: - dropped_dummies = {} + dropped_dummies = {} self._window_type = common._get_window_type(window_type) self._window = window diff --git a/pandas/stats/interface.py 
b/pandas/stats/interface.py index 96b2b3e32be0d..caf468b4f85fe 100644 --- a/pandas/stats/interface.py +++ b/pandas/stats/interface.py @@ -76,7 +76,8 @@ def ols(**kwargs): result = ols(y=y, x=x) # Run expanding panel OLS with window 10 and entity clustering. - result = ols(y=y, x=x, cluster='entity', window_type='expanding', window=10) + result = ols(y=y, x=x, cluster='entity', window_type='expanding', + window=10) Returns ------- @@ -85,12 +86,11 @@ def ols(**kwargs): """ if (kwargs.get('cluster') is not None and - kwargs.get('nw_lags') is not None): + kwargs.get('nw_lags') is not None): raise ValueError( 'Pandas OLS does not work with Newey-West correction ' 'and clustering.') - pool = kwargs.get('pool') if 'pool' in kwargs: del kwargs['pool'] diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py index ef663b25e9ca0..1a077dcb6f9a1 100644 --- a/pandas/stats/misc.py +++ b/pandas/stats/misc.py @@ -2,9 +2,10 @@ from pandas import compat import numpy as np -from pandas.core.api import Series, DataFrame, isnull, notnull +from pandas.core.api import Series, DataFrame from pandas.core.series import remove_na -from pandas.compat import zip +from pandas.compat import zip, lrange +import pandas.core.common as com def zscore(series): @@ -42,6 +43,7 @@ def correl_ts(frame1, frame2): def correl_xs(frame1, frame2): return correl_ts(frame1.T, frame2.T) + def percentileofscore(a, score, kind='rank'): """The percentile rank of a score relative to a list of scores. 
@@ -131,6 +133,7 @@ def percentileofscore(a, score, kind='rank'): else: raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'") + def percentileRank(frame, column=None, kind='mean'): """ Return score at percentile for each point in time (cross-section) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 28f35cf26e582..c875a9d49039b 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -20,9 +20,9 @@ 'expanding_sum', 'expanding_mean', 'expanding_std', 'expanding_cov', 'expanding_corr', 'expanding_var', 'expanding_skew', 'expanding_kurt', 'expanding_quantile', - 'expanding_median', 'expanding_apply' ] + 'expanding_median', 'expanding_apply'] -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # Docs # The order of arguments for the _doc_template is: @@ -72,7 +72,8 @@ span : float, optional Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` halflife : float, optional - Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` + Specify decay in terms of halflife, + :math:`\alpha = 1 - exp(log(0.5) / halflife)` min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). 
@@ -173,6 +174,7 @@ Use a standard estimation bias correction """ + def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): """ wrapper function to dispatch to the appropriate window functions @@ -189,8 +191,10 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): else: raise AssertionError("cannot support ndim > 2 for ndarray compat") - warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and will be removed " - "in a future version".format(dispatch=dispatch,name=name), + warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and " + "will be removed " + "in a future version" + .format(dispatch=dispatch, name=name), FutureWarning, stacklevel=3) # get the functional keywords here @@ -198,46 +202,46 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): func_kw = [] kwds = {} for k in func_kw: - value = kwargs.pop(k,None) + value = kwargs.pop(k, None) if value is not None: kwds[k] = value # how is a keyword that if not-None should be in kwds - how = kwargs.pop('how',None) + how = kwargs.pop('how', None) if how is not None: kwds['how'] = how - r = getattr(arg,dispatch)(**kwargs) + r = getattr(arg, dispatch)(**kwargs) if not is_ndarray: # give a helpful deprecation message # with copy-pastable arguments - pargs = ','.join([ "{a}={b}".format(a=a,b=b) for a,b in kwargs.items() if b is not None ]) + pargs = ','.join(["{a}={b}".format(a=a, b=b) + for a, b in kwargs.items() if b is not None]) aargs = ','.join(args) if len(aargs): aargs += ',' - def f(a,b): + def f(a, b): if lib.isscalar(b): - return "{a}={b}".format(a=a,b=b) - return "{a}=<{b}>".format(a=a,b=type(b).__name__) - aargs = ','.join([ f(a,b) for a,b in kwds.items() if b is not None ]) + return "{a}={b}".format(a=a, b=b) + return "{a}=<{b}>".format(a=a, b=type(b).__name__) + aargs = ','.join([f(a, b) for a, b in kwds.items() if b is not None]) warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} " "and will be removed in a future 
version, replace with " - "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})".format(klass=type(arg).__name__, - pargs=pargs, - aargs=aargs, - dispatch=dispatch, - name=name), + "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})" + .format(klass=type(arg).__name__, pargs=pargs, + aargs=aargs, dispatch=dispatch, name=name), FutureWarning, stacklevel=3) - result = getattr(r,name)(*args, **kwds) + result = getattr(r, name)(*args, **kwds) if is_ndarray: result = result.values return result + def rolling_count(arg, window, **kwargs): """ Rolling count of number of non-NaN observations inside provided window. @@ -249,8 +253,8 @@ def rolling_count(arg, window, **kwargs): Size of the moving window. This is the number of observations used for calculating the statistic. freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. Specified as a frequency string or DateOffset object. 
center : boolean, default False Whether the label should correspond with center of window how : string, default 'mean' @@ -268,8 +272,10 @@ def rolling_count(arg, window, **kwargs): """ return ensure_compat('rolling', 'count', arg, window=window, **kwargs) + @Substitution("Unbiased moving covariance.", _binary_arg_flex, - _roll_kw%'None'+_pairwise_kw+_ddof_kw, _flex_retval, _roll_notes) + _roll_kw % 'None' + _pairwise_kw + _ddof_kw, _flex_retval, + _roll_notes) @Appender(_doc_template) def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): @@ -285,11 +291,12 @@ def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): other=arg2, window=window, pairwise=pairwise, - func_kw=['other','pairwise','ddof'], + func_kw=['other', 'pairwise', 'ddof'], **kwargs) + @Substitution("Moving sample correlation.", _binary_arg_flex, - _roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes) + _roll_kw % 'None' + _pairwise_kw, _flex_retval, _roll_notes) @Appender(_doc_template) def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): @@ -305,11 +312,11 @@ def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): other=arg2, window=window, pairwise=pairwise, - func_kw=['other','pairwise'], + func_kw=['other', 'pairwise'], **kwargs) -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # Exponential moving moments @@ -330,8 +337,9 @@ def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, how=how, ignore_na=ignore_na) + @Substitution("Exponentially-weighted moving variance", _unary_arg, - _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) + _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, 
bias=False, freq=None, how=None, ignore_na=False, adjust=True): @@ -349,8 +357,9 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, bias=bias, func_kw=['bias']) + @Substitution("Exponentially-weighted moving std", _unary_arg, - _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) + _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, freq=None, how=None, ignore_na=False, adjust=True): @@ -372,10 +381,11 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, @Substitution("Exponentially-weighted moving covariance", _binary_arg_flex, - _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) + _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - bias=False, freq=None, pairwise=None, how=None, ignore_na=False, adjust=True): + bias=False, freq=None, pairwise=None, how=None, ignore_na=False, + adjust=True): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -398,10 +408,11 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, ignore_na=ignore_na, adjust=adjust, pairwise=pairwise, - func_kw=['other','pairwise','bias']) + func_kw=['other', 'pairwise', 'bias']) + @Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, - _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) + _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, freq=None, pairwise=None, how=None, ignore_na=False, adjust=True): @@ -425,9 +436,9 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, ignore_na=ignore_na, adjust=adjust, pairwise=pairwise, - func_kw=['other','pairwise']) + func_kw=['other', 'pairwise']) 
-#---------------------------------------------------------------------- +# --------------------------------------------------------------------- # Python interface to Cython functions @@ -435,9 +446,9 @@ def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): if how is None: how_arg_str = 'None' else: - how_arg_str = "'%s"%how + how_arg_str = "'%s" % how - @Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw, + @Substitution(desc, _unary_arg, _roll_kw % how_arg_str + additional_kw, _type_of_input_retval, _roll_notes) @Appender(_doc_template) def f(arg, window, min_periods=None, freq=None, center=False, @@ -468,6 +479,7 @@ def f(arg, window, min_periods=None, freq=None, center=False, rolling_skew = _rolling_func('skew', 'Unbiased moving skewness.') rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') + def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, center=False): """Moving quantile. @@ -484,8 +496,8 @@ def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, Minimum number of observations in window required to have a value (otherwise result is NA). freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. Specified as a frequency string or DateOffset object. center : boolean, default False Whether the label should correspond with center of window @@ -529,8 +541,8 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None, Minimum number of observations in window required to have a value (otherwise result is NA). freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. 
Specified as a frequency string or DateOffset object. center : boolean, default False Whether the label should correspond with center of window args : tuple @@ -558,7 +570,7 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None, freq=freq, center=center, min_periods=min_periods, - func_kw=['func','args','kwargs'], + func_kw=['func', 'args', 'kwargs'], func=func, args=args, kwargs=kwargs) @@ -583,8 +595,8 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, Minimum number of observations in window required to have a value (otherwise result is NA). freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. Specified as a frequency string or DateOffset object. center : boolean, default False Whether the label should correspond with center of window mean : boolean, default True @@ -636,6 +648,7 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, func_kw=kwargs.keys(), **kwargs) + def _expanding_func(name, desc, func_kw=None, additional_kw=''): @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, _type_of_input_retval, "") @@ -674,8 +687,8 @@ def expanding_count(arg, freq=None): ---------- arg : DataFrame or numpy ndarray-like freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. Specified as a frequency string or DateOffset object. Returns ------- @@ -702,8 +715,8 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None): Minimum number of observations in window required to have a value (otherwise result is NA). 
freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. Specified as a frequency string or DateOffset object. Returns ------- @@ -723,10 +736,12 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None): func_kw=['quantile'], quantile=quantile) + @Substitution("Unbiased expanding covariance.", _binary_arg_flex, - _expanding_kw+_pairwise_kw+_ddof_kw, _flex_retval, "") + _expanding_kw + _pairwise_kw + _ddof_kw, _flex_retval, "") @Appender(_doc_template) -def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof=1): +def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, + pairwise=None, ddof=1): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -742,11 +757,11 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof pairwise=pairwise, freq=freq, ddof=ddof, - func_kw=['other','pairwise','ddof']) + func_kw=['other', 'pairwise', 'ddof']) @Substitution("Expanding sample correlation.", _binary_arg_flex, - _expanding_kw+_pairwise_kw, _flex_retval, "") + _expanding_kw + _pairwise_kw, _flex_retval, "") @Appender(_doc_template) def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): if arg2 is None: @@ -763,7 +778,8 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): min_periods=min_periods, pairwise=pairwise, freq=freq, - func_kw=['other','pairwise','ddof']) + func_kw=['other', 'pairwise', 'ddof']) + def expanding_apply(arg, func, min_periods=1, freq=None, args=(), kwargs={}): @@ -778,8 +794,8 @@ def expanding_apply(arg, func, min_periods=1, freq=None, Minimum number of observations in window required to have a value (otherwise result is NA). 
freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. + Frequency to conform the data to before computing the + statistic. Specified as a frequency string or DateOffset object. args : tuple Passed on to func kwargs : dict @@ -800,7 +816,7 @@ def expanding_apply(arg, func, min_periods=1, freq=None, arg, freq=freq, min_periods=min_periods, - func_kw=['func','args','kwargs'], + func_kw=['func', 'args', 'kwargs'], func=func, args=args, kwargs=kwargs) diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index 7031d55c0f682..e2375ea180ed2 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -4,6 +4,8 @@ # pylint: disable-msg=W0201 +# flake8: noqa + from pandas.compat import zip, range, StringIO from itertools import starmap from pandas import compat @@ -22,7 +24,6 @@ _FP_ERR = 1e-8 - class OLS(StringMixin): """ Runs a full sample ordinary least squares regression. @@ -103,7 +104,7 @@ def _prepare_data(self): filt_rhs['intercept'] = 1. pre_filt_rhs['intercept'] = 1. 
- if hasattr(filt_weights,'to_dense'): + if hasattr(filt_weights, 'to_dense'): filt_weights = filt_weights.to_dense() return (filt_lhs, filt_rhs, filt_weights, @@ -630,6 +631,7 @@ class MovingOLS(OLS): Assume data is overlapping when computing Newey-West estimator """ + def __init__(self, y, x, weights=None, window_type='expanding', window=None, min_periods=None, intercept=True, nw_lags=None, nw_overlap=False): @@ -989,7 +991,7 @@ def _p_value_raw(self): result = [2 * t.sf(a, b) for a, b in zip(np.fabs(self._t_stat_raw), - self._df_resid_raw)] + self._df_resid_raw)] return np.array(result) @@ -1220,7 +1222,8 @@ def _nobs_raw(self): # expanding case window = len(self._index) - result = Series(self._time_obs_count).rolling(window, min_periods=1).sum().values + result = Series(self._time_obs_count).rolling( + window, min_periods=1).sum().values return result.astype(int) @@ -1314,7 +1317,7 @@ def _filter_data(lhs, rhs, weights=None): filt_lhs = combined.pop('__y__') filt_rhs = combined - if hasattr(filt_weights,'to_dense'): + if hasattr(filt_weights, 'to_dense'): filt_weights = filt_weights.to_dense() return (filt_lhs.to_dense(), filt_rhs.to_dense(), filt_weights, diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py index 177452476b875..dca1977fb19bd 100644 --- a/pandas/stats/plm.py +++ b/pandas/stats/plm.py @@ -5,6 +5,8 @@ # pylint: disable-msg=W0231 # pylint: disable-msg=E1101,E1103 +# flake8: noqa + from __future__ import division from pandas.compat import range from pandas import compat @@ -291,7 +293,8 @@ def _add_categorical_dummies(self, panel, cat_mappings): self.log( '-- Excluding dummy for %s: %s' % (effect, to_exclude)) - dummies = dummies.filter(dummies.columns.difference([mapped_name])) + dummies = dummies.filter( + dummies.columns.difference([mapped_name])) dropped_dummy = True dummies = _convertDummies(dummies, cat_mappings.get(effect)) @@ -793,7 +796,7 @@ def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, resid = resid.swaplevel(0, 
1).sortlevel(0) m = _group_agg(x.values * resid.values, x.index._bounds, - lambda x: np.sum(x, axis=0)) + lambda x: np.sum(x, axis=0)) if nw_lags is None: nw_lags = 0 @@ -805,6 +808,7 @@ def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, return np.dot(xx_inv, np.dot(xox, xx_inv)) + def _group_agg(values, bounds, f): """ R-style aggregator @@ -840,6 +844,7 @@ def _group_agg(values, bounds, f): return result + def _xx_time_effects(x, y): """ Returns X'X - (X'T) (T'T)^-1 (T'X) diff --git a/pandas/stats/tests/common.py b/pandas/stats/tests/common.py index 717eb51292796..0ce4b20a4b719 100644 --- a/pandas/stats/tests/common.py +++ b/pandas/stats/tests/common.py @@ -1,4 +1,5 @@ # pylint: disable-msg=W0611,W0402 +# flake8: noqa from datetime import datetime import string @@ -54,6 +55,7 @@ def check_for_statsmodels(): class BaseTest(tm.TestCase): + def setUp(self): check_for_scipy() check_for_statsmodels() diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index 05849bd80c7a8..deff392d6a16c 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -1,3 +1,5 @@ +# flake8: noqa + from pandas import DataFrame, Panel from pandas.stats.api import fama_macbeth from .common import assert_almost_equal, BaseTest @@ -9,6 +11,7 @@ class TestFamaMacBeth(BaseTest): + def testFamaMacBethRolling(self): # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y, # nw_lags_beta=2) diff --git a/pandas/stats/tests/test_math.py b/pandas/stats/tests/test_math.py index 628a37006cfeb..bc09f33d2f467 100644 --- a/pandas/stats/tests/test_math.py +++ b/pandas/stats/tests/test_math.py @@ -5,12 +5,8 @@ import numpy as np from pandas.core.api import Series, DataFrame, date_range -from pandas.util.testing import assert_almost_equal -import pandas.core.datetools as datetools -import pandas.stats.moments as mom import pandas.util.testing as tm import pandas.stats.math as pmath -import 
pandas.tests.test_series as ts from pandas import ols N, K = 100, 10 @@ -20,7 +16,7 @@ import statsmodels.api as sm except ImportError: try: - import scikits.statsmodels.api as sm + import scikits.statsmodels.api as sm # noqa except ImportError: _have_statsmodels = False @@ -63,6 +59,5 @@ def test_inv_illformed(self): self.assertTrue(np.allclose(rs, expected)) if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 01095ab2336ce..175ad9dc33dc2 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -4,6 +4,8 @@ # pylint: disable-msg=W0212 +# flake8: noqa + from __future__ import division from datetime import datetime @@ -425,6 +427,7 @@ def test_catch_regressor_overlap(self): y = tm.makeTimeSeries() data = {'foo': df1, 'bar': df2} + def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ols(y=y, x=data) @@ -655,7 +658,8 @@ def testWithXEffectsAndDroppedDummies(self): def testWithXEffectsAndConversion(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2']) + result = ols(y=self.panel_y3, x=self.panel_x3, + x_effects=['x1', 'x2']) assert_almost_equal(result._y.values.flat, [1, 2, 3, 4]) exp_x = [[0, 0, 0, 1, 1], [1, 0, 0, 0, 1], [0, 1, 1, 0, 1], @@ -713,10 +717,12 @@ def testRollingWithNeweyWest(self): def testRollingWithEntityCluster(self): self.checkMovingOLS(self.panel_x, self.panel_y, cluster='entity') + def testUnknownClusterRaisesValueError(self): assertRaisesRegexp(ValueError, "Unrecognized cluster.*ridiculous", self.checkMovingOLS, self.panel_x, self.panel_y, - cluster='ridiculous') + cluster='ridiculous') + def testRollingWithTimeEffectsAndEntityCluster(self): self.checkMovingOLS(self.panel_x, self.panel_y, time_effects=True, cluster='entity') @@ -744,6 +750,7 
@@ def testNonPooled(self): self.checkNonPooled(y=self.panel_y, x=self.panel_x) self.checkNonPooled(y=self.panel_y, x=self.panel_x, window_type='rolling', window=25, min_periods=10) + def testUnknownWindowType(self): assertRaisesRegexp(ValueError, "window.*ridiculous", self.checkNonPooled, y=self.panel_y, x=self.panel_x, @@ -856,6 +863,7 @@ def test_group_agg(self): f2 = lambda x: np.zeros((2, 2)) self.assertRaises(Exception, _group_agg, values, bounds, f2) + def _check_non_raw_results(model): _check_repr(model) _check_repr(model.resid) diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index c6eca4041a61b..9bcd070dc1d33 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -1,3 +1,5 @@ +# flake8: noqa + from __future__ import print_function from numpy.testing import run_module_suite, assert_equal, TestCase @@ -29,6 +31,7 @@ class CheckVAR(object): + def test_params(self): assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_3) @@ -80,6 +83,7 @@ def test_bse(self): class Foo(object): + def __init__(self): data = sm.datasets.macrodata.load() data = data.data[['realinv', 'realgdp', 'realcons']].view((float, 3)) diff --git a/pandas/stats/var.py b/pandas/stats/var.py index b06e2f3181496..cc78ca2886fb3 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,3 +1,5 @@ +# flake8: noqa + from __future__ import division from pandas.compat import range, lrange, zip, reduce @@ -517,6 +519,7 @@ class PanelVAR(VAR): data: Panel or dict of DataFrame lags: int """ + def __init__(self, data, lags, intercept=True): self._data = _prep_panel_data(data) self._p = lags From db0094d7baf02172040d3dd13453b005134812eb Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 22 Jan 2016 10:57:16 -0500 Subject: [PATCH 07/11] CLN: grab bag of flake8 fixes Author: Wes McKinney Closes #12115 from wesm/style/flake8-misc and squashes the following commits: 017ca16 [Wes McKinney] CLN: grab bag of flake8 fixes --- 
pandas/__init__.py | 1 + pandas/_version.py | 2 ++ pandas/compat/__init__.py | 2 ++ pandas/compat/chainmap_impl.py | 30 +++++++++++++++++------- pandas/compat/openpyxl_compat.py | 2 +- pandas/compat/pickle_compat.py | 2 ++ pandas/computation/align.py | 4 ++-- pandas/computation/api.py | 2 ++ pandas/computation/engines.py | 9 +++++--- pandas/computation/expr.py | 27 +++++++++++----------- pandas/computation/expressions.py | 13 ++++++----- pandas/computation/ops.py | 7 ++++-- pandas/computation/pytables.py | 21 +++++++++-------- pandas/computation/tests/test_eval.py | 33 +++++++++++++++++---------- pandas/io/parsers.py | 2 +- pandas/msgpack/__init__.py | 2 ++ pandas/msgpack/exceptions.py | 2 ++ pandas/src/generate_code.py | 2 ++ pandas/util/decorators.py | 20 ++++++++-------- pandas/util/doctools.py | 30 ++++++++++++++---------- pandas/util/misc.py | 2 ++ pandas/util/print_versions.py | 12 ++++++---- pandas/util/terminal.py | 1 - pandas/util/testing.py | 2 ++ 24 files changed, 144 insertions(+), 86 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index c2ead16b6f821..ca304fa8f8631 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -1,5 +1,6 @@ # pylint: disable-msg=W0614,W0401,W0611,W0622 +# flake8: noqa __docformat__ = 'restructuredtext' diff --git a/pandas/_version.py b/pandas/_version.py index 61e9f3ff187ea..77b2fdca59576 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -8,6 +8,8 @@ # This file is released into the public domain. 
Generated by # versioneer-0.15 (https://github.com/warner/python-versioneer) +# flake8: noqa + import errno import os import re diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 2da4427af4cb6..f69cd4ef43f8b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -25,6 +25,8 @@ * platform checker """ # pylint disable=W0611 +# flake8: noqa + import functools import itertools from distutils.version import LooseVersion diff --git a/pandas/compat/chainmap_impl.py b/pandas/compat/chainmap_impl.py index 92d2424057f83..c059ad08d4a7f 100644 --- a/pandas/compat/chainmap_impl.py +++ b/pandas/compat/chainmap_impl.py @@ -58,16 +58,19 @@ def __missing__(self, key): def __getitem__(self, key): for mapping in self.maps: try: - return mapping[key] # can't use 'key in mapping' with defaultdict + # can't use 'key in mapping' with defaultdict + return mapping[key] except KeyError: pass - return self.__missing__(key) # support subclasses that define __missing__ + # support subclasses that define __missing__ + return self.__missing__(key) def get(self, key, default=None): return self[key] if key in self else default def __len__(self): - return len(set().union(*self.maps)) # reuses stored hash values if possible + # reuses stored hash values if possible + return len(set().union(*self.maps)) def __iter__(self): return iter(set().union(*self.maps)) @@ -89,7 +92,10 @@ def fromkeys(cls, iterable, *args): return cls(dict.fromkeys(iterable, *args)) def copy(self): - 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + """ + New ChainMap or subclass with a new copy of maps[0] and refs to + maps[1:] + """ return self.__class__(self.maps[0].copy(), *self.maps[1:]) __copy__ = copy @@ -115,21 +121,29 @@ def __delitem__(self, key): try: del self.maps[0][key] except KeyError: - raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + raise KeyError('Key not found in the first mapping: {!r}' + .format(key)) def 
popitem(self): - 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' + """ + Remove and return an item pair from maps[0]. Raise KeyError is maps[0] + is empty. + """ try: return self.maps[0].popitem() except KeyError: raise KeyError('No keys found in the first mapping.') def pop(self, key, *args): - 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + """ + Remove *key* from maps[0] and return its value. Raise KeyError if + *key* not in maps[0]. + """ try: return self.maps[0].pop(key, *args) except KeyError: - raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + raise KeyError('Key not found in the first mapping: {!r}' + .format(key)) def clear(self): 'Clear maps[0], leaving maps[1:] intact.' diff --git a/pandas/compat/openpyxl_compat.py b/pandas/compat/openpyxl_compat.py index 266aded2071b6..87cf52cf00fef 100644 --- a/pandas/compat/openpyxl_compat.py +++ b/pandas/compat/openpyxl_compat.py @@ -32,4 +32,4 @@ def is_compat(major_ver=1): return LooseVersion(stop_ver) <= ver else: raise ValueError('cannot test for openpyxl compatibility with ver {0}' - .format(major_ver)) + .format(major_ver)) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index e794725574119..3059c39c2cb82 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -1,5 +1,7 @@ """ support pre 0.12 series pickle compatibility """ +# flake8: noqa + import sys import numpy as np import pandas diff --git a/pandas/computation/align.py b/pandas/computation/align.py index b5f730378c3cf..ab7c72e7480f9 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -173,8 +173,8 @@ def _reconstruct_object(typ, obj, axes, dtype): ret_value = res_t.type(obj) else: ret_value = typ(obj).astype(res_t) - # The condition is to distinguish 0-dim array (returned in case of scalar) - # and 1 element array + # The condition is to distinguish 0-dim array (returned 
in case of + # scalar) and 1 element array # e.g. np.array(0) and np.array([0]) if len(obj.shape) == 1 and len(obj) == 1: if not isinstance(ret_value, np.ndarray): diff --git a/pandas/computation/api.py b/pandas/computation/api.py index db8269a497768..e5814e08c4bbe 100644 --- a/pandas/computation/api.py +++ b/pandas/computation/api.py @@ -1,2 +1,4 @@ +# flake8: noqa + from pandas.computation.eval import eval from pandas.computation.expr import Expr diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 58b822af546c8..532921035c385 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -1,13 +1,16 @@ """Engine classes for :func:`~pandas.eval` """ +# flake8: noqa + import abc from pandas import compat from pandas.compat import DeepChainMap, map from pandas.core import common as com from pandas.computation.align import _align, _reconstruct_object -from pandas.computation.ops import UndefinedVariableError, _mathops, _reductions +from pandas.computation.ops import (UndefinedVariableError, + _mathops, _reductions) _ne_builtins = frozenset(_mathops + _reductions) @@ -30,8 +33,8 @@ def _check_ne_builtin_clash(expr): if overlap: s = ', '.join(map(repr, overlap)) - raise NumExprClobberingError('Variables in expression "%s" overlap with ' - 'numexpr builtins: (%s)' % (expr, s)) + raise NumExprClobberingError('Variables in expression "%s" ' + 'overlap with builtins: (%s)' % (expr, s)) class AbstractEngine(object): diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 6da5cf4753a8e..61a3c9991160d 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -2,11 +2,7 @@ """ import ast -import operator -import sys -import inspect import tokenize -import datetime from functools import partial @@ -21,7 +17,7 @@ from pandas.computation.ops import _reductions, _mathops, _LOCAL_TAG from pandas.computation.ops import Op, BinOp, UnaryOp, Term, Constant, Div from pandas.computation.ops import 
UndefinedVariableError, FuncNode -from pandas.computation.scope import Scope, _ensure_scope +from pandas.computation.scope import Scope def tokenize_string(source): @@ -381,9 +377,9 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, rhs.type)) if self.engine != 'pytables': - if (res.op in _cmp_ops_syms - and getattr(lhs, 'is_datetime', False) - or getattr(rhs, 'is_datetime', False)): + if (res.op in _cmp_ops_syms and + getattr(lhs, 'is_datetime', False) or + getattr(rhs, 'is_datetime', False)): # all date ops must be done in python bc numexpr doesn't work # well with NaT return self._possibly_eval(res, self.binary_ops) @@ -392,8 +388,8 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, # "in"/"not in" ops are always evaluated in python return self._possibly_eval(res, eval_in_python) elif self.engine != 'pytables': - if (getattr(lhs, 'return_type', None) == object - or getattr(rhs, 'return_type', None) == object): + if (getattr(lhs, 'return_type', None) == object or + getattr(rhs, 'return_type', None) == object): # evaluate "==" and "!=" in python if either of our operands # has an object return type return self._possibly_eval(res, eval_in_python + @@ -517,7 +513,8 @@ def visit_Attribute(self, node, **kwargs): raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) def visit_Call_35(self, node, side=None, **kwargs): - """ in 3.5 the starargs attribute was changed to be more flexible, #11097 """ + """ in 3.5 the starargs attribute was changed to be more flexible, + #11097 """ if isinstance(node.func, ast.Attribute): res = self.visit_Attribute(node.func) @@ -541,7 +538,7 @@ def visit_Call_35(self, node, side=None, **kwargs): if isinstance(res, FuncNode): - new_args = [ self.visit(arg) for arg in node.args ] + new_args = [self.visit(arg) for arg in node.args] if node.keywords: raise TypeError("Function \"{0}\" does not support keyword " @@ -551,7 +548,7 @@ def visit_Call_35(self, node, side=None, **kwargs): else: - new_args = [ 
self.visit(arg).value for arg in node.args ] + new_args = [self.visit(arg).value for arg in node.args] for key in node.keywords: if not isinstance(key, ast.keyword): @@ -559,7 +556,9 @@ def visit_Call_35(self, node, side=None, **kwargs): "'{0}'".format(node.func.id)) if key.arg: - kwargs.append(ast.keyword(keyword.arg, self.visit(keyword.value))) + # TODO: bug? + kwargs.append(ast.keyword( + keyword.arg, self.visit(keyword.value))) # noqa return self.const_type(res(*new_args, **kwargs), self.env) diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index 70541c94b4e8e..6e33250010c2b 100644 --- a/pandas/computation/expressions.py +++ b/pandas/computation/expressions.py @@ -16,9 +16,10 @@ ver = ne.__version__ _NUMEXPR_INSTALLED = ver >= LooseVersion('2.1') if not _NUMEXPR_INSTALLED: - warnings.warn("The installed version of numexpr {ver} is not supported " - "in pandas and will be not be used\nThe minimum supported " - "version is 2.1\n".format(ver=ver), UserWarning) + warnings.warn( + "The installed version of numexpr {ver} is not supported " + "in pandas and will be not be used\nThe minimum supported " + "version is 2.1\n".format(ver=ver), UserWarning) except ImportError: # pragma: no cover _NUMEXPR_INSTALLED = False @@ -96,8 +97,8 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, reversed=False, - **eval_kwargs): +def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, + reversed=False, **eval_kwargs): result = None if _can_use_numexpr(op, op_str, a, b, 'evaluate'): @@ -106,7 +107,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, reve # we were originally called by a reversed op # method if reversed: - a,b = b,a + a, b = b, a a_value = getattr(a, "values", a) b_value = getattr(b, "values", b) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 
f6d5f171036ea..0d528de9f55b6 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -498,12 +498,13 @@ def return_type(self): if operand.return_type == np.dtype('bool'): return np.dtype('bool') if (isinstance(operand, Op) and - (operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict)): + (operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict)): return np.dtype('bool') return np.dtype('int') class MathCall(Op): + def __init__(self, func, args): super(MathCall, self).__init__(func.name, args) self.func = func @@ -518,9 +519,11 @@ def __unicode__(self): class FuncNode(object): + def __init__(self, name): if name not in _mathops: - raise ValueError("\"{0}\" is not a supported function".format(name)) + raise ValueError( + "\"{0}\" is not a supported function".format(name)) self.name = name self.func = getattr(np, name) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 58359a815ed26..3b3a0a8ab8525 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -7,12 +7,11 @@ from datetime import datetime, timedelta import numpy as np import pandas as pd -from pandas.compat import u, string_types, PY3, DeepChainMap +from pandas.compat import u, string_types, DeepChainMap from pandas.core.base import StringMixin import pandas.core.common as com from pandas.computation import expr, ops from pandas.computation.ops import is_term, UndefinedVariableError -from pandas.computation.scope import _ensure_scope from pandas.computation.expr import BaseExprVisitor from pandas.computation.common import _ensure_decoded from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type @@ -147,17 +146,17 @@ def is_in_table(self): @property def kind(self): """ the kind of my field """ - return getattr(self.queryables.get(self.lhs),'kind',None) + return getattr(self.queryables.get(self.lhs), 'kind', None) @property def meta(self): """ the meta of my field """ - return 
getattr(self.queryables.get(self.lhs),'meta',None) + return getattr(self.queryables.get(self.lhs), 'meta', None) @property def metadata(self): """ the metadata of my field """ - return getattr(self.queryables.get(self.lhs),'metadata',None) + return getattr(self.queryables.get(self.lhs), 'metadata', None) def generate(self, v): """ create and return the op string for this TermValue """ @@ -195,7 +194,7 @@ def stringify(value): return TermValue(int(v), v, kind) elif meta == u('category'): metadata = com._values_from_object(self.metadata) - result = metadata.searchsorted(v,side='left') + result = metadata.searchsorted(v, side='left') return TermValue(result, result, u('integer')) elif kind == u('integer'): v = int(float(v)) @@ -504,7 +503,7 @@ def __init__(self, where, op=None, value=None, queryables=None, else: w = self.parse_back_compat(w) where[idx] = w - where = ' & ' .join(["(%s)" % w for w in where]) + where = ' & ' .join(["(%s)" % w for w in where]) # noqa self.expr = where self.env = Scope(scope_level + 1, local_dict=local_dict) @@ -551,12 +550,14 @@ def parse_back_compat(self, w, op=None, value=None): # stringify with quotes these values def convert(v): - if isinstance(v, (datetime,np.datetime64,timedelta,np.timedelta64)) or hasattr(v, 'timetuple'): + if (isinstance(v, (datetime, np.datetime64, + timedelta, np.timedelta64)) or + hasattr(v, 'timetuple')): return "'{0}'".format(v) return v - if isinstance(value, (list,tuple)): - value = [ convert(v) for v in value ] + if isinstance(value, (list, tuple)): + value = [convert(v) for v in value] else: value = convert(value) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index b085232a1a8be..82da9cacd1460 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +# flake8: noqa + import warnings import operator from itertools import product @@ -82,6 +84,7 @@ def _is_py3_complex_incompat(result, 
expected): _good_arith_ops = com.difference(_arith_ops_syms, _special_case_arith_ops_syms) + class TestEvalNumexprPandas(tm.TestCase): @classmethod @@ -194,7 +197,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): binop=binop, cmp2=cmp2) scalar_with_in_notin = (np.isscalar(rhs) and (cmp1 in skip_these or - cmp2 in skip_these)) + cmp2 in skip_these)) if scalar_with_in_notin: with tm.assertRaises(TypeError): pd.eval(ex, engine=self.engine, parser=self.parser) @@ -211,12 +214,12 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): # hand side bool ops are fixed. # try: - # self.assertRaises(Exception, pd.eval, ex, - #local_dict={'lhs': lhs, 'rhs': rhs}, - # engine=self.engine, parser=self.parser) + # self.assertRaises(Exception, pd.eval, ex, + #local_dict={'lhs': lhs, 'rhs': rhs}, + # engine=self.engine, parser=self.parser) # except AssertionError: - #import ipdb; ipdb.set_trace() - # raise + #import ipdb; ipdb.set_trace() + # raise else: expected = _eval_single_bin( lhs_new, binop, rhs_new, self.engine) @@ -351,7 +354,7 @@ def check_single_invert_op(self, lhs, cmp1, rhs): for engine in self.current_engines: tm.skip_if_no_ne(engine) tm.assert_numpy_array_equal(result, pd.eval('~elb', engine=engine, - parser=self.parser)) + parser=self.parser)) def check_compound_invert_op(self, lhs, cmp1, rhs): skip_these = 'in', 'not in' @@ -616,8 +619,8 @@ def test_unary_in_array(self): '-False, False, ~False, +False,' '-37, 37, ~37, +37]'), np.array([-True, True, ~True, +True, - -False, False, ~False, +False, - -37, 37, ~37, +37])) + -False, False, ~False, +False, + -37, 37, ~37, +37])) def test_disallow_scalar_bool_ops(self): exprs = '1 or 2', '1 and 2' @@ -834,7 +837,8 @@ def check_medium_complex_frame_alignment(self, engine, parser): res = pd.eval('df + df2 + df3', engine=engine, parser=parser) else: - res = pd.eval('df + df2 + df3', engine=engine, parser=parser) + res = pd.eval('df + df2 + df3', + engine=engine, parser=parser) assert_frame_equal(res, 
df + df2 + df3) @slow @@ -1549,6 +1553,7 @@ def setUpClass(cls): class TestMathPythonPython(tm.TestCase): + @classmethod def setUpClass(cls): super(TestMathPythonPython, cls).setUpClass() @@ -1648,6 +1653,7 @@ def test_keyword_arg(self): class TestMathPythonPandas(TestMathPythonPython): + @classmethod def setUpClass(cls): super(TestMathPythonPandas, cls).setUpClass() @@ -1656,6 +1662,7 @@ def setUpClass(cls): class TestMathNumExprPandas(TestMathPythonPython): + @classmethod def setUpClass(cls): super(TestMathNumExprPandas, cls).setUpClass() @@ -1664,6 +1671,7 @@ def setUpClass(cls): class TestMathNumExprPython(TestMathPythonPython): + @classmethod def setUpClass(cls): super(TestMathNumExprPython, cls).setUpClass() @@ -1679,7 +1687,7 @@ class TestScope(object): def check_global_scope(self, e, engine, parser): tm.skip_if_no_ne(engine) tm.assert_numpy_array_equal(_var_s * 2, pd.eval(e, engine=engine, - parser=parser)) + parser=parser)) def test_global_scope(self): e = '_var_s * 2' @@ -1819,7 +1827,7 @@ def check_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': with tm.assertRaisesRegexp(NumExprClobberingError, - 'Variables in expression .+'): + 'Variables in expression .+'): pd.eval('sin + dotted_line', engine=engine, parser=parser) else: res = pd.eval('sin + dotted_line', engine=engine, parser=parser) @@ -1906,6 +1914,7 @@ def check_negate_lt_eq_le(engine, parser): result = df.query('not (cat > 0)', engine=engine, parser=parser) tm.assert_frame_equal(result, expected) + def test_negate_lt_eq_le(): for engine, parser in product(_engines, expr._parsers): yield check_negate_lt_eq_le, engine, parser diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f06ad927bb61b..65d853f92b6cd 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2211,7 +2211,7 @@ def _clean_na_values(na_values, keep_default_na=True): v = set(list(v)) | _NA_VALUES na_values[k] = v na_fvalues = dict([ - (k, _floatify_na_values(v)) for k, v in 
na_values.items() + (k, _floatify_na_values(v)) for k, v in na_values.items() # noqa ]) else: if not com.is_list_like(na_values): diff --git a/pandas/msgpack/__init__.py b/pandas/msgpack/__init__.py index bf0e2853ae131..0c2370df936a4 100644 --- a/pandas/msgpack/__init__.py +++ b/pandas/msgpack/__init__.py @@ -1,4 +1,6 @@ # coding: utf-8 +# flake8: noqa + from pandas.msgpack._version import version from pandas.msgpack.exceptions import * diff --git a/pandas/msgpack/exceptions.py b/pandas/msgpack/exceptions.py index f7678f135bd26..40f5a8af8f583 100644 --- a/pandas/msgpack/exceptions.py +++ b/pandas/msgpack/exceptions.py @@ -22,8 +22,10 @@ def __init__(self, unpacked, extra): def __str__(self): return "unpack(b) received extra data." + class PackException(Exception): pass + class PackValueError(PackException, ValueError): pass diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index cf42279c89508..053e69b7f5426 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -6,6 +6,8 @@ """ +# flake8: noqa + from __future__ import print_function import os from pandas.compat import StringIO diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 5c3cb573766d7..c2d25b30c2b22 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,5 +1,5 @@ from pandas.compat import StringIO, callable -from pandas.lib import cache_readonly +from pandas.lib import cache_readonly # noqa import sys import warnings from textwrap import dedent @@ -60,6 +60,7 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): not callable(mapping): raise TypeError("mapping from old to new argument values " "must be dict or callable!") + def _deprecate_kwarg(func): @wraps(func) def wrapper(*args, **kwargs): @@ -82,8 +83,8 @@ def wrapper(*args, **kwargs): warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name, None) is not None: - msg = "Can only specify '%s' or '%s', not both" % \ - 
(old_arg_name, new_arg_name) + msg = ("Can only specify '%s' or '%s', not both" % + (old_arg_name, new_arg_name)) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value @@ -126,7 +127,7 @@ def some_function(x): """ def __init__(self, *args, **kwargs): if (args and kwargs): - raise AssertionError( "Only positional or keyword args are allowed") + raise AssertionError("Only positional or keyword args are allowed") self.params = args or kwargs @@ -261,7 +262,8 @@ def knownfailer(*args, **kwargs): return knownfail_decorator -def make_signature(func) : + +def make_signature(func): """ Returns a string repr of the arg list of a func call, with any defaults @@ -275,15 +277,15 @@ def make_signature(func) : """ from inspect import getargspec spec = getargspec(func) - if spec.defaults is None : + if spec.defaults is None: n_wo_defaults = len(spec.args) defaults = ('',) * n_wo_defaults - else : + else: n_wo_defaults = len(spec.args) - len(spec.defaults) defaults = ('',) * n_wo_defaults + spec.defaults args = [] - for i, (var, default) in enumerate(zip(spec.args, defaults)) : - args.append(var if default=='' else var+'='+repr(default)) + for i, (var, default) in enumerate(zip(spec.args, defaults)): + args.append(var if default == '' else var + '=' + repr(default)) if spec.varargs: args.append('*' + spec.varargs) if spec.keywords: diff --git a/pandas/util/doctools.py b/pandas/util/doctools.py index 20a2a68ce6b03..62dcba1405581 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/doctools.py @@ -23,11 +23,15 @@ def _get_cells(self, left, right, vertical): """Calcurate appropriate figure size based on left and right data""" if vertical: # calcurate required number of cells - vcells = max(sum([self._shape(l)[0] for l in left]), self._shape(right)[0]) - hcells = max([self._shape(l)[1] for l in left]) + self._shape(right)[1] + vcells = max(sum([self._shape(l)[0] for l in left]), + self._shape(right)[0]) + hcells = (max([self._shape(l)[1] for l in left]) + + 
self._shape(right)[1]) else: - vcells = max([self._shape(l)[0] for l in left] + [self._shape(right)[0]]) - hcells = sum([self._shape(l)[1] for l in left] + [self._shape(right)[1]]) + vcells = max([self._shape(l)[0] for l in left] + + [self._shape(right)[0]]) + hcells = sum([self._shape(l)[1] for l in left] + + [self._shape(right)[1]]) return hcells, vcells def plot(self, left, right, labels=None, vertical=True): @@ -66,10 +70,11 @@ def plot(self, left, right, labels=None, vertical=True): max_left_rows = max([self._shape(l)[0] for l in left]) for i, (l, label) in enumerate(zip(left, labels)): ax = fig.add_subplot(gs[i, 0:max_left_cols]) - self._make_table(ax, l, title=label, height=1.0/max_left_rows) + self._make_table(ax, l, title=label, + height=1.0 / max_left_rows) # right ax = plt.subplot(gs[:, max_left_cols:]) - self._make_table(ax, right, title='Result', height=1.05/vcells) + self._make_table(ax, right, title='Result', height=1.05 / vcells) fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95) else: max_rows = max([self._shape(df)[0] for df in left + [right]]) @@ -79,7 +84,7 @@ def plot(self, left, right, labels=None, vertical=True): i = 0 for l, label in zip(left, labels): sp = self._shape(l) - ax = fig.add_subplot(gs[0, i:i+sp[1]]) + ax = fig.add_subplot(gs[0, i:i + sp[1]]) self._make_table(ax, l, title=label, height=height) i += sp[1] # right @@ -107,12 +112,14 @@ def _insert_index(self, data): data.insert(0, 'Index', data.index) else: for i in range(idx_nlevels): - data.insert(i, 'Index{0}'.format(i), data.index.get_level_values(i)) + data.insert(i, 'Index{0}'.format(i), + data.index.get_level_values(i)) col_nlevels = data.columns.nlevels if col_nlevels > 1: col = data.columns.get_level_values(0) - values = [data.columns.get_level_values(i).values for i in range(1, col_nlevels)] + values = [data.columns.get_level_values(i).values + for i in range(1, col_nlevels)] col_df = pd.DataFrame(values) data.columns = col_df.columns data = 
pd.concat([col_df, data]) @@ -151,7 +158,6 @@ def _make_table(self, ax, df, title, height=None): if __name__ == "__main__": - import pandas as pd import matplotlib.pyplot as plt p = TablePlotter() @@ -174,11 +180,11 @@ def _make_table(self, ax, df, title, height=None): plt.show() idx = pd.MultiIndex.from_tuples([(1, 'A'), (1, 'B'), (1, 'C'), - (2, 'A'), (2, 'B'), (2, 'C')]) + (2, 'A'), (2, 'B'), (2, 'C')]) col = pd.MultiIndex.from_tuples([(1, 'A'), (1, 'B')]) df3 = pd.DataFrame({'v1': [1, 2, 3, 4, 5, 6], 'v2': [5, 6, 7, 8, 9, 10]}, - index=idx) + index=idx) df3.columns = col p.plot(df3, df3, labels=['df3']) plt.show() diff --git a/pandas/util/misc.py b/pandas/util/misc.py index 15492cde5a9f7..2dd59043b5f63 100644 --- a/pandas/util/misc.py +++ b/pandas/util/misc.py @@ -1,10 +1,12 @@ """ various miscellaneous utilities """ + def is_little_endian(): """ am I little endian """ import sys return sys.byteorder == 'little' + def exclusive(*args): count = sum([arg is not None for arg in args]) return count == 1 diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index a4cb84d530336..5c09f877d863b 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -16,7 +16,8 @@ def get_sys_info(): if os.path.isdir(".git") and os.path.isdir("pandas"): try: pipe = subprocess.Popen('git log --format="%H" -n 1'.split(" "), - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) so, serr = pipe.communicate() except: pass @@ -32,8 +33,8 @@ def get_sys_info(): blob.append(('commit', commit)) try: - sysname, nodename, release, version, machine, processor = platform.uname( - ) + (sysname, nodename, release, + version, machine, processor) = platform.uname() blob.extend([ ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]), ("python-bits", struct.calcsize("P") * 8), @@ -113,7 +114,7 @@ def show_versions(as_json=False): j = dict(system=dict(sys_info), dependencies=dict(deps_blob)) - if as_json == True: + 
if as_json is True: print(j) else: with codecs.open(as_json, "wb", encoding='utf8') as f: @@ -136,7 +137,8 @@ def main(): from optparse import OptionParser parser = OptionParser() parser.add_option("-j", "--json", metavar="FILE", nargs=1, - help="Save output as JSON into file, pass in '-' to output to stdout") + help="Save output as JSON into file, pass in " + "'-' to output to stdout") (options, args) = parser.parse_args() diff --git a/pandas/util/terminal.py b/pandas/util/terminal.py index fc985855d2682..6b8428ff75806 100644 --- a/pandas/util/terminal.py +++ b/pandas/util/terminal.py @@ -94,7 +94,6 @@ def ioctl_GWINSZ(fd): import fcntl import termios import struct - import os cr = struct.unpack( 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) except: diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 685d89fee53b5..b78ba929463c9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1,6 +1,8 @@ from __future__ import division # pylint: disable-msg=W0402 +# flake8: noqa + import random import re import string From 8a98ff8a3b9e2427a6fcc991b1793917a6083fe0 Mon Sep 17 00:00:00 2001 From: hack-c Date: Fri, 8 Jan 2016 14:33:51 -0500 Subject: [PATCH 08/11] ENH DOC added some new doc examples to str_cat and catch an error to prompt more clearly for a sep keyword --- pandas/core/strings.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 1ffa836a75a1b..ba2537a25bb56 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -45,6 +45,15 @@ def str_cat(arr, others=None, sep=None, na_rep=None): Examples -------- + When ``na_rep`` is `None` (default behavior), NaN value(s) + in the Series propagate and return value will be NaN. + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ') + nan + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?') + 'a b ? 
c' + If ``others`` is specified, corresponding values are concatenated with the separator. Result will be a Series of strings. @@ -110,11 +119,13 @@ def str_cat(arr, others=None, sep=None, na_rep=None): def _length_check(others): n = None for x in others: - if n is None: - n = len(x) - elif len(x) != n: - raise ValueError('All arrays must be same length') - + try: + if n is None: + n = len(x) + elif len(x) != n: + raise ValueError('All arrays must be same length') + except TypeError: + raise ValueError("Did you mean to supply a `sep` keyword?") return n From 9b1be94dbd78b27e67170c302fd1c2544c108830 Mon Sep 17 00:00:00 2001 From: hack-c Date: Fri, 8 Jan 2016 15:00:06 -0500 Subject: [PATCH 09/11] TST added test to make sure improved error fires --- pandas/tests/test_strings.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f8255c4b4a410..66a517c9ebf0a 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -123,6 +123,8 @@ def test_cat(self): exp = ['aa', NA, 'bb', 'bd', 'cfoo', NA] tm.assert_almost_equal(result, exp) + + def test_count(self): values = ['foo', 'foofoo', NA, 'foooofooofommmfoo'] @@ -2088,6 +2090,17 @@ def test_method_on_bytes(self): ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) + def test_str_cat_raises_intuitive_error(self): + s = Series(['a','b','c','d']) + message = "Did you mean to supply a `sep` keyword?" 
+ with tm.assertRaisesRegexp(ValueError, message): + s.str.cat('|') + with tm.assertRaisesRegexp(ValueError, message): + s.str.cat(' ') + + + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From e1ebbadfb8b86e31a7a77484627ca47683fba54b Mon Sep 17 00:00:00 2001 From: hack-c Date: Wed, 20 Jan 2016 17:38:52 -0500 Subject: [PATCH 10/11] add comment with issue # --- pandas/tests/test_strings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 66a517c9ebf0a..199af4354c4a3 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2091,6 +2091,7 @@ def test_method_on_bytes(self): tm.assert_series_equal(result, expected) def test_str_cat_raises_intuitive_error(self): + # https://github.com/pydata/pandas/issues/11334 s = Series(['a','b','c','d']) message = "Did you mean to supply a `sep` keyword?" with tm.assertRaisesRegexp(ValueError, message): From fb1274ce66e896d3fca97c8479c26e3d4be40704 Mon Sep 17 00:00:00 2001 From: hack-c Date: Fri, 22 Jan 2016 11:13:26 -0500 Subject: [PATCH 11/11] ENH changed behavior of str_cat to drop NaNs rather than returning NaN in case of array with NaN. 
--- pandas/core/strings.py | 2 +- pandas/tests/test_strings.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ba2537a25bb56..2c2e2092f9cf6 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -112,7 +112,7 @@ def str_cat(arr, others=None, sep=None, na_rep=None): arr = np.asarray(arr, dtype=object) mask = isnull(arr) if na_rep is None and mask.any(): - return np.nan + na_rep = '' return sep.join(np.where(mask, na_rep, arr)) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 199af4354c4a3..2a5d034adc50d 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -100,7 +100,8 @@ def test_cat(self): # single array result = strings.str_cat(one) - self.assertTrue(isnull(result)) + exp = 'aabbc' + self.assertEqual(result, exp) result = strings.str_cat(one, na_rep='NA') exp = 'aabbcNA' @@ -2091,7 +2092,7 @@ def test_method_on_bytes(self): tm.assert_series_equal(result, expected) def test_str_cat_raises_intuitive_error(self): - # https://github.com/pydata/pandas/issues/11334 + # https://github.com/pydata/pandas/issues/11334 s = Series(['a','b','c','d']) message = "Did you mean to supply a `sep` keyword?" with tm.assertRaisesRegexp(ValueError, message):