diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9c0a2843370f4..5294db0e6e7c1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -710,8 +710,9 @@ def __unicode__(self):
         self.info(buf=buf, verbose=verbose)
         value = buf.getvalue()
 
-        if not type(value) == unicode:
-            raise AssertionError()
+        if not isinstance(value, unicode):
+            raise AssertionError("value must be of type 'unicode', it has "
+                                 "type '{0}'".format(type(value)))
 
         return value
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 0a099661c58f1..cccee11ffd5db 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -631,8 +631,10 @@ def get_value(self, *args):
         value : scalar value
         """
         # require an arg for each axis
-        if not ((len(args) == self._AXIS_LEN)):
-            raise AssertionError()
+        if len(args) != self._AXIS_LEN:
+            raise AssertionError('There must be an argument for each axis, '
+                                 'you gave {0} args, but {1} are '
+                                 'required'.format(len(args), self._AXIS_LEN))
 
         # hm, two layers to the onion
         frame = self._get_item_cache(args[0])
@@ -656,8 +658,12 @@ def set_value(self, *args):
             otherwise a new object
         """
         # require an arg for each axis and the value
-        if not ((len(args) == self._AXIS_LEN + 1)):
-            raise AssertionError()
+        if len(args) != self._AXIS_LEN + 1:
+            raise AssertionError('There must be an argument for each axis plus'
+                                 ' the value provided, you gave {0} args, '
+                                 'but {1} are required'.format(len(args),
+                                                               self._AXIS_LEN +
+                                                               1))
 
         try:
             frame = self._get_item_cache(args[0])
@@ -667,7 +673,7 @@ def set_value(self, *args):
             axes = self._expand_axes(args)
             d = self._construct_axes_dict_from(self, axes, copy=False)
             result = self.reindex(**d)
-            args = list(args) 
+            args = list(args)
             likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1])
             made_bigger = not np.array_equal(
                 axes[0], getattr(self, self._info_axis))
@@ -702,8 +708,10 @@ def __setitem__(self, key, value):
                 **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]))
             mat = value.values
         elif isinstance(value, np.ndarray):
-            if not ((value.shape == shape[1:])):
-                raise AssertionError()
+            if value.shape != shape[1:]:
+                raise AssertionError('shape of value must be {0}, shape of '
+                                     'given object was '
+                                     '{1}'.format(shape[1:], value.shape))
             mat = np.asarray(value)
         elif np.isscalar(value):
             dtype, value = _infer_dtype_from_scalar(value)
@@ -1513,8 +1521,9 @@ def _extract_axes(self, data, axes, **kwargs):
     @staticmethod
     def _extract_axes_for_slice(self, axes):
         """ return the slice dictionary for these axes """
-        return dict([(self._AXIS_SLICEMAP[i], a) for i, a
-                     in zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes)])
+        return dict([(self._AXIS_SLICEMAP[i], a)
+                     for i, a in zip(self._AXIS_ORDERS[self._AXIS_LEN -
                                                        len(axes):], axes)])
 
     @staticmethod
     def _prep_ndarray(self, values, copy=True):
@@ -1526,8 +1535,11 @@ def _prep_ndarray(self, values, copy=True):
         else:
             if copy:
                 values = values.copy()
-        if not ((values.ndim == self._AXIS_LEN)):
-            raise AssertionError()
+        if values.ndim != self._AXIS_LEN:
+            raise AssertionError("The number of dimensions required is {0}, "
+                                 "but the number of dimensions of the "
+                                 "ndarray given was {1}".format(self._AXIS_LEN,
+                                                                values.ndim))
         return values
 
     @staticmethod
diff --git a/pandas/core/series.py b/pandas/core/series.py
index ab8a48f4b8eb9..e4b7b4fcc6deb 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1146,8 +1146,9 @@ def __unicode__(self):
         else:
             result = u'Series([], dtype: %s)' % self.dtype
 
-        if not ( type(result) == unicode):
-            raise AssertionError()
+        if not isinstance(result, unicode):
+            raise AssertionError("result must be of type unicode, type"
+                                 " of result is '{0}'".format(type(result)))
 
         return result
 
     def __repr__(self):
@@ -1216,9 +1217,9 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
                                length=length, dtype=dtype, name=name)
 
         # catch contract violations
-        if not type(the_repr) == unicode:
-            raise AssertionError("expected unicode string")
-
+        if not isinstance(the_repr, unicode):
+            raise AssertionError("result must be of type unicode, type"
+                                 " of result is '{0}'".format(type(the_repr)))
         if buf is None:
             return the_repr
         else:
@@ -1228,19 +1229,21 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None,
             with open(buf, 'w') as f:
                 f.write(the_repr)
 
-    def _get_repr(self, name=False, print_header=False, length=True, dtype=True,
-                  na_rep='NaN', float_format=None):
+    def _get_repr(self, name=False, print_header=False, length=True,
+                  dtype=True, na_rep='NaN', float_format=None):
         """
 
         Internal function, should always return unicode string
         """
 
         formatter = fmt.SeriesFormatter(self, name=name, header=print_header,
-                                        length=length, dtype=dtype, na_rep=na_rep,
+                                        length=length, dtype=dtype,
+                                        na_rep=na_rep,
                                         float_format=float_format)
         result = formatter.to_string()
 
-        if not ( type(result) == unicode):
-            raise AssertionError()
+        if not isinstance(result, unicode):
+            raise AssertionError("result must be of type unicode, type"
+                                 " of result is '{0}'".format(type(result)))
         return result
 
     def __iter__(self):
diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py
index c7a60d13f1778..885818104b6f7 100644
--- a/pandas/io/date_converters.py
+++ b/pandas/io/date_converters.py
@@ -46,12 +46,14 @@ def _maybe_cast(arr):
 
 
 def _check_columns(cols):
-    if not ((len(cols) > 0)):
-        raise AssertionError()
+    if not len(cols):
+        raise AssertionError("There must be at least 1 column")
 
     N = len(cols[0])
     for c in cols[1:]:
-        if not ((len(c) == N)):
-            raise AssertionError()
+        if len(c) != N:
+            raise AssertionError('All columns must have the same length: '
+                                 '{0}, at least one column has '
+                                 'length {1}'.format(N, len(c)))
 
     return N
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 249afe0755445..30bea1ac76fc5 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -630,8 +630,10 @@ def _clean_options(self, options, engine):
 
         # type conversion-related
         if converters is not None:
-            if not (isinstance(converters, dict)):
-                raise AssertionError()
+            if not isinstance(converters, dict):
+                raise AssertionError('Type converters must be a dict or'
+                                     ' subclass, input was '
+                                     'a {0}'.format(type(converters)))
         else:
             converters = {}
 
@@ -1649,8 +1651,8 @@ def _rows_to_cols(self, content):
         if self._implicit_index:
             col_len += len(self.index_col)
 
-        if not ((self.skip_footer >= 0)):
-            raise AssertionError()
+        if self.skip_footer < 0:
+            raise AssertionError('skip_footer cannot be negative')
 
         if col_len != zip_len and self.index_col is not False:
             row_num = -1
@@ -1946,15 +1948,18 @@ def __init__(self, f, colspecs, filler, thousands=None):
         self.filler = filler  # Empty characters between fields.
         self.thousands = thousands
 
-        if not ( isinstance(colspecs, (tuple, list))):
-            raise AssertionError()
+        if not isinstance(colspecs, (tuple, list)):
+            raise AssertionError("column specifications must be a list or"
+                                 " tuple, input was "
+                                 "a {0}".format(type(colspecs)))
 
         for colspec in colspecs:
-            if not ( isinstance(colspec, (tuple, list)) and
-                    len(colspec) == 2 and
-                    isinstance(colspec[0], int) and
-                    isinstance(colspec[1], int) ):
-                raise AssertionError()
+            if not (isinstance(colspec, (tuple, list)) and
+                    len(colspec) == 2 and
+                    isinstance(colspec[0], int) and
+                    isinstance(colspec[1], int)):
+                raise AssertionError('Each column specification must be a '
+                                     '2-element tuple or list of integers')
 
     def next(self):
         line = next(self.f)
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
index 035db279064a0..667f076ee7cc3 100644
--- a/pandas/sparse/array.py
+++ b/pandas/sparse/array.py
@@ -14,7 +14,6 @@
 
 from pandas._sparse import BlockIndex, IntIndex
 import pandas._sparse as splib
-import pandas.lib as lib
 import pandas.index as _index
 
 
@@ -25,8 +24,8 @@ def _sparse_op_wrap(op, name):
     """
     def wrapper(self, other):
         if isinstance(other, np.ndarray):
-            if not ((len(self) == len(other))):
-                raise AssertionError()
+            if len(self) != len(other):
+                raise AssertionError("Operands must be of the same size")
             if not isinstance(other, SparseArray):
                 other = SparseArray(other, fill_value=self.fill_value)
             return _sparse_array_op(self, other, op, name)
@@ -130,8 +129,10 @@ def __new__(cls, data, sparse_index=None, kind='integer', fill_value=None,
                                             fill_value=fill_value)
         else:
             values = data
-            if not ((len(values) == sparse_index.npoints)):
-                raise AssertionError()
+            if len(values) != sparse_index.npoints:
+                raise AssertionError("Non array-like type {0} must have"
+                                     " the same length as the"
+                                     " index".format(type(values)))
 
         # Create array, do *not* copy data by default
         if copy:
@@ -277,13 +278,13 @@ def take(self, indices, axis=0):
         -------
         taken : ndarray
         """
-        if not ((axis == 0)):
-            raise AssertionError()
+        if axis:
+            raise AssertionError("axis must be 0, input was {0}".format(axis))
         indices = np.asarray(indices, dtype=int)
 
         n = len(self)
         if (indices < 0).any() or (indices >= n).any():
-            raise Exception('out of bounds access')
+            raise IndexError('out of bounds access')
 
         if self.sp_index.npoints > 0:
             locs = np.array([self.sp_index.lookup(loc) for loc in indices])
@@ -296,10 +297,10 @@ def take(self, indices, axis=0):
         return result
 
     def __setitem__(self, key, value):
-        raise Exception('SparseArray objects are immutable')
+        raise TypeError('SparseArray objects are immutable')
 
     def __setslice__(self, i, j, value):
-        raise Exception('SparseArray objects are immutable')
+        raise TypeError('SparseArray objects are immutable')
 
     def to_dense(self):
         """
@@ -313,7 +314,7 @@ def astype(self, dtype=None):
         """
         dtype = np.dtype(dtype)
         if dtype is not None and dtype not in (np.float_, float):
-            raise Exception('Can only support floating point data for now')
+            raise TypeError('Can only support floating point data for now')
         return self.copy()
 
     def copy(self, deep=True):
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index 9694cc005d178..977cf0e3953e4 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -8,7 +8,7 @@
 from numpy import nan
 import numpy as np
 
-from pandas.core.common import _pickle_array, _unpickle_array, _try_sort
+from pandas.core.common import _unpickle_array, _try_sort
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
 from pandas.core.series import Series
@@ -16,11 +16,9 @@
                               _default_index)
 from pandas.util.decorators import cache_readonly
 import pandas.core.common as com
-import pandas.core.datetools as datetools
 
 from pandas.sparse.series import SparseSeries
 from pandas.util.decorators import Appender
-import pandas.lib as lib
 
 
 class _SparseMockBlockManager(object):
@@ -713,8 +711,8 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
 
     def _join_index(self, other, how, lsuffix, rsuffix):
         if isinstance(other, Series):
-            if not (other.name is not None):
-                raise AssertionError()
+            if other.name is None:
+                raise AssertionError('Cannot join series with no name')
 
             other = SparseDataFrame({other.name: other},
                                     default_fill_value=self.default_fill_value)
diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
index 0b2842155b299..dd628385d539a 100644
--- a/pandas/sparse/panel.py
+++ b/pandas/sparse/panel.py
@@ -72,7 +72,8 @@ def __init__(self, frames, items=None, major_axis=None, minor_axis=None,
             frames = new_frames
 
         if not (isinstance(frames, dict)):
-            raise AssertionError()
+            raise AssertionError('input must be a dict, a {0} was'
+                                 ' passed'.format(type(frames)))
 
         self.default_fill_value = fill_value = default_fill_value
         self.default_kind = kind = default_kind
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
index bd01845a295b6..5d5f3ddabeed3 100644
--- a/pandas/sparse/series.py
+++ b/pandas/sparse/series.py
@@ -110,8 +110,11 @@ def __new__(cls, data, index=None, sparse_index=None, kind='block',
             if isinstance(data, SparseSeries) and index is None:
                 index = data.index
             elif index is not None:
-                if not (len(index) == len(data)):
-                    raise AssertionError()
+                if len(index) != len(data):
+                    raise AssertionError('Passed index and data must have the '
+                                         'same length, len(data) == {0}, '
+                                         'len(index) == '
+                                         '{1}'.format(len(data), len(index)))
 
             sparse_index = data.sp_index
             values = np.asarray(data)
@@ -129,8 +132,14 @@ def __new__(cls, data, index=None, sparse_index=None, kind='block',
                                                 fill_value=fill_value)
             else:
                 values = data
-                if not (len(values) == sparse_index.npoints):
-                    raise AssertionError()
+                if len(values) != sparse_index.npoints:
+                    raise AssertionError('length of input must be the same '
+                                         'as the length of the given index, '
+                                         'len(values) == {0}, '
+                                         'sparse_index.npoints'
+                                         ' == '
+                                         '{1}'.format(len(values),
+                                                      sparse_index.npoints))
         else:
             if index is None:
                 raise Exception('must pass index!')
@@ -449,7 +458,7 @@ def sparse_reindex(self, new_index):
         reindexed : SparseSeries
         """
         if not (isinstance(new_index, splib.SparseIndex)):
-            raise AssertionError()
+            raise AssertionError('new index must be a SparseIndex')
 
         new_values = self.sp_index.to_int_index().reindex(self.sp_values,
                                                           self.fill_value,
diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py
index 13eeb03e15328..fd481e10159bb 100644
--- a/pandas/stats/ols.py
+++ b/pandas/stats/ols.py
@@ -634,8 +634,8 @@ def _set_window(self, window_type, window, min_periods):
             self._window_type = scom._get_window_type(window_type)
 
             if self._is_rolling:
-                if not ((window is not None)):
-                    raise AssertionError()
+                if window is None:
+                    raise AssertionError("'window' cannot be None")
                 if min_periods is None:
                     min_periods = window
         else:
@@ -1212,8 +1212,9 @@ def _nobs_raw(self):
         return result.astype(int)
 
     def _beta_matrix(self, lag=0):
-        if not ((lag >= 0)):
-            raise AssertionError()
+        if lag < 0:
+            raise AssertionError("'lag' must be greater than or equal to 0, "
+                                 "input was {0}".format(lag))
 
         betas = self._beta_raw
 
@@ -1276,8 +1277,8 @@ def _filter_data(lhs, rhs, weights=None):
     Cleaned lhs and rhs
     """
     if not isinstance(lhs, Series):
-        if not ((len(lhs) == len(rhs))):
-            raise AssertionError()
+        if len(lhs) != len(rhs):
+            raise AssertionError("length of lhs must equal length of rhs")
         lhs = Series(lhs, index=rhs.index)
 
     rhs = _combine_rhs(rhs)
diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py
index 467ce6a05e1f0..aa2db7d2e2c66 100644
--- a/pandas/stats/plm.py
+++ b/pandas/stats/plm.py
@@ -101,10 +101,12 @@ def _prepare_data(self):
             y_regressor = y
 
         if weights is not None:
-            if not ((y_regressor.index.equals(weights.index))):
-                raise AssertionError()
-            if not ((x_regressor.index.equals(weights.index))):
-                raise AssertionError()
+            if not y_regressor.index.equals(weights.index):
+                raise AssertionError("y_regressor and weights must have the "
+                                     "same index")
+            if not x_regressor.index.equals(weights.index):
+                raise AssertionError("x_regressor and weights must have the "
+                                     "same index")
 
             rt_weights = np.sqrt(weights)
             y_regressor = y_regressor * rt_weights
@@ -171,8 +173,10 @@ def _convert_x(self, x):
         # .iteritems
         iteritems = getattr(x, 'iteritems', x.items)
         for key, df in iteritems():
-            if not ((isinstance(df, DataFrame))):
-                raise AssertionError()
+            if not isinstance(df, DataFrame):
+                raise AssertionError("all input items must be DataFrames, "
+                                     "at least one is of "
+                                     "type {0}".format(type(df)))
 
             if _is_numeric(df):
                 x_converted[key] = df
@@ -640,8 +644,9 @@ def _y_predict_raw(self):
         return (betas * x).sum(1)
 
     def _beta_matrix(self, lag=0):
-        if not ((lag >= 0)):
-            raise AssertionError()
+        if lag < 0:
+            raise AssertionError("'lag' must be greater than or equal to 0, "
+                                 "input was {0}".format(lag))
 
         index = self._y_trans.index
         major_labels = index.labels[0]
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index b19d099790566..f19608752ad9a 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -404,17 +404,19 @@ def _validate_specification(self):
         elif self.left_on is not None:
             n = len(self.left_on)
             if self.right_index:
-                if not ((len(self.left_on) == self.right.index.nlevels)):
-                    raise AssertionError()
+                if len(self.left_on) != self.right.index.nlevels:
+                    raise AssertionError('len(left_on) must equal the number '
+                                         'of levels in the index of "right"')
                 self.right_on = [None] * n
         elif self.right_on is not None:
             n = len(self.right_on)
             if self.left_index:
-                if not ((len(self.right_on) == self.left.index.nlevels)):
-                    raise AssertionError()
+                if len(self.right_on) != self.left.index.nlevels:
+                    raise AssertionError('len(right_on) must equal the number '
+                                         'of levels in the index of "left"')
                 self.left_on = [None] * n
-            if not ((len(self.right_on) == len(self.left_on))):
-                raise AssertionError()
+            if len(self.right_on) != len(self.left_on):
+                raise AssertionError("len(right_on) must equal len(left_on)")
 
 
 def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
@@ -427,8 +429,8 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
     -------
 
     """
-    if not ((len(left_keys) == len(right_keys))):
-        raise AssertionError()
+    if len(left_keys) != len(right_keys):
+        raise AssertionError('left_keys and right_keys must be the same length')
 
     left_labels = []
     right_labels = []
@@ -542,8 +544,11 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):
     if len(join_keys) > 1:
         if not ((isinstance(right_ax, MultiIndex) and
-                 len(join_keys) == right_ax.nlevels) ):
-            raise AssertionError()
+                 len(join_keys) == right_ax.nlevels)):
+            raise AssertionError("If more than one join key is given then "
+                                 "'right_ax' must be a MultiIndex and the "
+                                 "number of join keys must equal the number "
+                                 "of levels in right_ax")
 
         left_tmp, right_indexer = \
             _get_multiindex_indexer(join_keys, right_ax,
@@ -642,8 +647,9 @@ def __init__(self, data_list, join_index, indexers, axis=1, copy=True):
         if axis <= 0:  # pragma: no cover
             raise MergeError('Only axis >= 1 supported for this operation')
 
-        if not ((len(data_list) == len(indexers))):
-            raise AssertionError()
+        if len(data_list) != len(indexers):
+            raise AssertionError("data_list and indexers must have the same "
+                                 "length")
 
         self.units = []
         for data, indexer in zip(data_list, indexers):
@@ -936,8 +942,9 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
                 axis = 1 if axis == 0 else 0
 
         self._is_series = isinstance(sample, Series)
-        if not ((0 <= axis <= sample.ndim)):
-            raise AssertionError()
+        if not 0 <= axis <= sample.ndim:
+            raise AssertionError("axis must be between 0 and {0}, "
+                                 "input was {1}".format(sample.ndim, axis))
 
         # note: this is the BlockManager axis (since DataFrame is transposed)
         self.axis = axis
@@ -1106,8 +1113,9 @@ def _concat_single_item(self, objs, item):
                 to_concat.append(item_values)
 
         # this method only gets called with axis >= 1
-        if not ((self.axis >= 1)):
-            raise AssertionError()
+        if self.axis < 1:
+            raise AssertionError("axis must be >= 1, input was"
+                                 " {0}".format(self.axis))
 
         return com._concat_compat(to_concat, axis=self.axis - 1)
 
     def _get_result_dim(self):
@@ -1126,8 +1134,9 @@ def _get_new_axes(self):
                 continue
             new_axes[i] = self._get_comb_axis(i)
         else:
-            if not ((len(self.join_axes) == ndim - 1)):
-                raise AssertionError()
+            if len(self.join_axes) != ndim - 1:
+                raise AssertionError("length of join_axes must be "
+                                     "equal to {0}".format(ndim - 1))
 
             # ufff...
             indices = range(ndim)
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
index 8d5ba7af0d92b..12ba39b2387c1 100644
--- a/pandas/tools/pivot.py
+++ b/pandas/tools/pivot.py
@@ -2,7 +2,6 @@
 
 from pandas import Series, DataFrame
 from pandas.core.index import MultiIndex
-from pandas.core.reshape import _unstack_multiple
 from pandas.tools.merge import concat
 import pandas.core.common as com
 import numpy as np
@@ -300,8 +299,8 @@ def _get_names(arrs, names, prefix='row'):
             else:
                 names.append('%s_%d' % (prefix, i))
     else:
-        if not ((len(names) == len(arrs))):
-            raise AssertionError()
+        if len(names) != len(arrs):
+            raise AssertionError('arrays and names must have the same length')
         if not isinstance(names, list):
             names = list(names)
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index a918e9eb18e8b..a6b94184eb475 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -306,12 +306,12 @@ def _generate(cls, start, end, periods, name, offset,
 
         if tz is not None and inferred_tz is not None:
             if not inferred_tz == tz:
-                raise AssertionError()
+                raise AssertionError("Inferred time zone not equal to passed "
+                                     "time zone")
 
         elif inferred_tz is not None:
             tz = inferred_tz
 
-
         if start is not None:
             if normalize:
                 start = normalize_date(start)
@@ -450,16 +450,16 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
             cachedRange = drc[offset]
 
         if start is None:
-            if not (isinstance(end, Timestamp)):
-                raise AssertionError()
+            if not isinstance(end, Timestamp):
+                raise AssertionError('end must be an instance of Timestamp')
 
             end = offset.rollback(end)
 
             endLoc = cachedRange.get_loc(end) + 1
             startLoc = endLoc - periods
         elif end is None:
-            if not (isinstance(start, Timestamp)):
-                raise AssertionError()
+            if not isinstance(start, Timestamp):
+                raise AssertionError('start must be an instance of Timestamp')
 
             start = offset.rollforward(start)
 
@@ -586,14 +586,15 @@ def _format_native_types(self, na_rep=u'NaT', **kwargs):
         zero_time = time(0, 0)
         for d in data:
             if d.time() != zero_time or d.tzinfo is not None:
-                return [u'%s' % x for x in data ]
+                return [u'%s' % x for x in data]
 
-        values = np.array(data,dtype=object)
+        values = np.array(data, dtype=object)
         mask = isnull(self.values)
         values[mask] = na_rep
 
         imask = -mask
-        values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ])
+        values[imask] = np.array([u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day)
+                                  for dt in values[imask]])
         return values.tolist()
 
     def isin(self, values):
@@ -1067,7 +1068,6 @@ def intersection(self, other):
         return self._view_like(left_chunk)
 
     def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
-
         is_monotonic = self.is_monotonic
 
         if reso == 'year':
@@ -1104,18 +1104,22 @@ def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
         else:
             raise KeyError
 
-
         stamps = self.asi8
 
         if is_monotonic:
 
             # a monotonic (sorted) series can be sliced
-            left = stamps.searchsorted(t1.value, side='left') if use_lhs else None
-            right = stamps.searchsorted(t2.value, side='right') if use_rhs else None
+            left = None
+            if use_lhs:
+                left = stamps.searchsorted(t1.value, side='left')
+
+            right = None
+            if use_rhs:
+                right = stamps.searchsorted(t2.value, side='right')
             return slice(left, right)
 
-        lhs_mask = (stamps>=t1.value) if use_lhs else True
-        rhs_mask = (stamps<=t2.value) if use_rhs else True
+        lhs_mask = (stamps >= t1.value) if use_lhs else True
+        rhs_mask = (stamps <= t2.value) if use_rhs else True
 
         # try to find a the dates
         return (lhs_mask & rhs_mask).nonzero()[0]
@@ -1188,7 +1192,8 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
         freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None))
         _, parsed, reso = parse_time_string(key, freq)
-        loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
+        loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs,
+                                       use_rhs=use_rhs)
         return loc
 
     def slice_indexer(self, start=None, end=None, step=None):
@@ -1217,7 +1222,6 @@ def slice_locs(self, start=None, end=None):
                 start_loc = self._get_string_slice(start).start
             else:
                 start_loc = 0
-
             if end:
                 end_loc = self._get_string_slice(end).stop
             else:
@@ -1232,12 +1236,12 @@ def slice_locs(self, start=None, end=None):
             # so create an indexer directly
             try:
                 if start:
-                    start_loc = self._get_string_slice(start,use_rhs=False)
+                    start_loc = self._get_string_slice(start,
+                                                       use_rhs=False)
                 else:
                     start_loc = np.arange(len(self))
-
                 if end:
-                    end_loc = self._get_string_slice(end,use_lhs=False)
+                    end_loc = self._get_string_slice(end, use_lhs=False)
                 else:
                     end_loc = np.arange(len(self))
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
index 34c640392bda9..aa3012ddf291a 100644
--- a/pandas/tseries/period.py
+++ b/pandas/tseries/period.py
@@ -4,7 +4,6 @@
 from datetime import datetime, date
 import numpy as np
 
-import pandas.tseries.offsets as offsets
 from pandas.tseries.frequencies import (get_freq_code as _gfc,
                                         _month_numbers, FreqGroup)
 from pandas.tseries.index import DatetimeIndex, Int64Index, Index
@@ -213,7 +212,7 @@ def end_time(self):
         ordinal = (self + 1).start_time.value - 1
         return Timestamp(ordinal)
 
-    def to_timestamp(self, freq=None, how='start',tz=None):
+    def to_timestamp(self, freq=None, how='start', tz=None):
         """
         Return the Timestamp representation of the Period at the target
         frequency at the specified end (how) of the Period
@@ -241,7 +240,7 @@ def to_timestamp(self, freq=None, how='start', tz=None):
 
         val = self.asfreq(freq, how)
         dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base)
-        return Timestamp(dt64,tz=tz)
+        return Timestamp(dt64, tz=tz)
 
     year = _period_field_accessor('year', 0)
     month = _period_field_accessor('month', 3)
@@ -308,7 +307,6 @@ def __unicode__(self):
 
         return value
 
-
     def strftime(self, fmt):
         """
         Returns the string representation of the :class:`Period`, depending
@@ -500,13 +498,13 @@ def _period_index_cmp(opname):
     def wrapper(self, other):
         if isinstance(other, Period):
             func = getattr(self.values, opname)
-            if not (other.freq == self.freq):
-                raise AssertionError()
+            if other.freq != self.freq:
+                raise AssertionError("Frequencies must be equal")
 
             result = func(other.ordinal)
         elif isinstance(other, PeriodIndex):
-            if not (other.freq == self.freq):
-                raise AssertionError()
+            if other.freq != self.freq:
+                raise AssertionError("Frequencies must be equal")
 
             return getattr(self.values, opname)(other.values)
         else:
             other = Period(other, freq=self.freq)
@@ -724,7 +722,6 @@ def asof_locs(self, where, mask):
 
     @property
     def asobject(self):
-        from pandas.core.index import Index
         return Index(self._box_values(self.values), dtype=object)
 
     def _array_values(self):
@@ -960,7 +957,7 @@ def get_loc(self, key):
             key = Period(key, self.freq)
         try:
             return self._engine.get_loc(key.ordinal)
-        except KeyError as inst:
+        except KeyError:
             raise KeyError(key)
 
     def slice_locs(self, start=None, end=None):
@@ -1080,12 +1077,11 @@ def _format_with_header(self, header, **kwargs):
 
     def _format_native_types(self, na_rep=u'NaT', **kwargs):
-        values = np.array(list(self),dtype=object)
+        values = np.array(list(self), dtype=object)
         mask = isnull(self.values)
         values[mask] = na_rep
-
         imask = -mask
-        values[imask] = np.array([ u'%s' % dt for dt in values[imask] ])
+        values[imask] = np.array([u'%s' % dt for dt in values[imask]])
         return values.tolist()
 
     def __array_finalize__(self, obj):
@@ -1184,13 +1180,14 @@ def __setstate__(self, state):
             nd_state, own_state = state
             np.ndarray.__setstate__(self, nd_state)
             self.name = own_state[0]
-            try: # backcompat
+            try:  # backcompat
                 self.freq = own_state[1]
             except:
                 pass
         else:  # pragma: no cover
             np.ndarray.__setstate__(self, state)
 
+
 def _get_ordinal_range(start, end, periods, freq):
     if com._count_not_none(start, end, periods) < 2:
         raise ValueError('Must specify 2 of start, end, periods')
@@ -1249,8 +1246,8 @@ def _range_from_fields(year=None, month=None, quarter=None, day=None,
             base, mult = _gfc(freq)
             if mult != 1:
                 raise ValueError('Only mult == 1 supported')
-            if not (base == FreqGroup.FR_QTR):
-                raise AssertionError()
+            if base != FreqGroup.FR_QTR:
+                raise AssertionError("base must equal FR_QTR")
 
             year, quarter = _make_field_arrays(year, quarter)
             for y, q in zip(year, quarter):
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index 4bf0a5bf3182c..98b4d9afbfb62 100644
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -119,8 +119,9 @@ def _get_time_grouper(self, obj):
         return binner, grouper
 
     def _get_time_bins(self, axis):
-        if not (isinstance(axis, DatetimeIndex)):
-            raise AssertionError()
+        if not isinstance(axis, DatetimeIndex):
+            raise AssertionError('axis must be a DatetimeIndex, but got '
+                                 'a {0}'.format(type(axis)))
 
         if len(axis) == 0:
             binner = labels = DatetimeIndex(data=[], freq=self.freq)
@@ -179,8 +180,9 @@ def _adjust_bin_edges(self, binner, ax_values):
         return binner, bin_edges
 
     def _get_time_period_bins(self, axis):
-        if not(isinstance(axis, DatetimeIndex)):
-            raise AssertionError()
+        if not isinstance(axis, DatetimeIndex):
+            raise AssertionError('axis must be a DatetimeIndex, '
+                                 'but was a {0}'.format(type(axis)))
 
         if len(axis) == 0:
             binner = labels = PeriodIndex(data=[], freq=self.freq)
@@ -210,8 +212,8 @@ def _resample_timestamps(self, obj):
             result = grouped.aggregate(self._agg_method)
         else:
             # upsampling shortcut
-            if not (self.axis == 0):
-                raise AssertionError()
+            if self.axis:
+                raise AssertionError('axis must be 0')
 
             if self.closed == 'right':
                 res_index = binner[1:]
@@ -277,7 +279,6 @@ def _resample_periods(self, obj):
 
 def _take_new_index(obj, indexer, new_index, axis=0):
     from pandas.core.api import Series, DataFrame
-    from pandas.core.internals import BlockManager
 
     if isinstance(obj, Series):
         new_values = com.take_1d(obj.values, indexer)
@@ -285,7 +286,7 @@ def _take_new_index(obj, indexer, new_index, axis=0):
     elif isinstance(obj, DataFrame):
         if axis == 1:
             raise NotImplementedError
-        return DataFrame(obj._data.take(indexer,new_index=new_index,axis=1))
+        return DataFrame(obj._data.take(indexer, new_index=new_index, axis=1))
     else:
         raise NotImplementedError
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index 62ee19da6b845..a73017618fdbe 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -29,7 +29,8 @@ def _infer(a, b):
         tz = a.tzinfo
         if b and b.tzinfo:
             if not (tslib.get_timezone(tz) == tslib.get_timezone(b.tzinfo)):
-                raise AssertionError()
+                raise AssertionError('Inputs must both have the same timezone,'
+                                     ' {0} != {1}'.format(tz, b.tzinfo))
         return tz
     tz = None
     if start is not None:
diff --git a/scripts/parse_asserts.py b/scripts/parse_asserts.py
new file mode 100755
index 0000000000000..b651de4ce26de
--- /dev/null
+++ b/scripts/parse_asserts.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+
+import re
+import os
+import fnmatch
+import ast
+import argparse
+import inspect
+import sys
+import tempfile
+import subprocess
+import operator
+
+try:
+    from importlib import import_module
+except ImportError:
+    import_module = __import__
+
+
+from numpy import nan as NA
+from pandas import DataFrame
+from pandas.core.config import option_context
+
+
+def parse_interp_string(node):
+    assert isinstance(node, ast.BinOp)
+    assert isinstance(node.op, ast.Mod)
+    assert isinstance(node.left, ast.Str)
+    return node.left.s
+
+
+def parse_format_string(node):
+    assert isinstance(node, ast.Call)
+    assert isinstance(node.func, ast.Attribute)
+    assert isinstance(node.func.value, ast.Str)
+    return node.func.value.s
+
+
+def try_parse_raise_arg(node):
+    try:
+        # string
+        v = node.s
+    except AttributeError:
+        try:
+            # interpolated string
+            v = parse_interp_string(node)
+        except AssertionError:
+            try:
+                # format spec string
+                v = parse_format_string(node)
+            except AssertionError:
+                # otherwise forget it (general expr node)
+                v = node
+    return v
+
+
+def parse_file(pyfile, asserts):
+    with open(pyfile, 'r') as pyf:
+        source = pyf.read()
+
+    try:
+        parsed = ast.parse(source, pyfile, 'exec')
+    except SyntaxError:
+        return
+
+    for node in ast.walk(parsed):
+        if isinstance(node, ast.Raise):
+            k = pyfile, node.lineno, node.col_offset
+
+            try:
+                # try to get the name of the exception constructor
+                asserts[k] = [node.type.func.id]
+            except AttributeError:
+                # not a constructor (e.g., a bare re-raise); pad with NA
+                # for the message so every row has the same length
+                asserts[k] = [NA, NA]
+            else:
+                # is constructor, try parsing its contents
+                try:
+                    # function arguments
+                    args = node.type.args
+
+                    try:
+                        # try to get the first argument
+                        arg = args[0]
+                        v = try_parse_raise_arg(arg)
+                        asserts[k].append(v)
+                    except IndexError:
+                        # no arguments (e.g., raise Exception())
+                        asserts[k].append(NA)
+
+                except AttributeError:
+                    # no arguments (e.g., raise Exception)
+                    asserts[k].append(NA)
+
+
+def path_matches(path, pattern):
+    return re.search(pattern, path) is not None
+
+
+def regex_or(*patterns):
+    return '({0})'.format('|'.join(patterns))
+
+
+def get_asserts_from_path(path, file_filters, dir_filters):
+    if file_filters is None:
+        file_filters = 'test', '__init__.py'
+
+    file_filters = regex_or(*file_filters)
+
+    if dir_filters is None:
+        dir_filters = 'build', '.tox', 'test', r'.*\.egg.*'
+
+    dir_filters = regex_or(*dir_filters)
+
+    asserts = {}
+
+    if os.path.isfile(path):
+        parse_file(path, asserts)
+        return asserts
+
+    for root, _, filenames in os.walk(path):
+        full_names = []
+
+        if not path_matches(root, dir_filters):
+            full_names = [os.path.join(root, fn) for fn in filenames
+                          if not path_matches(fn, file_filters)]
+
+        if full_names:
+            pyfiles = fnmatch.filter(full_names, '*.py')
+
+            if pyfiles:
+                for pyfile in pyfiles:
+                    parse_file(pyfile, asserts)
+
+    return asserts
+
+
+def obj_path_from_string(dotted_name, full_path):
+    try:
+        obj = import_module(dotted_name)
+    except ImportError:
+        splits_ville = dotted_name.split('.')
+        module_name, obj_name = splits_ville[:-1], splits_ville[-1]
+        module_name = '.'.join(module_name)
+
+        try:
+            module = import_module(module_name)
+        except ImportError:
+            raise ImportError("'{0}' is not a valid Python "
+                              "module".format(module_name))
+        else:
+            try:
+                obj = getattr(module, obj_name)
+            except AttributeError:
+                raise AttributeError("module '{0}' has no attribute "
+                                     "'{1}'".format(module_name, obj_name))
+
+    if full_path:
+        path = inspect.getabsfile(obj)
+    else:
+        path = inspect.getfile(obj)
+
+    if path.endswith('pyc'):
+        path = path[:-1]
+    return os.path.dirname(path)
+
+
+def get_asserts_from_obj(dotted_name, file_filters, dir_filters, full_path):
+    path = obj_path_from_string(dotted_name, full_path)
+    return get_asserts_from_path(path, file_filters, dir_filters)
+
+
+def asserts_to_frame(asserts):
+    values = map(lambda x: list(reduce(operator.concat, map(list, x))),
+                 asserts.iteritems())
+    columns = 'filename', 'line', 'col', 'code', 'msg'
+    df = DataFrame(values, columns=columns).fillna(NA)
+    return df
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-t', '--type', default='all',
+                        choices=('all', 'a', 'empty', 'e', 'nonempty', 'n'),
+                        help='The type of nodes you want to look for')
+    parser.add_argument('-m', '--module', default='pandas',
+                        help=('The name of a module or file to search for '
+                              'nodes in'))
+    parser.add_argument('-i', '--file-filters', default=None, nargs='*',
+                        help=("A list of regular expressions describing files "
+                              "you want to ignore"))
+    parser.add_argument('-d', '--dir-filters', default=None, nargs='*',
+                        help=('A list of regular expressions describing'
+                              ' directories you want to ignore'))
+    parser.add_argument('-s', '--sparse-filename', action='store_true',
+                        help=('Use multi_sparse = False to show the '
+                              'resulting DataFrame'))
+    parser.add_argument('-p', '--full-path', action='store_true',
+                        help=('Display the entire path of the file if this '
+                              'is given'))
+    parser.add_argument('-k', '--exception-types', nargs='*',
+                        help='The types of exceptions to report')
+    parser.add_argument('-b', '--sort-by', default='line', nargs='*',
+                        help=('A list of columns or index levels you want to '
+                              'sort by'))
+    return parser.parse_args()
+
+
+def _build_exc_regex(exc_list):
+    return r'(.*(?:{0}).*)'.format('|'.join(exc_list))
+
+
+def main(args):
+    asserts = get_asserts_from_obj(args.module, args.file_filters,
+                                   args.dir_filters, args.full_path)
+
+    if not asserts:
+        print "No asserts found in '{0}'".format(args.module)
+        return 0
+
+    df = asserts_to_frame(asserts)
+
+    try:
+        df.sortlevel(args.sort_by, inplace=True)
+    except Exception:
+        df.sort(args.sort_by, inplace=True)
+
+    atype = args.type
+
+    msg = 'No'
+
+    if atype.startswith('e'):
+        ind = df.msg.isnull()
+        msg += ' empty'
+    elif atype.startswith('n'):
+        ind = df.msg.notnull()
+        msg += ' nonempty'
+    else:
+        ind = slice(None)
+
+    df = df[ind]
+    df.sort_index(inplace=True)
+
+    if df.empty:
+        print "{0} {1} found in '{2}'".format(msg, args.exception_types,
+                                              args.module)
+        return 0
+
+    max_cols = int(df.msg.map(lambda x: len(repr(x))).max())
+
+    with option_context('display.multi_sparse', args.sparse_filename,
+                        'display.max_colwidth', max_cols,
+                        'display.max_seq_items', max_cols):
+        if args.exception_types is not None:
+            regex = _build_exc_regex(args.exception_types)
+            vals = df.code.str.match(regex, re.I)
+            df = df[vals.str[0].notnull()]
+
+            if df.empty:
+                msg = "{0} {1} found in '{2}'".format(msg,
+                                                      args.exception_types,
+                                                      args.module)
+                print msg
+                return 0
+
+        with tempfile.NamedTemporaryFile() as tmpf:
+            df.to_string(buf=tmpf)
+            tmpf.flush()
+            return subprocess.call([os.environ.get('PAGER', 'less'),
+                                    tmpf.name])
+
+
+if __name__ == '__main__':
+    sys.exit(main(parse_args()))
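
The convention adopted throughout the patch is an explicit comparison guarding
an AssertionError whose message states the requirement and echoes the
offending input via str.format. A minimal sketch of that pattern, outside the
patch itself; the _check_ndim helper is hypothetical and only mirrors the
style of _prep_ndarray above:

    import numpy as np

    def _check_ndim(values, ndim):
        # state the requirement and echo the offending input in the message
        if values.ndim != ndim:
            raise AssertionError("The number of dimensions required is {0}, "
                                 "but the ndarray given has "
                                 "{1}".format(ndim, values.ndim))

    try:
        _check_ndim(np.zeros((2, 2)), 3)
    except AssertionError as e:
        print e  # The number of dimensions required is 3, but ... has 2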
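Besides its argparse front end (the -t/-m/-k flags defined in parse_args),
scripts/parse_asserts.py can be driven as a library. A minimal sketch,
assuming it is run from a pandas checkout; the file path used here is only an
example:

    import sys
    sys.path.insert(0, 'scripts')

    from parse_asserts import get_asserts_from_path, asserts_to_frame

    # collect every `raise` site in one file; passing None for the filter
    # arguments falls back to the defaults set in get_asserts_from_path
    asserts = get_asserts_from_path('pandas/core/panel.py', None, None)
    df = asserts_to_frame(asserts)

    # show the AssertionErrors that still carry no message
    print df[(df.code == 'AssertionError') & df.msg.isnull()]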