diff --git a/ci/lint.sh b/ci/lint.sh index 22f8628f59dcd..43d6ea0c118b0 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -24,7 +24,7 @@ if [ "$LINT" ]; then echo "Linting setup.py DONE" echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126 + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then RET=1 fi @@ -34,7 +34,7 @@ if [ "$LINT" ]; then for path in 'src' do echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126 + flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 if [ $? -ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a44a7288bda45..e9ef9c4ffe24b 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -258,7 +258,7 @@ def min_subseq(ndarray[double_t] arr): return (s, e, -m) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Pairwise correlation/covariance @@ -322,7 +322,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): return result -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Pairwise Spearman correlation @@ -386,6 +386,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): return result + # generated from template include "algos_common_helper.pxi" include "algos_rank_helper.pxi" diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 1cb7b18fa4f61..2fbbc81c4b5a1 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -25,7 +25,7 @@ cdef double nan = NaN # TODO: aggregate multiple columns in single pass -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # first, nth, last diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 06ed947808e39..53203dd30daee 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -93,22 +93,26 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): free(lens) return result + cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: return (x << b) | (x >> (64 - b)) + cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil: p[0] = (v) p[1] = (v >> 8) p[2] = (v >> 16) p[3] = (v >> 24) + cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil: u32to8_le(p, v) u32to8_le(p + 4, (v >> 32)) + cdef inline uint64_t u8to64_le(uint8_t* p) nogil: return (p[0] | - p[1] << 8 | + p[1] << 8 | p[2] << 16 | p[3] << 24 | p[4] << 32 | @@ -116,6 +120,7 @@ cdef inline uint64_t u8to64_le(uint8_t* p) nogil: p[6] << 48 | p[7] << 56) + cdef inline void _sipround(uint64_t* v0, uint64_t* v1, uint64_t* v2, uint64_t* v3) nogil: v0[0] += v1[0] @@ -133,6 +138,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, v1[0] ^= v2[0] v2[0] = _rotl(v2[0], 32) + cpdef uint64_t siphash(bytes data, bytes key) except? 0: if len(key) != 16: raise ValueError( diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index c96251a0293d6..e98c0131e9c44 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -122,7 +122,7 @@ cdef class IndexEngine: if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() - loc = _bin_search(values, val) # .searchsorted(val, side='left') + loc = _bin_search(values, val) # .searchsorted(val, side='left') if loc >= len(values): raise KeyError(val) if util.get_value_at(values, loc) != val: @@ -475,15 +475,14 @@ cdef class DatetimeEngine(Int64Engine): if other.dtype != self._get_box_dtype(): return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') - return algos.pad_int64(self._get_index_values(), other, - limit=limit) + return algos.pad_int64(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): if other.dtype != self._get_box_dtype(): return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') return algos.backfill_int64(self._get_index_values(), other, - limit=limit) + limit=limit) cdef class TimedeltaEngine(DatetimeEngine): diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 264a983fe4d53..c09642511207a 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -13,6 +13,7 @@ from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, import numbers _VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) + cdef class IntervalMixin: property closed_left: def __get__(self): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 33c3650fa0425..8dbc70a0bdbe9 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -147,7 +147,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, - Py_ssize_t max_groups): + Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 ndarray[int64_t] left_count, right_count, left_sorter, right_sorter diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index b4687df8785dd..f882c3d7a7621 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -82,6 +82,7 @@ def values_from_object(object o): return o + cpdef map_indices_list(list index): """ Produce a dict mapping the values of the input array to their respective @@ -116,7 +117,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr): s += arr[i].__sizeof__() return s -#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # isnull / notnull related cdef double INF = np.inf @@ -125,7 +127,7 @@ cdef double NEGINF = -INF cpdef bint checknull(object val): if util.is_float_object(val) or util.is_complex_object(val): - return val != val # and val != INF and val != NEGINF + return val != val # and val != INF and val != NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT elif val is NaT: @@ -990,7 +992,7 @@ def convert_json_to_lines(object arr): in_quotes = ~in_quotes if v == backslash or is_escaping: is_escaping = ~is_escaping - if v == comma: # commas that should be \n + if v == comma: # commas that should be \n if num_open_brackets_seen == 0 and not in_quotes: narr[i] = newline elif v == left_bracket: @@ -1015,7 +1017,7 @@ def write_csv_rows(list data, ndarray data_index, # In crude testing, N>100 yields little marginal improvement N=100 - # pre-allocate rows + # pre-allocate rows ncols = len(cols) rows = [[None] * (nlevels + ncols) for x in range(N)] @@ -1047,12 +1049,13 @@ def write_csv_rows(list data, ndarray data_index, if j >= N - 1 and j % N == N - 1: writer.writerows(rows) - if j >= 0 and (j < N - 1 or (j % N) != N - 1): + if j >= 0 and (j < N - 1 or (j % N) != N - 1): writer.writerows(rows[:((j + 1) % N)]) -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Groupby-related functions + @cython.boundscheck(False) def arrmap(ndarray[object] index, object func): cdef int length = index.shape[0] @@ -1136,7 +1139,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, bins = np.empty(lenbin - 1, dtype=np.int64) j = 0 # index into values - bc = 0 # bin count + bc = 0 # bin count # linear scan if right_closed: @@ -1285,9 +1288,9 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, cdef class _PandasNull: def __richcmp__(_PandasNull self, object other, int op): - if op == 2: # == + if op == 2: # == return isinstance(other, _PandasNull) - elif op == 3: # != + elif op == 3: # != return not isinstance(other, _PandasNull) else: return False @@ -1793,7 +1796,7 @@ cdef class BlockPlacement: stop += other_int if ((step > 0 and start < 0) or - (step < 0 and stop < step)): + (step < 0 and stop < step)): raise ValueError("iadd causes length change") if stop < 0: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 0dacdf70a71d5..a5ce6c560d844 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h": # Store words in (potentially ragged) matrix for now, hmm char **words - int64_t *word_starts # where we are in the stream + int64_t *word_starts # where we are in the stream int64_t words_len int64_t words_cap @@ -400,7 +400,7 @@ cdef class TextReader: raise ValueError('only length-1 separators excluded right now') self.parser.delimiter = ord(delimiter) - #---------------------------------------- + # ---------------------------------------- # parser options self.parser.doublequote = doublequote @@ -519,7 +519,7 @@ cdef class TextReader: self.index_col = index_col - #---------------------------------------- + # ---------------------------------------- # header stuff self.allow_leading_cols = allow_leading_cols @@ -810,7 +810,7 @@ cdef class TextReader: if hr == self.header[-1]: lc = len(this_header) ic = (len(self.index_col) if self.index_col - is not None else 0) + is not None else 0) if lc != unnamed_count and lc - ic > unnamed_count: hr -= 1 self.parser_start -= 1 @@ -848,7 +848,7 @@ cdef class TextReader: # Corner case, not enough lines in the file if self.parser.lines < data_line + 1: field_count = len(header[0]) - else: # not self.has_usecols: + else: # not self.has_usecols: field_count = self.parser.line_fields[data_line] @@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst): result.append(x) return result + cdef asbytes(object o): if PY3: return str(o).encode('utf-8') @@ -1417,11 +1418,13 @@ def _maybe_upcast(arr): return arr + cdef enum StringPath: CSTRING UTF8 ENCODED + # factored out logic to pick string converter cdef inline StringPath _string_path(char *encoding): if encoding != NULL and encoding != b"utf-8": @@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding): return UTF8 else: return CSTRING + + # ---------------------------------------------------------------------- # Type conversions / inference support code + cdef _string_box_factorize(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): @@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser, parser.sci, parser.thousands, 1) if errno != 0 or p_end[0] or p_end == word: if (strcasecmp(word, cinf) == 0 or - strcasecmp(word, cposinf) == 0): + strcasecmp(word, cposinf) == 0): data[0] = INF elif strcasecmp(word, cneginf) == 0: data[0] = NEGINF @@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser, parser.sci, parser.thousands, 1) if errno != 0 or p_end[0] or p_end == word: if (strcasecmp(word, cinf) == 0 or - strcasecmp(word, cposinf) == 0): + strcasecmp(word, cposinf) == 0): data[0] = INF elif strcasecmp(word, cneginf) == 0: data[0] = NEGINF @@ -2263,6 +2269,7 @@ def _compute_na_values(): } return na_values + na_values = _compute_na_values() for k in list(na_values): @@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols): return recs + cdef _fill_structured_column(char *dst, char* src, int64_t elsize, int64_t stride, int64_t length, bint incref): cdef: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 4b8c86ae9d4b2..0456033dbb731 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -109,8 +109,8 @@ cdef extern from "period_helper.h": initialize_daytime_conversion_factor_matrix() +# ---------------------------------------------------------------------- # Period logic -#---------------------------------------------------------------------- @cython.wraparound(False) @@ -168,9 +168,11 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): return out + cdef char START = 'S' cdef char END = 'E' + cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, bint end): """ @@ -278,31 +280,31 @@ def period_format(int64_t value, int freq, object fmt=None): if fmt is None: freq_group = (freq // 1000) * 1000 - if freq_group == 1000: # FR_ANN + if freq_group == 1000: # FR_ANN fmt = b'%Y' - elif freq_group == 2000: # FR_QTR + elif freq_group == 2000: # FR_QTR fmt = b'%FQ%q' - elif freq_group == 3000: # FR_MTH + elif freq_group == 3000: # FR_MTH fmt = b'%Y-%m' - elif freq_group == 4000: # WK + elif freq_group == 4000: # WK left = period_asfreq(value, freq, 6000, 0) right = period_asfreq(value, freq, 6000, 1) return '%s/%s' % (period_format(left, 6000), period_format(right, 6000)) - elif (freq_group == 5000 # BUS - or freq_group == 6000): # DAY + elif (freq_group == 5000 # BUS + or freq_group == 6000): # DAY fmt = b'%Y-%m-%d' - elif freq_group == 7000: # HR + elif freq_group == 7000: # HR fmt = b'%Y-%m-%d %H:00' - elif freq_group == 8000: # MIN + elif freq_group == 8000: # MIN fmt = b'%Y-%m-%d %H:%M' - elif freq_group == 9000: # SEC + elif freq_group == 9000: # SEC fmt = b'%Y-%m-%d %H:%M:%S' - elif freq_group == 10000: # MILLISEC + elif freq_group == 10000: # MILLISEC fmt = b'%Y-%m-%d %H:%M:%S.%l' - elif freq_group == 11000: # MICROSEC + elif freq_group == 11000: # MICROSEC fmt = b'%Y-%m-%d %H:%M:%S.%u' - elif freq_group == 12000: # NANOSEC + elif freq_group == 12000: # NANOSEC fmt = b'%Y-%m-%d %H:%M:%S.%n' else: raise ValueError('Unknown freq: %d' % freq) @@ -730,7 +732,7 @@ cdef class _Period(object): return Period(ordinal=ordinal, freq=self.freq) msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - else: # pragma no cover + else: # pragma no cover return NotImplemented def __add__(self, other): @@ -1148,8 +1150,8 @@ class Period(_Period): elif value is None: if (year is None and month is None and - quarter is None and day is None and - hour is None and minute is None and second is None): + quarter is None and day is None and + hour is None and minute is None and second is None): ordinal = iNaT else: if freq is None: diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index fac678e531c8b..5484cbda5bdf9 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -328,7 +328,7 @@ cdef class BlockIndex(SparseIndex): ndarray blocs, blengths cdef: - object __weakref__ # need to be picklable + object __weakref__ # need to be picklable int32_t *locbuf int32_t *lenbuf @@ -486,7 +486,7 @@ cdef class BlockIndex(SparseIndex): cur_length = xlen[xi] xi += 1 - else: # xloc[xi] < yloc[yi] + else: # xloc[xi] < yloc[yi] cur_loc = yloc[yi] diff = yloc[yi] - xloc[xi] @@ -629,7 +629,7 @@ cdef class BlockMerge(object): cdef: BlockIndex x, y, result ndarray xstart, xlen, xend, ystart, ylen, yend - int32_t xi, yi # block indices + int32_t xi, yi # block indices def __init__(self, BlockIndex x, BlockIndex y): self.x = x diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 8fab825eae428..5d550148b10bc 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -448,8 +448,8 @@ def infer_dtype(object value, bint skipna=False): for i in range(n): val = util.get_value_1d(values, i) if (util.is_integer_object(val) and - not util.is_timedelta64_object(val) and - not util.is_datetime64_object(val)): + not util.is_timedelta64_object(val) and + not util.is_datetime64_object(val)): return 'mixed-integer' return 'mixed' diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/src/reduce.pyx index f578eb2f4a346..d1761384114ef 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/src/reduce.pyx @@ -1,4 +1,5 @@ -#cython=False +# -*- coding: utf-8 -*- +# cython: profile=False import numpy as np from distutils.version import LooseVersion @@ -512,7 +513,7 @@ def apply_frame_axis0(object frame, object f, object names, for i in range(n): slider.move(starts[i], ends[i]) - item_cache.clear() # ugh + item_cache.clear() # ugh object.__setattr__(slider.dummy, 'name', names[i]) piece = f(slider.dummy) @@ -532,6 +533,7 @@ def apply_frame_axis0(object frame, object f, object names, return results, mutated + cdef class BlockSlider: """ Only capable of sliding on axis=0 diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 025533b29366f..d2492064c900c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -80,7 +80,7 @@ UTC = pytz.utc # initialize numpy import_array() -# import_ufunc() + cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT @@ -421,7 +421,7 @@ class Timestamp(_Timestamp): def _round(self, freq, rounder): cdef: - int64_t unit, r, value, buff = 1000000 + int64_t unit, r, value, buff = 1000000 object result from pandas.tseries.frequencies import to_offset @@ -620,7 +620,7 @@ class Timestamp(_Timestamp): # tz naive, localize tz = maybe_get_tz(tz) if not is_string_object(ambiguous): - ambiguous = [ambiguous] + ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, errors=errors)[0] return Timestamp(value, tz=tz) @@ -809,6 +809,7 @@ class Timestamp(_Timestamp): # ---------------------------------------------------------------------- + cdef inline bint _check_all_nulls(object val): """ utility to check if a value is any type of null """ cdef bint res @@ -1040,7 +1041,7 @@ cdef class _Timestamp(datetime): if self.tzinfo is None: if other.tzinfo is not None: raise TypeError('Cannot compare tz-naive and tz-aware ' - 'timestamps') + 'timestamps') elif other.tzinfo is None: raise TypeError('Cannot compare tz-naive and tz-aware timestamps') @@ -1210,10 +1211,10 @@ cdef class _Timestamp(datetime): # format a Timestamp with only _date_repr if possible # otherwise _repr_base if (self.hour == 0 and - self.minute == 0 and - self.second == 0 and - self.microsecond == 0 and - self.nanosecond == 0): + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): return self._date_repr return self._repr_base @@ -1332,8 +1333,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, # sort of a temporary hack if ts.tzinfo is not None: - if (hasattr(tz, 'normalize') and - hasattr(ts.tzinfo, '_utcoffset')): + if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): ts = tz.normalize(ts) obj.value = pydatetime_to_dt64(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -1682,7 +1682,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if not need_to_iterate: if ((fvalues < _NS_LOWER_BOUND).any() - or (fvalues > _NS_UPPER_BOUND).any()): + or (fvalues > _NS_UPPER_BOUND).any()): raise OutOfBoundsDatetime( "cannot convert input with unit '{0}'".format(unit)) result = (iresult *m).astype('M8[ns]') @@ -1950,7 +1950,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("{0} is not convertible to datetime" .format(type(val))) - if seen_datetime and seen_integer: + if seen_datetime and seen_integer: # we have mixed datetimes & integers if is_coerce: @@ -2027,9 +2027,9 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', cdef class _Timedelta(timedelta): cdef readonly: - int64_t value # nanoseconds - object freq # frequency reference - bint is_populated # are my components populated + int64_t value # nanoseconds + object freq # frequency reference + bint is_populated # are my components populated int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns def __hash__(_Timedelta self): @@ -2190,20 +2190,20 @@ class Timedelta(_Timedelta): if value is _no_input: if not len(kwargs): - raise ValueError( - "cannot construct a Timedelta without a value/unit or " - "descriptive keywords (days,seconds....)") + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") def _to_py_int_float(v): if is_integer_object(v): return int(v) elif is_float_object(v): return float(v) - raise TypeError( - "Invalid type {0}. Must be int or float.".format(type(v))) + raise TypeError("Invalid type {0}. Must be int or " + "float.".format(type(v))) kwargs = dict([(k, _to_py_int_float(v)) - for k, v in iteritems(kwargs)]) + for k, v in iteritems(kwargs)]) try: nano = kwargs.pop('nanoseconds', 0) @@ -2233,9 +2233,8 @@ class Timedelta(_Timedelta): elif _checknull_with_nat(value): return NaT else: - raise ValueError( - "Value must be Timedelta, string, integer, " - "float, timedelta or convertible") + raise ValueError("Value must be Timedelta, string, integer, " + "float, timedelta or convertible") if is_timedelta64_object(value): value = value.view('i8') @@ -2389,6 +2388,7 @@ class Timedelta(_Timedelta): def __repr__(self): return "Timedelta('{0}')".format(self._repr_base(format='long')) + def __str__(self): return self._repr_base(format='long') @@ -2674,6 +2674,7 @@ class Timedelta(_Timedelta): __pos__ = _op_unary_method(lambda x: x, '__pos__') __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + # resolution in ns Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) Timedelta.max = Timedelta(np.iinfo(np.int64).max) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe729594526ef..478d3bba80b00 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -206,8 +206,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): dt64_to_dtstruct(v, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(get_utcoffset(tz1, dt).total_seconds()) - * 1000000000) + delta = (int(get_utcoffset(tz1, dt).total_seconds()) * + 1000000000) utc_dates[i] = v - delta else: trans, deltas, typ = get_dst_info(tz1) @@ -246,8 +246,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): dt64_to_dtstruct(v, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = (int(get_utcoffset(tz2, dt).total_seconds()) - * 1000000000) + delta = (int(get_utcoffset(tz2, dt).total_seconds()) * + 1000000000) result[i] = v + delta return result @@ -414,7 +414,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, raise pytz.AmbiguousTimeError( "There are %i dst switches when " "there should only be 1." % switch_idx.size) - switch_idx = switch_idx[0] + 1 # Pull the only index and adjust + switch_idx = switch_idx[0] + 1 + # Pull the only index and adjust a_idx = grp[:switch_idx] b_idx = grp[switch_idx:] dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 53ed8ddf22f4b..b40646295cce5 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -40,13 +40,13 @@ def build_field_sarray(ndarray[int64_t] dtindex): count = len(dtindex) - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond + sa_dtype = [('Y', 'i4'), # year + ('M', 'i4'), # month + ('D', 'i4'), # day + ('h', 'i4'), # hour + ('m', 'i4'), # min + ('s', 'i4'), # second + ('u', 'i4')] # microsecond out = np.empty(count, dtype=sa_dtype) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index f7889d76abbc7..9d810bfb411af 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -10,89 +10,15 @@ np.import_array() from util cimport is_integer_object - -cpdef get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if getattr(freqstr, '_typ', None) == 'dateoffset': - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer_object(freqstr[0]) and - is_integer_object(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if is_integer_object(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer_object(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = _period_str_to_code(base) - - return code, stride - +# ---------------------------------------------------------------------- +# Constants # hack to handle WOM-1MON opattern = re.compile( r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' ) - -cpdef _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - +_INVALID_FREQ_ERROR = "Invalid frequency: {0}" # --------------------------------------------------------------------- # Period codes @@ -147,8 +73,8 @@ _period_code_map = { "S": 9000, # Secondly "L": 10000, # Millisecondly "U": 11000, # Microsecondly - "N": 12000, # Nanosecondly -} + "N": 12000} # Nanosecondly + _reverse_period_code_map = { _period_code_map[key]: key for key in _period_code_map} @@ -159,23 +85,20 @@ _period_code_map.update({'Y' + key[1:]: _period_code_map[key] if key.startswith('A-')}) _period_code_map.update({ - "Q": 2000, # Quarterly - December year end (default quarterly) - "A": 1000, # Annual - "W": 4000, # Weekly - "C": 5000, # Custom Business Day - }) - -_dont_uppercase = set(('MS', 'ms')) + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000}) # Custom Business Day _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', - 'A': 'A-DEC', # YearEnd(month=12), + 'A': 'A-DEC', # YearEnd(month=12), 'Y': 'A-DEC', - 'AS': 'AS-JAN', # YearBegin(month=1), + 'AS': 'AS-JAN', # YearBegin(month=1), 'YS': 'AS-JAN', - 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BA': 'BA-DEC', # BYearEnd(month=12), 'BY': 'BA-DEC', 'BAS': 'BAS-JAN', # BYearBegin(month=1), 'BYS': 'BAS-JAN', @@ -186,7 +109,85 @@ _lite_rule_alias = { 'us': 'U', 'ns': 'N'} -_INVALID_FREQ_ERROR = "Invalid frequency: {0}" +_dont_uppercase = set(('MS', 'ms')) + +# ---------------------------------------------------------------------- + +cpdef get_freq_code(freqstr): + """ + Return freq str or tuple to freq code and stride (mult) + + Parameters + ---------- + freqstr : str or tuple + + Returns + ------- + return : tuple of base frequency code and stride (mult) + + Example + ------- + >>> get_freq_code('3D') + (6000, 3) + + >>> get_freq_code('D') + (6000, 1) + + >>> get_freq_code(('D', 3)) + (6000, 3) + """ + if getattr(freqstr, '_typ', None) == 'dateoffset': + freqstr = (freqstr.rule_code, freqstr.n) + + if isinstance(freqstr, tuple): + if (is_integer_object(freqstr[0]) and + is_integer_object(freqstr[1])): + # e.g., freqstr = (2000, 1) + return freqstr + else: + # e.g., freqstr = ('T', 5) + try: + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] + except: + if is_integer_object(freqstr[1]): + raise + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride + + if is_integer_object(freqstr): + return (freqstr, 1) + + base, stride = _base_and_stride(freqstr) + code = _period_str_to_code(base) + + return code, stride + + +cpdef _base_and_stride(freqstr): + """ + Return base freq and stride info from string representation + + Examples + -------- + _freq_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return (base, stride) cpdef _period_str_to_code(freqstr): diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 845d1b8dcabba..90882eefd9f67 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -44,10 +44,14 @@ from dateutil.relativedelta import relativedelta from dateutil.parser import DEFAULTPARSER from dateutil.parser import parse as du_parse +# ---------------------------------------------------------------------- +# Constants + class DateParseError(ValueError): pass + _nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN']) _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, @@ -64,6 +68,8 @@ cdef set _not_datelike_strings = set(['a', 'A', 'm', 'M', 'p', 'P', 't', 'T']) NAT_SENTINEL = object() # This allows us to reference NaT without having to import it +# ---------------------------------------------------------------------- + def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=False, **kwargs): @@ -199,7 +205,7 @@ cpdef bint _does_string_look_like_datetime(object date_string): cdef inline object _parse_dateabbr_string(object date_string, object default, - object freq): + object freq): cdef: object ret int year, quarter = -1, month, mnum, date_len @@ -317,7 +323,7 @@ def dateutil_parse(object timestr, object default, ignoretz=False, res = DEFAULTPARSER._parse(fobj, **kwargs) # dateutil 2.2 compat - if isinstance(res, tuple): # PyTuple_Check + if isinstance(res, tuple): # PyTuple_Check res, _ = res if res is None: @@ -390,7 +396,7 @@ cpdef object _get_rule_month(object source, object default='DEC'): return source.split('-')[1] -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Parsing for type-inference @@ -404,7 +410,7 @@ def try_parse_dates(ndarray[object] values, parser=None, result = np.empty(n, dtype='O') if parser is None: - if default is None: # GH2618 + if default is None: # GH2618 date = datetime.now() default = datetime(date.year, date.month, 1) @@ -449,7 +455,7 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times, result = np.empty(n, dtype='O') if date_parser is None: - if default is None: # GH2618 + if default is None: # GH2618 date = datetime.now() default = datetime(date.year, date.month, 1) @@ -506,7 +512,7 @@ def try_parse_datetime_components(ndarray[object] years, n = len(years) if (len(months) != n or len(days) != n or len(hours) != n or - len(minutes) != n or len(seconds) != n): + len(minutes) != n or len(seconds) != n): raise ValueError('Length of all datetime components must be equal') result = np.empty(n, dtype='O') @@ -525,7 +531,7 @@ def try_parse_datetime_components(ndarray[object] years, return result -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Miscellaneous _DATEUTIL_LEXER_SPLIT = None diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index a38aa37674e9e..214d7c0f2b432 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -69,6 +69,7 @@ def array_strptime(ndarray[object] values, object fmt, bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' + int ordinal assert is_raise or is_ignore or is_coerce @@ -102,7 +103,7 @@ def array_strptime(ndarray[object] values, object fmt, bad_directive = "%" del err raise ValueError("'%s' is a bad directive in format '%s'" % - (bad_directive, fmt)) + (bad_directive, fmt)) # IndexError only occurs when the format string is "%" except IndexError: raise ValueError("stray %% in format '%s'" % fmt) @@ -163,7 +164,7 @@ def array_strptime(ndarray[object] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) + values[i][found.end():]) # search else: @@ -198,8 +199,8 @@ def array_strptime(ndarray[object] values, object fmt, if parse_code == 0: year = int(found_dict['y']) # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 + # value in the range of [00, 68] is in the century 2000, while + # [69,99] is in the century 1900 if year <= 68: year += 2000 else: @@ -296,9 +297,10 @@ def array_strptime(ndarray[object] values, object fmt, if julian == -1: # Need to add 1 to result since first day of the year is 1, not # 0. - julian = datetime_date(year, month, day).toordinal() - \ - datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will + ordinal = datetime_date(year, month, day).toordinal() + julian = ordinal - datetime_date(year, 1, 1).toordinal() + 1 + else: + # Assume that if they bothered to include Julian day it will # be accurate. datetime_result = datetime_date.fromordinal( (julian - 1) + datetime_date(year, 1, 1).toordinal()) @@ -454,8 +456,8 @@ class LocaleTime(object): date_time[1] = time.strftime("%x", time_tuple).lower() date_time[2] = time.strftime("%X", time_tuple).lower() replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], - '%B'), (self.a_weekday[2], '%a'), + (self.f_month[3], '%B'), + (self.a_weekday[2], '%a'), (self.a_month[3], '%b'), (self.am_pm[1], '%p'), ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), @@ -463,7 +465,7 @@ class LocaleTime(object): # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I')] replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) + for tz in tz_values]) for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): current_format = date_time[offset] for old, new in replacement_pairs: @@ -536,7 +538,7 @@ class TimeRE(dict): 'w': r"(?P[0-6])", # W is set below by using 'U' 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than + # XXX: Does 'Y' need to worry about having less or more than # 4 digits? 'Y': r"(?P\d\d\d\d)", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), @@ -604,7 +606,7 @@ _cache_lock = _thread_allocate_lock() # DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock # first! _TimeRE_cache = TimeRE() -_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache +_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache _regex_cache = {} @@ -615,7 +617,7 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, assumes the week starts on Sunday or Monday (6 or 0).""" cdef: - int first_weekday, week_0_length, days_to_week + int first_weekday, week_0_length, days_to_week first_weekday = datetime_date(year, 1, 1).weekday() # If we are dealing with the %U directive (week starts on Sunday), it's diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index da1163e25f5c6..10c379ad43a63 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -222,7 +222,7 @@ cdef inline parse_timedelta_string(object ts): elif have_dot: if ((len(number) or len(frac)) and not len(unit) - and current_unit is None): + and current_unit is None): raise ValueError("no units specified") if len(frac) > 0 and len(frac) <= 3: diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 7f778dde86e23..7fb48e7c66f47 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -156,9 +156,10 @@ cdef inline object tz_cache_key(object tz): return None -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # UTC Offsets + cpdef get_utcoffset(tzinfo, obj): try: return tzinfo._utcoffset @@ -174,7 +175,7 @@ cdef inline bint is_fixed_offset(object tz): return 0 elif treat_tz_as_pytz(tz): if (len(tz._transition_info) == 0 - and len(tz._utc_transition_times) == 0): + and len(tz._utc_transition_times) == 0): return 1 else: return 0 @@ -246,7 +247,7 @@ cdef object get_dst_info(object tz): # get utc trans times trans_list = get_utc_trans_times_from_dateutil_tz(tz) trans = np.hstack([ - np.array([0], dtype='M8[s]'), # place holder for first item + np.array([0], dtype='M8[s]'), # place holder for 1st item np.array(trans_list, dtype='M8[s]')]).astype( 'M8[ns]') # all trans listed trans = trans.view('i8') diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index b6bd6f92f6199..a95e50785c9b0 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1381,8 +1381,8 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, else: vlow = skiplist.get(idx) vhigh = skiplist.get(idx + 1) - output[i] = (vlow + (vhigh - vlow) * - (quantile * (nobs - 1) - idx)) + output[i] = ((vlow + (vhigh - vlow) * + (quantile * (nobs - 1) - idx))) else: output[i] = NaN diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx index fd3f4612fb432..f6383b42d4975 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -1,5 +1,5 @@ # coding: utf-8 -#cython: embedsignature=True +# cython: embedsignature=True from cpython cimport * from libc.stdlib cimport * diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index 22401d7514f65..05dfaad8b2058 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -1,5 +1,5 @@ # coding: utf-8 -#cython: embedsignature=True +# cython: embedsignature=True from cpython cimport * cdef extern from "Python.h": @@ -20,7 +20,7 @@ cdef extern from "../../src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook - bint has_pairs_hook # call object_hook with k-v pairs + bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook PyObject* ext_hook char *encoding @@ -100,7 +100,7 @@ def default_read_extended_type(typecode, data): def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, @@ -257,7 +257,7 @@ cdef class Unpacker(object): object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, @@ -467,8 +467,8 @@ cdef class Unpacker(object): return self._unpack(unpack_construct, None, 1) # for debug. - #def _buf(self): + # def _buf(self): # return PyString_FromStringAndSize(self.buf, self.buf_tail) - #def _off(self): + # def _off(self): # return self.buf_head diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index dadfe7ca87e48..021f3715d472b 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -1,17 +1,17 @@ """ test feather-format compat """ - -import pytest -feather = pytest.importorskip('feather') +from distutils.version import LooseVersion import numpy as np -import pandas as pd -from pandas.io.feather_format import to_feather, read_feather -from feather import FeatherError -from pandas.util.testing import assert_frame_equal, ensure_clean +import pandas as pd import pandas.util.testing as tm -from distutils.version import LooseVersion +from pandas.util.testing import assert_frame_equal, ensure_clean + +import pytest +feather = pytest.importorskip('feather') +from feather import FeatherError # noqa:E402 +from pandas.io.feather_format import to_feather, read_feather # noqa:E402 fv = LooseVersion(feather.__version__) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a97747b93369f..13bf81889af1a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -3,39 +3,38 @@ import tempfile from contextlib import contextmanager from warnings import catch_warnings +from distutils.version import LooseVersion import datetime from datetime import timedelta + import numpy as np -import pandas import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, - isna) + isna, compat, concat, Timestamp) + +import pandas.util.testing as tm +from pandas.util.testing import (assert_panel4d_equal, + assert_panel_equal, + assert_frame_equal, + assert_series_equal, + set_timezone) from pandas.compat import (is_platform_windows, is_platform_little_endian, - PY3, PY35, PY36, BytesIO, text_type) + PY3, PY35, PY36, BytesIO, text_type, + range, lrange, u) from pandas.io.formats.printing import pprint_thing from pandas.core.dtypes.common import is_categorical_dtype tables = pytest.importorskip('tables') -from pandas.io.pytables import TableIterator -from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, +from pandas.io import pytables as pytables # noqa:E402 +from pandas.io.pytables import (TableIterator, # noqa:E402 + HDFStore, get_store, Term, read_hdf, PossibleDataLossError, ClosedFileError) -from pandas.io import pytables as pytables -import pandas.util.testing as tm -from pandas.util.testing import (assert_panel4d_equal, - assert_panel_equal, - assert_frame_equal, - assert_series_equal, - set_timezone) -from pandas import concat, Timestamp -from pandas import compat -from pandas.compat import range, lrange, u -from distutils.version import LooseVersion _default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' else 'zlib') @@ -328,13 +327,13 @@ def test_api_default_format(self): with ensure_clean_store(self.path) as store: df = tm.makeDataFrame() - pandas.set_option('io.hdf.default_format', 'fixed') + pd.set_option('io.hdf.default_format', 'fixed') _maybe_remove(store, 'df') store.put('df', df) assert not store.get_storer('df').is_table pytest.raises(ValueError, store.append, 'df2', df) - pandas.set_option('io.hdf.default_format', 'table') + pd.set_option('io.hdf.default_format', 'table') _maybe_remove(store, 'df') store.put('df', df) assert store.get_storer('df').is_table @@ -342,19 +341,19 @@ def test_api_default_format(self): store.append('df2', df) assert store.get_storer('df').is_table - pandas.set_option('io.hdf.default_format', None) + pd.set_option('io.hdf.default_format', None) with ensure_clean_path(self.path) as path: df = tm.makeDataFrame() - pandas.set_option('io.hdf.default_format', 'fixed') + pd.set_option('io.hdf.default_format', 'fixed') df.to_hdf(path, 'df') with HDFStore(path) as store: assert not store.get_storer('df').is_table pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True) - pandas.set_option('io.hdf.default_format', 'table') + pd.set_option('io.hdf.default_format', 'table') df.to_hdf(path, 'df3') with HDFStore(path) as store: assert store.get_storer('df3').is_table @@ -362,7 +361,7 @@ def test_api_default_format(self): with HDFStore(path) as store: assert store.get_storer('df4').is_table - pandas.set_option('io.hdf.default_format', None) + pd.set_option('io.hdf.default_format', None) def test_keys(self): @@ -1086,7 +1085,7 @@ def _try_decode(x, encoding='latin-1'): examples = [] for dtype in ['category', object]: for val in values: - examples.append(pandas.Series(val, dtype=dtype)) + examples.append(pd.Series(val, dtype=dtype)) def roundtrip(s, key='data', encoding='latin-1', nan_rep=''): with ensure_clean_path(self.path) as store: @@ -1171,13 +1170,13 @@ def test_append_all_nans(self): tm.assert_frame_equal(store['df2'], df) # tests the option io.hdf.dropna_table - pandas.set_option('io.hdf.dropna_table', False) + pd.set_option('io.hdf.dropna_table', False) _maybe_remove(store, 'df3') store.append('df3', df[:10]) store.append('df3', df[10:]) tm.assert_frame_equal(store['df3'], df) - pandas.set_option('io.hdf.dropna_table', True) + pd.set_option('io.hdf.dropna_table', True) _maybe_remove(store, 'df4') store.append('df4', df[:10]) store.append('df4', df[10:]) @@ -2253,7 +2252,7 @@ def test_calendar_roundtrip_issue(self): weekmask_egypt = 'Sun Mon Tue Wed Thu' holidays = ['2012-05-01', datetime.datetime(2013, 5, 1), np.datetime64('2014-05-01')] - bday_egypt = pandas.offsets.CustomBusinessDay( + bday_egypt = pd.offsets.CustomBusinessDay( holidays=holidays, weekmask=weekmask_egypt) dt = datetime.datetime(2013, 4, 30) dts = date_range(dt, periods=5, freq=bday_egypt) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 763e6547ea2cb..be25a439f9075 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -312,7 +312,7 @@ def _get_freq_str(base, mult=1): # --------------------------------------------------------------------- # Offset names ("time rules") and related functions -from pandas._libs.tslibs.offsets import _offset_to_period_map +from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402 from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa Minute, Hour, Day, BDay, CDay, Week, MonthBegin, diff --git a/setup.cfg b/setup.cfg index 0123078523b6f..7a88ee8557dc7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] -ignore = E731,E402 +ignore = E731,E402,W503 max-line-length = 79 [yapf] diff --git a/setup.py b/setup.py index ed58329d5fd8f..783ded906eba2 100755 --- a/setup.py +++ b/setup.py @@ -7,10 +7,16 @@ """ import os +from os.path import join as pjoin + import sys import shutil from distutils.version import LooseVersion +# versioning +import versioneer +cmdclass = versioneer.get_cmdclass() + def is_platform_windows(): return sys.platform == 'win32' or sys.platform == 'cygwin' @@ -24,10 +30,6 @@ def is_platform_mac(): return sys.platform == 'darwin' -# versioning -import versioneer -cmdclass = versioneer.get_cmdclass() - min_cython_ver = '0.23' try: import Cython @@ -77,9 +79,9 @@ def is_platform_mac(): " use pip or easy_install." "\n $ pip install 'python-dateutil < 2' 'numpy'") -from distutils.extension import Extension -from distutils.command.build import build -from distutils.command.build_ext import build_ext as _build_ext +from distutils.extension import Extension # noqa:E402 +from distutils.command.build import build # noqa:E402 +from distutils.command.build_ext import build_ext as _build_ext # noqa:E402 try: if not _CYTHON_INSTALLED: @@ -105,9 +107,6 @@ def is_platform_mac(): 'pip install Tempita') -from os.path import join as pjoin - - _pxi_dep_template = { 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_take_helper.pxi.in',