From d6c88ae0d13c7b9f5a22c5879b775df13fb9129a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 30 Oct 2017 21:33:56 -0700 Subject: [PATCH 1/7] a zillion flakes --- ci/lint.sh | 2 +- pandas/_libs/algos.pyx | 5 +- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/hashing.pyx | 8 +- pandas/_libs/index.pyx | 7 +- pandas/_libs/interval.pyx | 7 +- pandas/_libs/join.pyx | 2 +- pandas/_libs/lib.pyx | 38 ++++--- pandas/_libs/parsers.pyx | 26 +++-- pandas/_libs/period.pyx | 34 +++--- pandas/_libs/sparse.pyx | 4 +- pandas/_libs/src/inference.pyx | 8 +- pandas/_libs/src/reduce.pyx | 3 +- pandas/_libs/tslib.pyx | 25 +++-- pandas/_libs/tslibs/conversion.pyx | 11 +- pandas/_libs/tslibs/fields.pyx | 14 +-- pandas/_libs/tslibs/frequencies.pyx | 166 ++++++++++++++-------------- pandas/_libs/tslibs/parsing.pyx | 34 +++--- pandas/_libs/tslibs/strptime.pyx | 25 +++-- pandas/_libs/tslibs/timedeltas.pyx | 4 +- pandas/_libs/tslibs/timezones.pyx | 9 +- pandas/_libs/window.pyx | 4 +- pandas/io/msgpack/_packer.pyx | 2 +- pandas/io/msgpack/_unpacker.pyx | 10 +- 24 files changed, 242 insertions(+), 208 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 22f8628f59dcd..fc7e0cf9ed3c5 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -24,7 +24,7 @@ if [ "$LINT" ]; then echo "Linting setup.py DONE" echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126 + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,W503,E301,E127,E261,E271,E129,W291,E222,E241,E123 if [ $? 
-ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a44a7288bda45..e9ef9c4ffe24b 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -258,7 +258,7 @@ def min_subseq(ndarray[double_t] arr): return (s, e, -m) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Pairwise correlation/covariance @@ -322,7 +322,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): return result -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Pairwise Spearman correlation @@ -386,6 +386,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): return result + # generated from template include "algos_common_helper.pxi" include "algos_rank_helper.pxi" diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 1cb7b18fa4f61..2fbbc81c4b5a1 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -25,7 +25,7 @@ cdef double nan = NaN # TODO: aggregate multiple columns in single pass -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # first, nth, last diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 06ed947808e39..53203dd30daee 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -93,22 +93,26 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): free(lens) return result + cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: return (x << b) | (x >> (64 - b)) + cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil: p[0] = (v) p[1] = (v >> 8) p[2] = (v >> 16) p[3] = (v >> 24) + cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil: u32to8_le(p, v) u32to8_le(p + 4, (v >> 32)) + cdef inline uint64_t 
u8to64_le(uint8_t* p) nogil: return (p[0] | - p[1] << 8 | + p[1] << 8 | p[2] << 16 | p[3] << 24 | p[4] << 32 | @@ -116,6 +120,7 @@ cdef inline uint64_t u8to64_le(uint8_t* p) nogil: p[6] << 48 | p[7] << 56) + cdef inline void _sipround(uint64_t* v0, uint64_t* v1, uint64_t* v2, uint64_t* v3) nogil: v0[0] += v1[0] @@ -133,6 +138,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, v1[0] ^= v2[0] v2[0] = _rotl(v2[0], 32) + cpdef uint64_t siphash(bytes data, bytes key) except? 0: if len(key) != 16: raise ValueError( diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index c96251a0293d6..e98c0131e9c44 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -122,7 +122,7 @@ cdef class IndexEngine: if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() - loc = _bin_search(values, val) # .searchsorted(val, side='left') + loc = _bin_search(values, val) # .searchsorted(val, side='left') if loc >= len(values): raise KeyError(val) if util.get_value_at(values, loc) != val: @@ -475,15 +475,14 @@ cdef class DatetimeEngine(Int64Engine): if other.dtype != self._get_box_dtype(): return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') - return algos.pad_int64(self._get_index_values(), other, - limit=limit) + return algos.pad_int64(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): if other.dtype != self._get_box_dtype(): return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') return algos.backfill_int64(self._get_index_values(), other, - limit=limit) + limit=limit) cdef class TimedeltaEngine(DatetimeEngine): diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 264a983fe4d53..54b7725b10692 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -13,6 +13,7 @@ from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, import numbers _VALID_CLOSED = frozenset(['left', 
'right', 'both', 'neither']) + cdef class IntervalMixin: property closed_left: def __get__(self): @@ -40,9 +41,9 @@ cdef class IntervalMixin: cdef _interval_like(other): - return (hasattr(other, 'left') - and hasattr(other, 'right') - and hasattr(other, 'closed')) + return (hasattr(other, 'left') and + hasattr(other, 'right') and + hasattr(other, 'closed')) cdef class Interval(IntervalMixin): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 33c3650fa0425..8dbc70a0bdbe9 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -147,7 +147,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, - Py_ssize_t max_groups): + Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 ndarray[int64_t] left_count, right_count, left_sorter, right_sorter diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e7e92b7ae987a..7996d0f454aee 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -86,6 +86,7 @@ def values_from_object(object o): return o + cpdef map_indices_list(list index): """ Produce a dict mapping the values of the input array to their respective @@ -120,7 +121,7 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr): s += arr[i].__sizeof__() return s -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # isnull / notnull related cdef double INF = np.inf @@ -188,18 +189,18 @@ cpdef bint isscalar(object val): """ - return (np.PyArray_IsAnyScalar(val) + return (np.PyArray_IsAnyScalar(val) or # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. - or PyBytes_Check(val) + PyBytes_Check(val) or # We differ from numpy (as of 1.10), which claims that None is # not scalar in np.isscalar(). 
- or val is None - or PyDate_Check(val) - or PyDelta_Check(val) - or PyTime_Check(val) - or util.is_period_object(val) - or is_decimal(val) - or is_interval(val)) + val is None or + PyDate_Check(val) or + PyDelta_Check(val) or + PyTime_Check(val) or + util.is_period_object(val) or + is_decimal(val) or + is_interval(val)) def item_from_zerodim(object val): @@ -994,7 +995,7 @@ def convert_json_to_lines(object arr): in_quotes = ~in_quotes if v == backslash or is_escaping: is_escaping = ~is_escaping - if v == comma: # commas that should be \n + if v == comma: # commas that should be \n if num_open_brackets_seen == 0 and not in_quotes: narr[i] = newline elif v == left_bracket: @@ -1019,7 +1020,7 @@ def write_csv_rows(list data, ndarray data_index, # In crude testing, N>100 yields little marginal improvement N=100 - # pre-allocate rows + # pre-allocate rows ncols = len(cols) rows = [[None] * (nlevels + ncols) for x in range(N)] @@ -1051,12 +1052,13 @@ def write_csv_rows(list data, ndarray data_index, if j >= N - 1 and j % N == N - 1: writer.writerows(rows) - if j >= 0 and (j < N - 1 or (j % N) != N - 1): + if j >= 0 and (j < N - 1 or (j % N) != N - 1): writer.writerows(rows[:((j + 1) % N)]) -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Groupby-related functions + @cython.boundscheck(False) def arrmap(ndarray[object] index, object func): cdef int length = index.shape[0] @@ -1140,7 +1142,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, bins = np.empty(lenbin - 1, dtype=np.int64) j = 0 # index into values - bc = 0 # bin count + bc = 0 # bin count # linear scan if right_closed: @@ -1289,9 +1291,9 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, cdef class _PandasNull: def __richcmp__(_PandasNull self, object other, int op): - if op == 2: # == + if op == 2: # == return isinstance(other, _PandasNull) - elif op == 3: 
# != + elif op == 3: # != return not isinstance(other, _PandasNull) else: return False @@ -1797,7 +1799,7 @@ cdef class BlockPlacement: stop += other_int if ((step > 0 and start < 0) or - (step < 0 and stop < step)): + (step < 0 and stop < step)): raise ValueError("iadd causes length change") if stop < 0: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 0dacdf70a71d5..7fdd0630086d7 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h": # Store words in (potentially ragged) matrix for now, hmm char **words - int64_t *word_starts # where we are in the stream + int64_t *word_starts # where we are in the stream int64_t words_len int64_t words_cap @@ -400,7 +400,7 @@ cdef class TextReader: raise ValueError('only length-1 separators excluded right now') self.parser.delimiter = ord(delimiter) - #---------------------------------------- + # ---------------------------------------- # parser options self.parser.doublequote = doublequote @@ -519,7 +519,7 @@ cdef class TextReader: self.index_col = index_col - #---------------------------------------- + # ---------------------------------------- # header stuff self.allow_leading_cols = allow_leading_cols @@ -756,8 +756,8 @@ cdef class TextReader: start = self.parser.line_start[0] # e.g., if header=3 and file only has 2 lines - elif (self.parser.lines < hr + 1 - and not isinstance(self.orig_header, list)) or ( + elif (self.parser.lines < hr + 1 and + not isinstance(self.orig_header, list)) or ( self.parser.lines < hr): msg = self.orig_header if isinstance(msg, list): @@ -810,7 +810,7 @@ cdef class TextReader: if hr == self.header[-1]: lc = len(this_header) ic = (len(self.index_col) if self.index_col - is not None else 0) + is not None else 0) if lc != unnamed_count and lc - ic > unnamed_count: hr -= 1 self.parser_start -= 1 @@ -848,7 +848,7 @@ cdef class TextReader: # Corner case, not enough lines in the file if self.parser.lines < 
data_line + 1: field_count = len(header[0]) - else: # not self.has_usecols: + else: # not self.has_usecols: field_count = self.parser.line_fields[data_line] @@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst): result.append(x) return result + cdef asbytes(object o): if PY3: return str(o).encode('utf-8') @@ -1417,11 +1418,13 @@ def _maybe_upcast(arr): return arr + cdef enum StringPath: CSTRING UTF8 ENCODED + # factored out logic to pick string converter cdef inline StringPath _string_path(char *encoding): if encoding != NULL and encoding != b"utf-8": @@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding): return UTF8 else: return CSTRING + + # ---------------------------------------------------------------------- # Type conversions / inference support code + cdef _string_box_factorize(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): @@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser, parser.sci, parser.thousands, 1) if errno != 0 or p_end[0] or p_end == word: if (strcasecmp(word, cinf) == 0 or - strcasecmp(word, cposinf) == 0): + strcasecmp(word, cposinf) == 0): data[0] = INF elif strcasecmp(word, cneginf) == 0: data[0] = NEGINF @@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser, parser.sci, parser.thousands, 1) if errno != 0 or p_end[0] or p_end == word: if (strcasecmp(word, cinf) == 0 or - strcasecmp(word, cposinf) == 0): + strcasecmp(word, cposinf) == 0): data[0] = INF elif strcasecmp(word, cneginf) == 0: data[0] = NEGINF @@ -2263,6 +2269,7 @@ def _compute_na_values(): } return na_values + na_values = _compute_na_values() for k in list(na_values): @@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols): return recs + cdef _fill_structured_column(char *dst, char* src, int64_t elsize, int64_t stride, int64_t length, bint incref): cdef: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 
76664e276c634..e21d69cdf59af 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -108,8 +108,8 @@ cdef extern from "period_helper.h": initialize_daytime_conversion_factor_matrix() +# ---------------------------------------------------------------------- # Period logic -#---------------------------------------------------------------------- @cython.wraparound(False) @@ -167,9 +167,11 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): return out + cdef char START = 'S' cdef char END = 'E' + cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, bint end): """ @@ -277,31 +279,31 @@ def period_format(int64_t value, int freq, object fmt=None): if fmt is None: freq_group = (freq // 1000) * 1000 - if freq_group == 1000: # FR_ANN + if freq_group == 1000: # FR_ANN fmt = b'%Y' - elif freq_group == 2000: # FR_QTR + elif freq_group == 2000: # FR_QTR fmt = b'%FQ%q' - elif freq_group == 3000: # FR_MTH + elif freq_group == 3000: # FR_MTH fmt = b'%Y-%m' - elif freq_group == 4000: # WK + elif freq_group == 4000: # WK left = period_asfreq(value, freq, 6000, 0) right = period_asfreq(value, freq, 6000, 1) return '%s/%s' % (period_format(left, 6000), period_format(right, 6000)) - elif (freq_group == 5000 # BUS - or freq_group == 6000): # DAY + elif (freq_group == 5000 or # BUS + freq_group == 6000): # DAY fmt = b'%Y-%m-%d' - elif freq_group == 7000: # HR + elif freq_group == 7000: # HR fmt = b'%Y-%m-%d %H:00' - elif freq_group == 8000: # MIN + elif freq_group == 8000: # MIN fmt = b'%Y-%m-%d %H:%M' - elif freq_group == 9000: # SEC + elif freq_group == 9000: # SEC fmt = b'%Y-%m-%d %H:%M:%S' - elif freq_group == 10000: # MILLISEC + elif freq_group == 10000: # MILLISEC fmt = b'%Y-%m-%d %H:%M:%S.%l' - elif freq_group == 11000: # MICROSEC + elif freq_group == 11000: # MICROSEC fmt = b'%Y-%m-%d %H:%M:%S.%u' - elif freq_group == 12000: # NANOSEC + elif freq_group == 12000: # NANOSEC fmt = b'%Y-%m-%d %H:%M:%S.%n' else: raise 
ValueError('Unknown freq: %d' % freq) @@ -729,7 +731,7 @@ cdef class _Period(object): return Period(ordinal=ordinal, freq=self.freq) msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - else: # pragma no cover + else: # pragma no cover return NotImplemented def __add__(self, other): @@ -1147,8 +1149,8 @@ class Period(_Period): elif value is None: if (year is None and month is None and - quarter is None and day is None and - hour is None and minute is None and second is None): + quarter is None and day is None and + hour is None and minute is None and second is None): ordinal = iNaT else: if freq is None: diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index fac678e531c8b..7f18302767635 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -486,7 +486,7 @@ cdef class BlockIndex(SparseIndex): cur_length = xlen[xi] xi += 1 - else: # xloc[xi] < yloc[yi] + else: # xloc[xi] < yloc[yi] cur_loc = yloc[yi] diff = yloc[yi] - xloc[xi] @@ -629,7 +629,7 @@ cdef class BlockMerge(object): cdef: BlockIndex x, y, result ndarray xstart, xlen, xend, ystart, ylen, yend - int32_t xi, yi # block indices + int32_t xi, yi # block indices def __init__(self, BlockIndex x, BlockIndex y): self.x = x diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index c340e870e9722..79ecd35f4149c 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -165,8 +165,8 @@ cdef class Seen(object): two conflict cases was also detected. However, we are trying to force conversion to a numeric dtype. 
""" - return (self.uint_ and (self.null_ or self.sint_) - and not self.coerce_numeric) + return (self.uint_ and (self.null_ or self.sint_) and + not self.coerce_numeric) cdef inline saw_null(self): """ @@ -447,8 +447,8 @@ def infer_dtype(object value, bint skipna=False): for i in range(n): val = util.get_value_1d(values, i) if (util.is_integer_object(val) and - not util.is_timedelta64_object(val) and - not util.is_datetime64_object(val)): + not util.is_timedelta64_object(val) and + not util.is_datetime64_object(val)): return 'mixed-integer' return 'mixed' diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/src/reduce.pyx index f578eb2f4a346..091f3523c402b 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/src/reduce.pyx @@ -512,7 +512,7 @@ def apply_frame_axis0(object frame, object f, object names, for i in range(n): slider.move(starts[i], ends[i]) - item_cache.clear() # ugh + item_cache.clear() # ugh object.__setattr__(slider.dummy, 'name', names[i]) piece = f(slider.dummy) @@ -532,6 +532,7 @@ def apply_frame_axis0(object frame, object f, object names, return results, mutated + cdef class BlockSlider: """ Only capable of sliding on axis=0 diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index af52a18d5dafe..131bb62b2301a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -422,7 +422,7 @@ class Timestamp(_Timestamp): def _round(self, freq, rounder): cdef: - int64_t unit, r, value, buff = 1000000 + int64_t unit, r, value, buff = 1000000 object result from pandas.tseries.frequencies import to_offset @@ -621,7 +621,7 @@ class Timestamp(_Timestamp): # tz naive, localize tz = maybe_get_tz(tz) if not util.is_string_object(ambiguous): - ambiguous = [ambiguous] + ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, errors=errors)[0] return Timestamp(value, tz=tz) @@ -764,9 +764,9 @@ class Timestamp(_Timestamp): Returns if the Timestamp has a time component in addition to 
the date part """ - return (self.time() != _zero_time - or self.tzinfo is not None - or self.nanosecond != 0) + return (self.time() != _zero_time or + self.tzinfo is not None or + self.nanosecond != 0) def to_julian_date(self): """ @@ -984,7 +984,7 @@ class NaTType(_NaT): utcoffset = _make_error_func('utcoffset', datetime) # Timestamp has empty docstring for some methods. - utcfromtimestamp = _make_error_func('utcfromtimestamp', None) + utcfromtimestamp = _make_error_func('utcfromtimestamp', None) fromtimestamp = _make_error_func('fromtimestamp', None) combine = _make_error_func('combine', None) utcnow = _make_error_func('utcnow', None) @@ -1024,13 +1024,16 @@ def __nat_unpickle(*args): # return constant defined in the module return NaT + NaT = NaTType() + cdef inline bint _checknull_with_nat(object val): """ utility to check if a value is a nat or not """ return val is None or ( PyFloat_Check(val) and val != val) or val is NaT + cdef inline bint _check_all_nulls(object val): """ utility to check if a value is any type of null """ cdef bint res @@ -1048,6 +1051,7 @@ cdef inline bint _check_all_nulls(object val): res = 0 return res + cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1: return _nat_scalar_rules[op] @@ -3206,6 +3210,7 @@ cdef inline _to_i8(object val): return Timestamp(val).value return val + cpdef pydt_to_i8(object pydt): """ Convert to int64 representation compatible with numpy datetime64; converts @@ -3396,6 +3401,7 @@ def monthrange(int64_t year, int64_t month): cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month -1] + cpdef normalize_date(object dt): """ Normalize datetime.datetime value to midnight. 
Returns datetime.date as a @@ -3416,13 +3422,12 @@ cpdef normalize_date(object dt): raise TypeError('Unrecognized type: %s' % type(dt)) -cdef inline int _year_add_months(pandas_datetimestruct dts, - int months) nogil: +cdef inline int _year_add_months(pandas_datetimestruct dts, int months) nogil: """new year number after shifting pandas_datetimestruct number of months""" return dts.year + (dts.month + months - 1) / 12 -cdef inline int _month_add_months(pandas_datetimestruct dts, - int months) nogil: + +cdef inline int _month_add_months(pandas_datetimestruct dts, int months) nogil: """ New month number after shifting pandas_datetimestruct number of months. diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 69204e2ca4e1a..de75dfe8ed25a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -215,8 +215,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(get_utcoffset(tz1, dt).total_seconds()) - * 1000000000) + delta = (int(get_utcoffset(tz1, dt).total_seconds()) * + 1000000000) utc_dates[i] = v - delta else: trans, deltas, typ = get_dst_info(tz1) @@ -255,8 +255,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = (int(get_utcoffset(tz2, dt).total_seconds()) - * 1000000000) + delta = (int(get_utcoffset(tz2, dt).total_seconds()) * + 1000000000) result[i] = v + delta return result @@ -423,7 +423,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, raise pytz.AmbiguousTimeError( "There are %i dst switches when " "there should only be 1." 
% switch_idx.size) - switch_idx = switch_idx[0] + 1 # Pull the only index and adjust + switch_idx = switch_idx[0] + 1 + # Pull the only index and adjust a_idx = grp[:switch_idx] b_idx = grp[switch_idx:] dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 53ed8ddf22f4b..b40646295cce5 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -40,13 +40,13 @@ def build_field_sarray(ndarray[int64_t] dtindex): count = len(dtindex) - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond + sa_dtype = [('Y', 'i4'), # year + ('M', 'i4'), # month + ('D', 'i4'), # day + ('h', 'i4'), # hour + ('m', 'i4'), # min + ('s', 'i4'), # second + ('u', 'i4')] # microsecond out = np.empty(count, dtype=sa_dtype) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index f7889d76abbc7..e15b4f6d7a6b4 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -10,89 +10,15 @@ np.import_array() from util cimport is_integer_object - -cpdef get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if getattr(freqstr, '_typ', None) == 'dateoffset': - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer_object(freqstr[0]) and - is_integer_object(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if 
is_integer_object(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer_object(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = _period_str_to_code(base) - - return code, stride - +# ---------------------------------------------------------------------- +# Constants # hack to handle WOM-1MON opattern = re.compile( r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' ) - -cpdef _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - +_INVALID_FREQ_ERROR = "Invalid frequency: {0}" # --------------------------------------------------------------------- # Period codes @@ -147,8 +73,8 @@ _period_code_map = { "S": 9000, # Secondly "L": 10000, # Millisecondly "U": 11000, # Microsecondly - "N": 12000, # Nanosecondly -} + "N": 12000} # Nanosecondly + _reverse_period_code_map = { _period_code_map[key]: key for key in _period_code_map} @@ -165,8 +91,6 @@ _period_code_map.update({ "C": 5000, # Custom Business Day }) -_dont_uppercase = set(('MS', 'ms')) - _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', @@ -186,7 +110,85 @@ _lite_rule_alias = { 'us': 'U', 'ns': 'N'} -_INVALID_FREQ_ERROR = "Invalid frequency: {0}" +_dont_uppercase = set(('MS', 'ms')) + +# ---------------------------------------------------------------------- + +cpdef get_freq_code(freqstr): + """ + Return freq str or tuple to freq code and stride (mult) + + Parameters + ---------- + freqstr : str or tuple + + Returns + ------- + return : tuple of base frequency code and stride (mult) + + Example + ------- + >>> 
get_freq_code('3D') + (6000, 3) + + >>> get_freq_code('D') + (6000, 1) + + >>> get_freq_code(('D', 3)) + (6000, 3) + """ + if getattr(freqstr, '_typ', None) == 'dateoffset': + freqstr = (freqstr.rule_code, freqstr.n) + + if isinstance(freqstr, tuple): + if (is_integer_object(freqstr[0]) and + is_integer_object(freqstr[1])): + # e.g., freqstr = (2000, 1) + return freqstr + else: + # e.g., freqstr = ('T', 5) + try: + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] + except: + if is_integer_object(freqstr[1]): + raise + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride + + if is_integer_object(freqstr): + return (freqstr, 1) + + base, stride = _base_and_stride(freqstr) + code = _period_str_to_code(base) + + return code, stride + + +cpdef _base_and_stride(freqstr): + """ + Return base freq and stride info from string representation + + Examples + -------- + _freq_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return (base, stride) cpdef _period_str_to_code(freqstr): diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 845d1b8dcabba..1f6dbe197dfc9 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -44,10 +44,13 @@ from dateutil.relativedelta import relativedelta from dateutil.parser import DEFAULTPARSER from dateutil.parser import parse as du_parse +# ---------------------------------------------------------------------- +# Constants class DateParseError(ValueError): pass + _nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN']) _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, @@ -64,6 +67,7 @@ cdef set _not_datelike_strings = set(['a', 'A', 'm', 'M', 'p', 'P', 't', 'T']) NAT_SENTINEL = object() # This 
allows us to reference NaT without having to import it +# ---------------------------------------------------------------------- def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=False, **kwargs): @@ -199,7 +203,7 @@ cpdef bint _does_string_look_like_datetime(object date_string): cdef inline object _parse_dateabbr_string(object date_string, object default, - object freq): + object freq): cdef: object ret int year, quarter = -1, month, mnum, date_len @@ -229,27 +233,27 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, i = date_string.index('Q', 1, 6) if i == 1: quarter = int(date_string[0]) - if date_len == 4 or (date_len == 5 - and date_string[i + 1] == '-'): + if date_len == 4 or (date_len == 5 and + date_string[i + 1] == '-'): # r'(\d)Q-?(\d\d)') year = 2000 + int(date_string[-2:]) - elif date_len == 6 or (date_len == 7 - and date_string[i + 1] == '-'): + elif date_len == 6 or (date_len == 7 and + date_string[i + 1] == '-'): # r'(\d)Q-?(\d\d\d\d)') year = int(date_string[-4:]) else: raise ValueError elif i == 2 or i == 3: # r'(\d\d)-?Q(\d)' - if date_len == 4 or (date_len == 5 - and date_string[i - 1] == '-'): + if date_len == 4 or (date_len == 5 and + date_string[i - 1] == '-'): quarter = int(date_string[-1]) year = 2000 + int(date_string[:2]) else: raise ValueError elif i == 4 or i == 5: - if date_len == 6 or (date_len == 7 - and date_string[i - 1] == '-'): + if date_len == 6 or (date_len == 7 and + date_string[i - 1] == '-'): # r'(\d\d\d\d)-?Q(\d)' quarter = int(date_string[-1]) year = int(date_string[:4]) @@ -317,7 +321,7 @@ def dateutil_parse(object timestr, object default, ignoretz=False, res = DEFAULTPARSER._parse(fobj, **kwargs) # dateutil 2.2 compat - if isinstance(res, tuple): # PyTuple_Check + if isinstance(res, tuple): # PyTuple_Check res, _ = res if res is None: @@ -390,7 +394,7 @@ cpdef object _get_rule_month(object source, object default='DEC'): return source.split('-')[1] 
-#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Parsing for type-inference @@ -404,7 +408,7 @@ def try_parse_dates(ndarray[object] values, parser=None, result = np.empty(n, dtype='O') if parser is None: - if default is None: # GH2618 + if default is None: # GH2618 date = datetime.now() default = datetime(date.year, date.month, 1) @@ -449,7 +453,7 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times, result = np.empty(n, dtype='O') if date_parser is None: - if default is None: # GH2618 + if default is None: # GH2618 date = datetime.now() default = datetime(date.year, date.month, 1) @@ -506,7 +510,7 @@ def try_parse_datetime_components(ndarray[object] years, n = len(years) if (len(months) != n or len(days) != n or len(hours) != n or - len(minutes) != n or len(seconds) != n): + len(minutes) != n or len(seconds) != n): raise ValueError('Length of all datetime components must be equal') result = np.empty(n, dtype='O') @@ -525,7 +529,7 @@ def try_parse_datetime_components(ndarray[object] years, return result -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Miscellaneous _DATEUTIL_LEXER_SPLIT = None diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 4a141c7b56428..69ffcb980a754 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -76,6 +76,7 @@ def array_strptime(ndarray[object] values, object fmt, bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' + int ordinal assert is_raise or is_ignore or is_coerce @@ -109,7 +110,7 @@ def array_strptime(ndarray[object] values, object fmt, bad_directive = "%" del err raise ValueError("'%s' is a bad directive in format '%s'" % - (bad_directive, fmt)) + (bad_directive, fmt)) # IndexError only occurs 
when the format string is "%" except IndexError: raise ValueError("stray %% in format '%s'" % fmt) @@ -170,7 +171,7 @@ def array_strptime(ndarray[object] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) + values[i][found.end():]) # search else: @@ -205,8 +206,8 @@ def array_strptime(ndarray[object] values, object fmt, if parse_code == 0: year = int(found_dict['y']) # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 + # value in the range of [00, 68] is in the century 2000, while + # [69,99] is in the century 1900 if year <= 68: year += 2000 else: @@ -303,8 +304,8 @@ def array_strptime(ndarray[object] values, object fmt, if julian == -1: # Need to add 1 to result since first day of the year is 1, not # 0. - julian = datetime_date(year, month, day).toordinal() - \ - datetime_date(year, 1, 1).toordinal() + 1 + ordinal = datetime_date(year, month, day).toordinal() + julian = ordinal - datetime_date(year, 1, 1).toordinal() + 1 else: # Assume that if they bothered to include Julian day it will # be accurate. datetime_result = datetime_date.fromordinal( @@ -461,8 +462,8 @@ class LocaleTime(object): date_time[1] = time.strftime("%x", time_tuple).lower() date_time[2] = time.strftime("%X", time_tuple).lower() replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], - '%B'), (self.a_weekday[2], '%a'), + (self.f_month[3], '%B'), + (self.a_weekday[2], '%a'), (self.a_month[3], '%b'), (self.am_pm[1], '%p'), ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), @@ -470,7 +471,7 @@ class LocaleTime(object): # '3' needed for when no leading zero. 
('2', '%w'), ('10', '%I')] replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) + for tz in tz_values]) for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): current_format = date_time[offset] for old, new in replacement_pairs: @@ -543,7 +544,7 @@ class TimeRE(dict): 'w': r"(?P[0-6])", # W is set below by using 'U' 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than + # XXX: Does 'Y' need to worry about having less or more than # 4 digits? 'Y': r"(?P\d\d\d\d)", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), @@ -611,7 +612,7 @@ _cache_lock = _thread_allocate_lock() # DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock # first! _TimeRE_cache = TimeRE() -_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache +_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache _regex_cache = {} @@ -622,7 +623,7 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, assumes the week starts on Sunday or Monday (6 or 0).""" cdef: - int first_weekday, week_0_length, days_to_week + int first_weekday, week_0_length, days_to_week first_weekday = datetime_date(year, 1, 1).weekday() # If we are dealing with the %U directive (week starts on Sunday), it's diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 1785c85da4949..36191ecc16aa4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -222,8 +222,8 @@ cdef inline parse_timedelta_string(object ts): # e.g. 
hh:mm:ss.fffffff elif have_dot: - if ((len(number) or len(frac)) and not len(unit) - and current_unit is None): + if ((len(number) or len(frac)) and not len(unit) and + current_unit is None): raise ValueError("no units specified") if len(frac) > 0 and len(frac) <= 3: diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 7f778dde86e23..c16763d2b6cf2 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -156,9 +156,10 @@ cdef inline object tz_cache_key(object tz): return None -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # UTC Offsets + cpdef get_utcoffset(tzinfo, obj): try: return tzinfo._utcoffset @@ -173,8 +174,8 @@ cdef inline bint is_fixed_offset(object tz): else: return 0 elif treat_tz_as_pytz(tz): - if (len(tz._transition_info) == 0 - and len(tz._utc_transition_times) == 0): + if (len(tz._transition_info) == 0 and + len(tz._utc_transition_times) == 0): return 1 else: return 0 @@ -246,7 +247,7 @@ cdef object get_dst_info(object tz): # get utc trans times trans_list = get_utc_trans_times_from_dateutil_tz(tz) trans = np.hstack([ - np.array([0], dtype='M8[s]'), # place holder for first item + np.array([0], dtype='M8[s]'), # place holder for 1st item np.array(trans_list, dtype='M8[s]')]).astype( 'M8[ns]') # all trans listed trans = trans.view('i8') diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index b6bd6f92f6199..a95e50785c9b0 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1381,8 +1381,8 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, else: vlow = skiplist.get(idx) vhigh = skiplist.get(idx + 1) - output[i] = (vlow + (vhigh - vlow) * - (quantile * (nobs - 1) - idx)) + output[i] = ((vlow + (vhigh - vlow) * + (quantile * (nobs - 1) - idx))) else: output[i] = NaN diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx 
index fd3f4612fb432..f6383b42d4975 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -1,5 +1,5 @@ # coding: utf-8 -#cython: embedsignature=True +# cython: embedsignature=True from cpython cimport * from libc.stdlib cimport * diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index 22401d7514f65..d82c5216a08eb 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -20,7 +20,7 @@ cdef extern from "../../src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook - bint has_pairs_hook # call object_hook with k-v pairs + bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook PyObject* ext_hook char *encoding @@ -100,7 +100,7 @@ def default_read_extended_type(typecode, data): def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, @@ -257,7 +257,7 @@ cdef class Unpacker(object): object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, @@ -467,8 +467,8 @@ cdef class Unpacker(object): return self._unpack(unpack_construct, None, 1) # for debug. 
- #def _buf(self): + # def _buf(self): # return PyString_FromStringAndSize(self.buf, self.buf_tail) - #def _off(self): + # def _off(self): # return self.buf_head From 3017c950b39efd6f416d95448c938cda933f27cb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Oct 2017 08:47:23 -0700 Subject: [PATCH 2/7] allow binary operators after line break --- ci/lint.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index fc7e0cf9ed3c5..bc93405a05efe 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -10,7 +10,7 @@ if [ "$LINT" ]; then # pandas/_libs/src is C code, so no need to search there. echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/_libs/src + flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503 if [ $? -ne "0" ]; then RET=1 fi @@ -24,7 +24,7 @@ if [ "$LINT" ]; then echo "Linting setup.py DONE" echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,W503,E301,E127,E261,E271,E129,W291,E222,E241,E123 + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123 if [ $? 
-ne "0" ]; then RET=1 fi From b6931800df6d9902c245bb89f1e98154a064925e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Oct 2017 08:59:51 -0700 Subject: [PATCH 3/7] fix remaining flake8 complaints in pyx files, except for Timedelta --- ci/lint.sh | 2 +- pandas/_libs/lib.pyx | 3 ++- pandas/_libs/sparse.pyx | 2 +- pandas/_libs/src/reduce.pyx | 3 ++- pandas/_libs/tslib.pyx | 37 +++++++++++++---------------- pandas/_libs/tslibs/conversion.pyx | 4 ++-- pandas/_libs/tslibs/frequencies.pyx | 15 ++++++------ pandas/_libs/tslibs/parsing.pyx | 2 ++ pandas/_libs/tslibs/strptime.pyx | 3 ++- pandas/io/msgpack/_unpacker.pyx | 2 +- 10 files changed, 37 insertions(+), 36 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index bc93405a05efe..82131cd0f89d8 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -17,7 +17,7 @@ if [ "$LINT" ]; then echo "Linting *.py DONE" echo "Linting setup.py" - flake8 setup.py + flake8 setup.py --ignore=W503 if [ $? -ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7996d0f454aee..eae1dd6a753e3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -121,6 +121,7 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr): s += arr[i].__sizeof__() return s + # ---------------------------------------------------------------------- # isnull / notnull related @@ -130,7 +131,7 @@ cdef double NEGINF = -INF cpdef bint checknull(object val): if util.is_float_object(val) or util.is_complex_object(val): - return val != val # and val != INF and val != NEGINF + return val != val # and val != INF and val != NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT elif val is NaT: diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 7f18302767635..5484cbda5bdf9 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -328,7 +328,7 @@ cdef class BlockIndex(SparseIndex): ndarray blocs, blengths cdef: - object __weakref__ # need to be picklable + object 
__weakref__ # need to be picklable int32_t *locbuf int32_t *lenbuf diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/src/reduce.pyx index 091f3523c402b..d1761384114ef 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/src/reduce.pyx @@ -1,4 +1,5 @@ -#cython=False +# -*- coding: utf-8 -*- +# cython: profile=False import numpy as np from distutils.version import LooseVersion diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 131bb62b2301a..b8e2d881c6308 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -80,7 +80,6 @@ UTC = pytz.utc # initialize numpy import_array() -#import_ufunc() # import datetime C API PyDateTime_IMPORT @@ -93,8 +92,7 @@ from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_timezone, get_utcoffset, maybe_get_tz, - get_dst_info - ) + get_dst_info) from tslibs.fields import ( get_date_name_field, get_start_end_field, get_date_field, build_field_sarray) @@ -1094,7 +1092,7 @@ Timestamp.min = Timestamp(_NS_LOWER_BOUND) Timestamp.max = Timestamp(_NS_UPPER_BOUND) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Frequency inference def unique_deltas(ndarray[int64_t] arr): @@ -1269,7 +1267,7 @@ cdef class _Timestamp(datetime): if self.tzinfo is None: if other.tzinfo is not None: raise TypeError('Cannot compare tz-naive and tz-aware ' - 'timestamps') + 'timestamps') elif other.tzinfo is None: raise TypeError('Cannot compare tz-naive and tz-aware timestamps') @@ -1439,10 +1437,10 @@ cdef class _Timestamp(datetime): # format a Timestamp with only _date_repr if possible # otherwise _repr_base if (self.hour == 0 and - self.minute == 0 and - self.second == 0 and - self.microsecond == 0 and - self.nanosecond == 0): + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): return self._date_repr return self._repr_base 
@@ -1460,7 +1458,7 @@ cdef PyTypeObject* ts_type = Timestamp cdef inline bint is_timestamp(object o): - return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) + return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) cdef bint _nat_scalar_rules[6] @@ -1528,7 +1526,7 @@ cdef class _NaT(datetime): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) if PyDateTime_Check(other): - return NaT + return NaT elif PyDelta_Check(other): return NaT @@ -1681,8 +1679,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, # sort of a temporary hack if ts.tzinfo is not None: - if (hasattr(tz, 'normalize') and - hasattr(ts.tzinfo, '_utcoffset')): + if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): ts = tz.normalize(ts) obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -1772,8 +1769,8 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, obj = convert_to_tsobject(obj.value, obj.tzinfo, None, 0, 0) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, - obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) obj = convert_datetime_to_tsobject(dt, tz, nanos=obj.dts.ps / 1000) return obj @@ -2033,7 +2030,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if not need_to_iterate: if ((fvalues < _NS_LOWER_BOUND).any() - or (fvalues > _NS_UPPER_BOUND).any()): + or (fvalues > _NS_UPPER_BOUND).any()): raise OutOfBoundsDatetime( "cannot convert input with unit '{0}'".format(unit)) result = (iresult *m).astype('M8[ns]') @@ -2302,7 +2299,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("{0} is not convertible to datetime" .format(type(val))) - if seen_datetime and seen_integer: + if seen_datetime and seen_integer: # we have mixed datetimes & integers if is_coerce: @@ -3133,7 +3130,7 @@ cpdef 
convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Conversion routines cpdef int64_t _delta_to_nanoseconds(delta) except? -1: @@ -3231,8 +3228,8 @@ def i8_to_pydt(int64_t i8, object tzinfo=None): return Timestamp(i8) +# ---------------------------------------------------------------------- # Accessors -#---------------------------------------------------------------------- def get_time_micros(ndarray[int64_t] dtindex): """ @@ -3382,8 +3379,8 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): return True +# ---------------------------------------------------------------------- # Some general helper functions -#---------------------------------------------------------------------- def monthrange(int64_t year, int64_t month): diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index de75dfe8ed25a..f97b9569633fe 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,7 +39,7 @@ UTC = pytz.UTC # lightweight C object to hold datetime & int64 pair cdef class _TSObject: - #cdef: + # cdef: # pandas_datetimestruct dts # pandas_datetimestruct # int64_t value # numpy dt64 # object tzinfo @@ -255,7 +255,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = (int(get_utcoffset(tz2, dt).total_seconds()) * + delta = (int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000) result[i] = v + delta return result diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index e15b4f6d7a6b4..9d810bfb411af 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -85,21 +85,20 @@ 
_period_code_map.update({'Y' + key[1:]: _period_code_map[key] if key.startswith('A-')}) _period_code_map.update({ - "Q": 2000, # Quarterly - December year end (default quarterly) - "A": 1000, # Annual - "W": 4000, # Weekly - "C": 5000, # Custom Business Day - }) + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000}) # Custom Business Day _lite_rule_alias = { 'W': 'W-SUN', 'Q': 'Q-DEC', - 'A': 'A-DEC', # YearEnd(month=12), + 'A': 'A-DEC', # YearEnd(month=12), 'Y': 'A-DEC', - 'AS': 'AS-JAN', # YearBegin(month=1), + 'AS': 'AS-JAN', # YearBegin(month=1), 'YS': 'AS-JAN', - 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BA': 'BA-DEC', # BYearEnd(month=12), 'BY': 'BA-DEC', 'BAS': 'BAS-JAN', # BYearBegin(month=1), 'BYS': 'BAS-JAN', diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 1f6dbe197dfc9..cbf420bbf3829 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -47,6 +47,7 @@ from dateutil.parser import parse as du_parse # ---------------------------------------------------------------------- # Constants + class DateParseError(ValueError): pass @@ -69,6 +70,7 @@ NAT_SENTINEL = object() # ---------------------------------------------------------------------- + def parse_datetime_string(date_string, freq=None, dayfirst=False, yearfirst=False, **kwargs): """parse datetime string, only returns datetime. diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 69ffcb980a754..14a765c3c4d30 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -306,7 +306,8 @@ def array_strptime(ndarray[object] values, object fmt, # 0. ordinal = datetime_date(year, month, day).toordinal() julian = ordinal - datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will + else: + # Assume that if they bothered to include Julian day it will # be accurate. 
datetime_result = datetime_date.fromordinal( (julian - 1) + datetime_date(year, 1, 1).toordinal()) diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index d82c5216a08eb..05dfaad8b2058 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -1,5 +1,5 @@ # coding: utf-8 -#cython: embedsignature=True +# cython: embedsignature=True from cpython cimport * cdef extern from "Python.h": From 29cf6e8b24e292b38d388d736b802fd50b391c27 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Oct 2017 09:55:10 -0700 Subject: [PATCH 4/7] revert binary op wrapping --- pandas/_libs/interval.pyx | 6 +++--- pandas/_libs/lib.pyx | 18 +++++++++--------- pandas/_libs/parsers.pyx | 4 ++-- pandas/_libs/period.pyx | 4 ++-- pandas/_libs/src/inference.pyx | 4 ++-- pandas/_libs/tslib.pyx | 6 +++--- pandas/_libs/tslibs/parsing.pyx | 16 ++++++++-------- pandas/_libs/tslibs/timedeltas.pyx | 4 ++-- pandas/_libs/tslibs/timezones.pyx | 4 ++-- 9 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 54b7725b10692..c09642511207a 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -41,9 +41,9 @@ cdef class IntervalMixin: cdef _interval_like(other): - return (hasattr(other, 'left') and - hasattr(other, 'right') and - hasattr(other, 'closed')) + return (hasattr(other, 'left') + and hasattr(other, 'right') + and hasattr(other, 'closed')) cdef class Interval(IntervalMixin): diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5ce754e823b91..f882c3d7a7621 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -186,18 +186,18 @@ cpdef bint isscalar(object val): """ - return (np.PyArray_IsAnyScalar(val) or + return (np.PyArray_IsAnyScalar(val) # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. 
- PyBytes_Check(val) or + or PyBytes_Check(val) # We differ from numpy (as of 1.10), which claims that None is # not scalar in np.isscalar(). - val is None or - PyDate_Check(val) or - PyDelta_Check(val) or - PyTime_Check(val) or - util.is_period_object(val) or - is_decimal(val) or - is_interval(val)) + or val is None + or PyDate_Check(val) + or PyDelta_Check(val) + or PyTime_Check(val) + or util.is_period_object(val) + or is_decimal(val) + or is_interval(val)) def item_from_zerodim(object val): diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 7fdd0630086d7..a5ce6c560d844 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -756,8 +756,8 @@ cdef class TextReader: start = self.parser.line_start[0] # e.g., if header=3 and file only has 2 lines - elif (self.parser.lines < hr + 1 and - not isinstance(self.orig_header, list)) or ( + elif (self.parser.lines < hr + 1 + and not isinstance(self.orig_header, list)) or ( self.parser.lines < hr): msg = self.orig_header if isinstance(msg, list): diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e21d69cdf59af..808c0b2d47b62 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -290,8 +290,8 @@ def period_format(int64_t value, int freq, object fmt=None): right = period_asfreq(value, freq, 6000, 1) return '%s/%s' % (period_format(left, 6000), period_format(right, 6000)) - elif (freq_group == 5000 or # BUS - freq_group == 6000): # DAY + elif (freq_group == 5000 # BUS + or freq_group == 6000): # DAY fmt = b'%Y-%m-%d' elif freq_group == 7000: # HR fmt = b'%Y-%m-%d %H:00' diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 07f15b0915d2e..65680f8601c89 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -166,8 +166,8 @@ cdef class Seen(object): two conflict cases was also detected. However, we are trying to force conversion to a numeric dtype. 
""" - return (self.uint_ and (self.null_ or self.sint_) and - not self.coerce_numeric) + return (self.uint_ and (self.null_ or self.sint_) + and not self.coerce_numeric) cdef inline saw_null(self): """ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c6389aee32faa..335cca7a3f8b0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -761,9 +761,9 @@ class Timestamp(_Timestamp): Returns if the Timestamp has a time component in addition to the date part """ - return (self.time() != _zero_time or - self.tzinfo is not None or - self.nanosecond != 0) + return (self.time() != _zero_time + or self.tzinfo is not None + or self.nanosecond != 0) def to_julian_date(self): """ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index cbf420bbf3829..90882eefd9f67 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -235,27 +235,27 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, i = date_string.index('Q', 1, 6) if i == 1: quarter = int(date_string[0]) - if date_len == 4 or (date_len == 5 and - date_string[i + 1] == '-'): + if date_len == 4 or (date_len == 5 + and date_string[i + 1] == '-'): # r'(\d)Q-?(\d\d)') year = 2000 + int(date_string[-2:]) - elif date_len == 6 or (date_len == 7 and - date_string[i + 1] == '-'): + elif date_len == 6 or (date_len == 7 + and date_string[i + 1] == '-'): # r'(\d)Q-?(\d\d\d\d)') year = int(date_string[-4:]) else: raise ValueError elif i == 2 or i == 3: # r'(\d\d)-?Q(\d)' - if date_len == 4 or (date_len == 5 and - date_string[i - 1] == '-'): + if date_len == 4 or (date_len == 5 + and date_string[i - 1] == '-'): quarter = int(date_string[-1]) year = 2000 + int(date_string[:2]) else: raise ValueError elif i == 4 or i == 5: - if date_len == 6 or (date_len == 7 and - date_string[i - 1] == '-'): + if date_len == 6 or (date_len == 7 + and date_string[i - 1] == '-'): # r'(\d\d\d\d)-?Q(\d)' quarter = int(date_string[-1]) year = 
int(date_string[:4]) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 36191ecc16aa4..8356c44869f2b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -222,8 +222,8 @@ cdef inline parse_timedelta_string(object ts): # e.g. hh:mm:ss.fffffff elif have_dot: - if ((len(number) or len(frac)) and not len(unit) and - current_unit is None): + if ((len(number) or len(frac)) and not len(unit) + and current_unit is None): raise ValueError("no units specified") if len(frac) > 0 and len(frac) <= 3: diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index c16763d2b6cf2..7fb48e7c66f47 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -174,8 +174,8 @@ cdef inline bint is_fixed_offset(object tz): else: return 0 elif treat_tz_as_pytz(tz): - if (len(tz._transition_info) == 0 and - len(tz._utc_transition_times) == 0): + if (len(tz._transition_info) == 0 + and len(tz._utc_transition_times) == 0): return 1 else: return 0 From fbe7bca2ecf7adcd3dbc98f72460b896544a7a87 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Oct 2017 14:54:45 -0700 Subject: [PATCH 5/7] fix remaining flake8 complaints, ignore lambda E731 --- ci/lint.sh | 2 +- pandas/_libs/tslib.pyx | 13 ++++---- pandas/tests/io/test_feather.py | 16 +++++----- pandas/tests/io/test_pytables.py | 51 ++++++++++++++++---------------- pandas/tseries/frequencies.py | 2 +- setup.py | 16 +++++----- 6 files changed, 51 insertions(+), 49 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 82131cd0f89d8..8621342377b9d 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -10,7 +10,7 @@ if [ "$LINT" ]; then # pandas/_libs/src is C code, so no need to search there. echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503 + flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503,E731 if [ $? 
-ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 335cca7a3f8b0..9d6ef4ad03367 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -2369,9 +2369,9 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', cdef class _Timedelta(timedelta): cdef readonly: - int64_t value # nanoseconds - object freq # frequency reference - bint is_populated # are my components populated + int64_t value # nanoseconds + object freq # frequency reference + bint is_populated # are my components populated int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns def __hash__(_Timedelta self): @@ -2541,8 +2541,8 @@ class Timedelta(_Timedelta): return int(v) elif is_float_object(v): return float(v) - raise TypeError( - "Invalid type {0}. Must be int or float.".format(type(v))) + raise TypeError("Invalid type {0}. Must be int or " + "float.".format(type(v))) kwargs = dict([(k, _to_py_int_float(v)) for k, v in iteritems(kwargs)]) @@ -2731,6 +2731,7 @@ class Timedelta(_Timedelta): def __repr__(self): return "Timedelta('{0}')".format(self._repr_base(format='long')) + def __str__(self): return self._repr_base(format='long') @@ -3016,6 +3017,7 @@ class Timedelta(_Timedelta): __pos__ = _op_unary_method(lambda x: x, '__pos__') __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + # resolution in ns Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) Timedelta.max = Timedelta(np.iinfo(np.int64).max) @@ -3388,7 +3390,6 @@ cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month - 1] - cpdef normalize_date(object dt): """ Normalize datetime.datetime value to midnight. 
Returns datetime.date as a diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index dadfe7ca87e48..021f3715d472b 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -1,17 +1,17 @@ """ test feather-format compat """ - -import pytest -feather = pytest.importorskip('feather') +from distutils.version import LooseVersion import numpy as np -import pandas as pd -from pandas.io.feather_format import to_feather, read_feather -from feather import FeatherError -from pandas.util.testing import assert_frame_equal, ensure_clean +import pandas as pd import pandas.util.testing as tm -from distutils.version import LooseVersion +from pandas.util.testing import assert_frame_equal, ensure_clean + +import pytest +feather = pytest.importorskip('feather') +from feather import FeatherError # noqa:E402 +from pandas.io.feather_format import to_feather, read_feather # noqa:E402 fv = LooseVersion(feather.__version__) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a97747b93369f..13bf81889af1a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -3,39 +3,38 @@ import tempfile from contextlib import contextmanager from warnings import catch_warnings +from distutils.version import LooseVersion import datetime from datetime import timedelta + import numpy as np -import pandas import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, - isna) + isna, compat, concat, Timestamp) + +import pandas.util.testing as tm +from pandas.util.testing import (assert_panel4d_equal, + assert_panel_equal, + assert_frame_equal, + assert_series_equal, + set_timezone) from pandas.compat import (is_platform_windows, is_platform_little_endian, - PY3, PY35, PY36, BytesIO, text_type) + PY3, PY35, PY36, BytesIO, text_type, + range, lrange, u) from 
pandas.io.formats.printing import pprint_thing from pandas.core.dtypes.common import is_categorical_dtype tables = pytest.importorskip('tables') -from pandas.io.pytables import TableIterator -from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, +from pandas.io import pytables as pytables # noqa:E402 +from pandas.io.pytables import (TableIterator, # noqa:E402 + HDFStore, get_store, Term, read_hdf, PossibleDataLossError, ClosedFileError) -from pandas.io import pytables as pytables -import pandas.util.testing as tm -from pandas.util.testing import (assert_panel4d_equal, - assert_panel_equal, - assert_frame_equal, - assert_series_equal, - set_timezone) -from pandas import concat, Timestamp -from pandas import compat -from pandas.compat import range, lrange, u -from distutils.version import LooseVersion _default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' else 'zlib') @@ -328,13 +327,13 @@ def test_api_default_format(self): with ensure_clean_store(self.path) as store: df = tm.makeDataFrame() - pandas.set_option('io.hdf.default_format', 'fixed') + pd.set_option('io.hdf.default_format', 'fixed') _maybe_remove(store, 'df') store.put('df', df) assert not store.get_storer('df').is_table pytest.raises(ValueError, store.append, 'df2', df) - pandas.set_option('io.hdf.default_format', 'table') + pd.set_option('io.hdf.default_format', 'table') _maybe_remove(store, 'df') store.put('df', df) assert store.get_storer('df').is_table @@ -342,19 +341,19 @@ def test_api_default_format(self): store.append('df2', df) assert store.get_storer('df').is_table - pandas.set_option('io.hdf.default_format', None) + pd.set_option('io.hdf.default_format', None) with ensure_clean_path(self.path) as path: df = tm.makeDataFrame() - pandas.set_option('io.hdf.default_format', 'fixed') + pd.set_option('io.hdf.default_format', 'fixed') df.to_hdf(path, 'df') with HDFStore(path) as store: assert not store.get_storer('df').is_table pytest.raises(ValueError, df.to_hdf, 
path, 'df2', append=True) - pandas.set_option('io.hdf.default_format', 'table') + pd.set_option('io.hdf.default_format', 'table') df.to_hdf(path, 'df3') with HDFStore(path) as store: assert store.get_storer('df3').is_table @@ -362,7 +361,7 @@ def test_api_default_format(self): with HDFStore(path) as store: assert store.get_storer('df4').is_table - pandas.set_option('io.hdf.default_format', None) + pd.set_option('io.hdf.default_format', None) def test_keys(self): @@ -1086,7 +1085,7 @@ def _try_decode(x, encoding='latin-1'): examples = [] for dtype in ['category', object]: for val in values: - examples.append(pandas.Series(val, dtype=dtype)) + examples.append(pd.Series(val, dtype=dtype)) def roundtrip(s, key='data', encoding='latin-1', nan_rep=''): with ensure_clean_path(self.path) as store: @@ -1171,13 +1170,13 @@ def test_append_all_nans(self): tm.assert_frame_equal(store['df2'], df) # tests the option io.hdf.dropna_table - pandas.set_option('io.hdf.dropna_table', False) + pd.set_option('io.hdf.dropna_table', False) _maybe_remove(store, 'df3') store.append('df3', df[:10]) store.append('df3', df[10:]) tm.assert_frame_equal(store['df3'], df) - pandas.set_option('io.hdf.dropna_table', True) + pd.set_option('io.hdf.dropna_table', True) _maybe_remove(store, 'df4') store.append('df4', df[:10]) store.append('df4', df[10:]) @@ -2253,7 +2252,7 @@ def test_calendar_roundtrip_issue(self): weekmask_egypt = 'Sun Mon Tue Wed Thu' holidays = ['2012-05-01', datetime.datetime(2013, 5, 1), np.datetime64('2014-05-01')] - bday_egypt = pandas.offsets.CustomBusinessDay( + bday_egypt = pd.offsets.CustomBusinessDay( holidays=holidays, weekmask=weekmask_egypt) dt = datetime.datetime(2013, 4, 30) dts = date_range(dt, periods=5, freq=bday_egypt) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 763e6547ea2cb..be25a439f9075 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -312,7 +312,7 @@ def _get_freq_str(base, mult=1): # 
--------------------------------------------------------------------- # Offset names ("time rules") and related functions -from pandas._libs.tslibs.offsets import _offset_to_period_map +from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402 from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa Minute, Hour, Day, BDay, CDay, Week, MonthBegin, diff --git a/setup.py b/setup.py index e60ba18ae34d9..48c9c5289d99f 100755 --- a/setup.py +++ b/setup.py @@ -7,10 +7,16 @@ """ import os +from os.path import join as pjoin + import sys import shutil from distutils.version import LooseVersion +# versioning +import versioneer +cmdclass = versioneer.get_cmdclass() + def is_platform_windows(): return sys.platform == 'win32' or sys.platform == 'cygwin' @@ -24,9 +30,6 @@ def is_platform_mac(): return sys.platform == 'darwin' -# versioning -import versioneer -cmdclass = versioneer.get_cmdclass() min_cython_ver = '0.23' try: @@ -77,9 +80,9 @@ def is_platform_mac(): " use pip or easy_install." 
"\n $ pip install 'python-dateutil < 2' 'numpy'") -from distutils.extension import Extension -from distutils.command.build import build -from distutils.command.build_ext import build_ext as _build_ext +from distutils.extension import Extension # noqa:E402 +from distutils.command.build import build # noqa:E402 +from distutils.command.build_ext import build_ext as _build_ext # noqa:E402 try: if not _CYTHON_INSTALLED: @@ -105,7 +108,6 @@ def is_platform_mac(): 'pip install Tempita') -from os.path import join as pjoin _pxi_dep_template = { From 86fe650db1760b2800cb1dd1f40bebc01a126db6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Oct 2017 15:39:40 -0700 Subject: [PATCH 6/7] catch star imports; cleanup --- ci/lint.sh | 4 ++-- pandas/_libs/tslib.pyx | 13 ++++++------- setup.py | 3 --- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 8621342377b9d..f19bfb42486a4 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -24,7 +24,7 @@ if [ "$LINT" ]; then echo "Linting setup.py DONE" echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123 + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then RET=1 fi @@ -34,7 +34,7 @@ if [ "$LINT" ]; then for path in 'src' do echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126 + flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 if [ $?
-ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9d6ef4ad03367..13c1caf4e3a29 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -2532,9 +2532,9 @@ class Timedelta(_Timedelta): if value is _no_input: if not len(kwargs): - raise ValueError( - "cannot construct a Timedelta without a value/unit or " - "descriptive keywords (days,seconds....)") + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") def _to_py_int_float(v): if is_integer_object(v): @@ -2545,7 +2545,7 @@ class Timedelta(_Timedelta): "float.".format(type(v))) kwargs = dict([(k, _to_py_int_float(v)) - for k, v in iteritems(kwargs)]) + for k, v in iteritems(kwargs)]) try: nano = kwargs.pop('nanoseconds', 0) @@ -2575,9 +2575,8 @@ class Timedelta(_Timedelta): elif _checknull_with_nat(value): return NaT else: - raise ValueError( - "Value must be Timedelta, string, integer, " - "float, timedelta or convertible") + raise ValueError("Value must be Timedelta, string, integer, " + "float, timedelta or convertible") if is_timedelta64_object(value): value = value.view('i8') diff --git a/setup.py b/setup.py index 48c9c5289d99f..8b338fb6c823c 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,6 @@ def is_platform_mac(): return sys.platform == 'darwin' - min_cython_ver = '0.23' try: import Cython @@ -108,8 +107,6 @@ def is_platform_mac(): 'pip install Tempita') - - _pxi_dep_template = { 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_take_helper.pxi.in', From beab9451293e467d8188ca07fbfdfb8285a2efd7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Oct 2017 15:44:32 -0700 Subject: [PATCH 7/7] move ignores to cfg --- ci/lint.sh | 4 ++-- setup.cfg | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index f19bfb42486a4..43d6ea0c118b0 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -10,14 +10,14 @@ if [ "$LINT" ]; then # pandas/_libs/src is C 
code, so no need to search there. echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503,E731 + flake8 pandas --filename=*.py --exclude pandas/_libs/src if [ $? -ne "0" ]; then RET=1 fi echo "Linting *.py DONE" echo "Linting setup.py" - flake8 setup.py --ignore=W503 + flake8 setup.py if [ $? -ne "0" ]; then RET=1 fi diff --git a/setup.cfg b/setup.cfg index 0123078523b6f..7a88ee8557dc7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] -ignore = E731,E402 +ignore = E731,E402,W503 max-line-length = 79 [yapf]