From 5e858c6b23b38538da42802a7857f7095dc7007f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 26 Apr 2016 09:41:32 -0400 Subject: [PATCH] PEP: use triple-double-quotes rather than triple-single-quotes around doc-strings in cython as per PEP --- pandas/algos.pyx | 36 ++--- pandas/compat/chainmap_impl.py | 12 +- pandas/core/groupby.py | 5 +- pandas/core/series.py | 4 +- pandas/index.pyx | 8 +- pandas/indexes/base.py | 4 +- pandas/indexes/multi.py | 2 +- pandas/io/tests/test_sql.py | 4 - pandas/io/wb.py | 8 +- pandas/lib.pyx | 28 ++-- pandas/parser.pyx | 4 +- pandas/sandbox/qtpandas.py | 14 +- pandas/src/generate_code.py | 141 +++++++++--------- pandas/src/generated.pyx | 189 ++++++++++++------------- pandas/src/inference.pyx | 16 +-- pandas/src/reduce.pyx | 28 ++-- pandas/src/skiplist.pyx | 4 +- pandas/src/sparse.pyx | 52 +++---- pandas/stats/tests/test_ols.py | 4 +- pandas/tests/series/test_apply.py | 4 +- pandas/tests/series/test_operators.py | 2 +- pandas/tests/test_strings.py | 4 +- pandas/tools/plotting.py | 4 +- pandas/tools/util.py | 4 +- pandas/tseries/index.py | 4 +- pandas/tseries/offsets.py | 6 +- pandas/tseries/tdi.py | 4 +- pandas/tseries/tests/test_timezones.py | 12 +- pandas/tseries/tests/test_tslib.py | 4 +- pandas/tslib.pyx | 66 ++++----- pandas/util/decorators.py | 2 +- 31 files changed, 334 insertions(+), 345 deletions(-) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index eddeb52ca1154..a31b35ba4afc6 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -838,9 +838,9 @@ cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n): cpdef numeric median(numeric[:] arr): - ''' + """ A faster median - ''' + """ cdef Py_ssize_t n = arr.size if n == 0: @@ -999,7 +999,7 @@ def roll_mean(ndarray[double_t] input, # Exponentially weighted moving average def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, int minp): - ''' + """ Compute exponentially-weighted moving average using center-of-mass. Parameters @@ -1013,7 +1013,7 @@ def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, int m Returns ------- y : ndarray - ''' + """ cdef Py_ssize_t N = len(input) cdef ndarray[double_t] output = np.empty(N, dtype=float) @@ -1061,7 +1061,7 @@ def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, int m def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, double_t com, int adjust, int ignore_na, int minp, int bias): - ''' + """ Compute exponentially-weighted moving variance using center-of-mass. 
Parameters @@ -1077,7 +1077,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, Returns ------- y : ndarray - ''' + """ cdef Py_ssize_t N = len(input_x) if len(input_y) != N: @@ -1761,9 +1761,9 @@ cdef _roll_min_max(ndarray[numeric] a, int window, int minp, bint is_max): def roll_quantile(ndarray[float64_t, cast=True] input, int win, int minp, double quantile): - ''' + """ O(N log(window)) implementation using skip list - ''' + """ cdef double val, prev, midpoint cdef IndexableSkiplist skiplist cdef Py_ssize_t nobs = 0, i @@ -1997,12 +1997,12 @@ def groupby_indices(ndarray values): @cython.wraparound(False) @cython.boundscheck(False) def group_labels(ndarray[object] values): - ''' + """ Compute label vector from input values and associated useful data Returns ------- - ''' + """ cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] labels = np.empty(n, dtype=np.int64) @@ -2074,9 +2074,9 @@ def group_nth_object(ndarray[object, ndim=2] out, ndarray[object, ndim=2] values, ndarray[int64_t] labels, int64_t rank): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab object val @@ -2117,9 +2117,9 @@ def group_nth_bin_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, ndarray[int64_t] bins, int64_t rank): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, ngroups, b object val @@ -2167,9 +2167,9 @@ def group_last_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab object val @@ -2209,9 +2209,9 @@ def group_last_bin_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, ndarray[int64_t] bins): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, ngroups, b object val diff --git a/pandas/compat/chainmap_impl.py b/pandas/compat/chainmap_impl.py index c059ad08d4a7f..05a0d5faa4c2a 100644 --- a/pandas/compat/chainmap_impl.py +++ b/pandas/compat/chainmap_impl.py @@ -33,7 +33,7 @@ def wrapper(self): class ChainMap(MutableMapping): - ''' A ChainMap groups multiple dicts (or other mappings) together + """ A ChainMap groups multiple dicts (or other mappings) together to create a single, updateable view. The underlying mappings are stored in a list. That list is public and can @@ -43,13 +43,13 @@ class ChainMap(MutableMapping): In contrast, writes, updates, and deletions only operate on the first mapping. - ''' + """ def __init__(self, *maps): - '''Initialize a ChainMap by setting *maps* to the given mappings. + """Initialize a ChainMap by setting *maps* to the given mappings. If no mappings are provided, a single empty dictionary is used. - ''' + """ self.maps = list(maps) or [{}] # always at least one map def __missing__(self, key): @@ -101,10 +101,10 @@ def copy(self): __copy__ = copy def new_child(self, m=None): # like Django's Context.push() - ''' + """ New ChainMap with a new map followed by all previous maps. If no map is provided, an empty dict is used. 
- ''' + """ if m is None: m = {} return self.__class__(m, *self.maps) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7ff6683be69ac..9c044b6c22aea 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1531,10 +1531,9 @@ def size(self): @cache_readonly def _max_groupsize(self): - ''' + """ Compute size of largest group - - ''' + """ # For many items in each group this is much faster than # self.size().max(), in worst case marginally slower if self.indices: diff --git a/pandas/core/series.py b/pandas/core/series.py index e172ef7db58e6..a33d5598be7cd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1129,7 +1129,7 @@ def to_sparse(self, kind='block', fill_value=None): fill_value=fill_value).__finalize__(self) def _set_name(self, name, inplace=False): - ''' + """ Set the Series name. Parameters @@ -1137,7 +1137,7 @@ def _set_name(self, name, inplace=False): name : str inplace : bool whether to modify `self` directly or return a copy - ''' + """ ser = self if inplace else self.copy() ser.name = name return ser diff --git a/pandas/index.pyx b/pandas/index.pyx index 25e6f35ad2a0d..71717dd2d771b 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -101,9 +101,9 @@ cdef class IndexEngine: return val in self.mapping cpdef get_value(self, ndarray arr, object key, object tz=None): - ''' + """ arr : 1-dimensional ndarray - ''' + """ cdef: object loc void* data_ptr @@ -119,9 +119,9 @@ cdef class IndexEngine: return util.get_value_at(arr, loc) cpdef set_value(self, ndarray arr, object key, object value): - ''' + """ arr : 1-dimensional ndarray - ''' + """ cdef: object loc void* data_ptr diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 75f13226f4f50..5a3e7064fda86 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2569,10 +2569,10 @@ def _join_level(self, other, level, how='left', return_indexers=False, from .multi import MultiIndex def _get_leaf_sorter(labels): - ''' + """ returns sorter for the inner most level while preserving the order of higher levels - ''' + """ if labels[0].size == 0: return np.empty(0, dtype='int64') diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index a4d8ea059dd7d..dd58bb30bf7b7 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1528,7 +1528,7 @@ def get_loc(self, key, method=None): 'currently supported for MultiIndex') def _maybe_to_slice(loc): - '''convert integer indexer to boolean mask or slice if possible''' + """convert integer indexer to boolean mask or slice if possible""" if not isinstance(loc, np.ndarray) or loc.dtype != 'int64': return loc diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index b72258cbf588d..9a995c17f0445 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -2385,8 +2385,6 @@ def test_uquery(self): sys.stdout = sys.__stdout__ def test_keyword_as_column_names(self): - ''' - ''' df = DataFrame({'From': np.ones(5)}) sql.to_sql(df, con=self.conn, name='testkeywords', index=False) @@ -2751,8 +2749,6 @@ def test_uquery(self): sys.stdout = sys.__stdout__ def test_keyword_as_column_names(self): - ''' - ''' _skip_if_no_pymysql() df = DataFrame({'From': np.ones(5)}) sql.to_sql(df, con=self.conn, name='testkeywords', diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 50ffae4970998..81b4947f06b16 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -228,8 +228,8 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US', return out,"Success" def get_countries(): - '''Query information 
about countries - ''' + """Query information about countries + """ url = 'http://api.worldbank.org/countries/?per_page=1000&format=json' with urlopen(url) as response: data = response.read() @@ -243,8 +243,8 @@ def get_countries(): return data def get_indicators(): - '''Download information about all World Bank data series - ''' + """Download information about all World Bank data series + """ url = 'http://api.worldbank.org/indicators?per_page=50000&format=json' with urlopen(url) as response: data = response.read() diff --git a/pandas/lib.pyx b/pandas/lib.pyx index a6d6ea50caa0b..328166168a3fc 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -95,7 +95,7 @@ def values_from_object(object o): return o cpdef map_indices_list(list index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -103,7 +103,7 @@ cpdef map_indices_list(list index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -134,7 +134,7 @@ def ismember_nans(float64_t[:] arr, set values, bint hasnans): def ismember(ndarray arr, set values): - ''' + """ Checks whether Parameters @@ -145,7 +145,7 @@ def ismember(ndarray arr, set values): Returns ------- ismember : ndarray (boolean dtype) - ''' + """ cdef: Py_ssize_t i, n ndarray[uint8_t] result @@ -160,7 +160,7 @@ def ismember(ndarray arr, set values): return result.view(np.bool_) def ismember_int64(ndarray[int64_t] arr, set values): - ''' + """ Checks whether Parameters @@ -171,7 +171,7 @@ def ismember_int64(ndarray[int64_t] arr, set values): Returns ------- ismember : ndarray (boolean dtype) - ''' + """ cdef: Py_ssize_t i, n ndarray[uint8_t] result @@ -404,10 +404,10 @@ def isnullobj2d_old(ndarray[object, ndim=2] arr): @cython.wraparound(False) @cython.boundscheck(False) cpdef ndarray[object] list_to_object_array(list obj): - ''' + """ Convert list to object ndarray. 
Seriously can\'t believe I had to write this function - ''' + """ cdef: Py_ssize_t i, n = len(obj) ndarray[object] arr = np.empty(n, dtype=object) @@ -542,9 +542,9 @@ def dicts_to_array(list dicts, list columns): return result def fast_zip(list ndarrays): - ''' + """ For zipping multiple ndarrays into an ndarray of tuples - ''' + """ cdef: Py_ssize_t i, j, k, n ndarray[object] result @@ -959,9 +959,9 @@ cpdef ndarray[object] astype_str(ndarray arr): return result def clean_index_list(list obj): - ''' + """ Utility used in pandas.core.index._ensure_index - ''' + """ cdef: ndarray[object] converted Py_ssize_t i, n = len(obj) @@ -1325,9 +1325,9 @@ cdef class _PandasNull: pandas_null = _PandasNull() def fast_zip_fillna(list ndarrays, fill_value=pandas_null): - ''' + """ For zipping multiple ndarrays into an ndarray of tuples - ''' + """ cdef: Py_ssize_t i, j, k, n ndarray[object] result diff --git a/pandas/parser.pyx b/pandas/parser.pyx index f48e9852c6496..94d7f36f4f205 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -248,11 +248,11 @@ _NA_VALUES = [b'-1.#IND', b'1.#QNAN', b'1.#IND', b'-1.#QNAN', cdef class TextReader: - ''' + """ # source: StringIO or file object - ''' + """ cdef: parser_t *parser diff --git a/pandas/sandbox/qtpandas.py b/pandas/sandbox/qtpandas.py index 4f4d77bcdf268..b6af40a0e2156 100644 --- a/pandas/sandbox/qtpandas.py +++ b/pandas/sandbox/qtpandas.py @@ -1,8 +1,8 @@ -''' +""" Easy integration of DataFrame into pyqt framework @author: Jev Kuznetsov -''' +""" # flake8: noqa @@ -27,7 +27,7 @@ class DataFrameModel(QAbstractTableModel): - ''' data model for a DataFrame class ''' + """ data model for a DataFrame class """ def __init__(self): super(DataFrameModel, self).__init__() self.df = DataFrame() @@ -36,8 +36,8 @@ def setDataFrame(self, dataFrame): self.df = dataFrame def signalUpdate(self): - ''' tell viewers to update their data (this is full update, not - efficient)''' + """ tell viewers to update their data (this is full update, not + efficient)""" self.layoutChanged.emit() #------------- table display functions ----------------- @@ -93,7 +93,7 @@ def columnCount(self, index=QModelIndex()): class DataFrameWidget(QWidget): - ''' a simple widget for using DataFrames in a gui ''' + """ a simple widget for using DataFrames in a gui """ def __init__(self, dataFrame, parent=None): super(DataFrameWidget, self).__init__(parent) @@ -116,7 +116,7 @@ def setDataFrame(self, dataFrame): def testDf(): - ''' creates test dataframe ''' + """ creates test dataframe """ data = {'int': [1, 2, 3], 'float': [1.5, 2.5, 3.5], 'string': ['a', 'b', 'c'], 'nan': [np.nan, np.nan, np.nan]} return DataFrame(data, index=Index(['AAA', 'BBB', 'CCC']), diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 053e69b7f5426..309a81b38f4e1 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -284,7 +284,7 @@ def take_2d_multi_%(name)s_%(dest)s(ndarray[%(c_type_in)s, ndim=2] values, -''' +""" Backfilling logic for generating fill vector Diagram of what's going on @@ -307,7 +307,7 @@ def take_2d_multi_%(name)s_%(dest)s(ndarray[%(c_type_in)s, ndim=2] values, . 0 . 
0 D -''' +""" backfill_template = """@cython.boundscheck(False) @cython.wraparound(False) @@ -621,14 +621,14 @@ def diff_2d_%(name)s(ndarray[%(c_type)s, ndim=2] arr, out[i, j] = arr[i, j] - arr[i, j - periods] """ -is_monotonic_template = """@cython.boundscheck(False) +is_monotonic_template = '''@cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n %(c_type)s prev, cur @@ -674,12 +674,12 @@ def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike): %(tab)s break %(tab)s prev = cur return is_monotonic_inc, is_monotonic_dec -""" +''' -map_indices_template = """@cython.wraparound(False) +map_indices_template = '''@cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_%(name)s(ndarray[%(c_type)s] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -687,7 +687,7 @@ def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -697,9 +697,9 @@ def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike): result[index[i]] = i return result -""" +''' -groupby_template = """@cython.wraparound(False) +groupby_template = '''@cython.wraparound(False) @cython.boundscheck(False) def groupby_%(name)s(ndarray[%(c_type)s] index, ndarray labels): cdef dict result = {} @@ -726,17 +726,17 @@ def groupby_%(name)s(ndarray[%(c_type)s] index, ndarray labels): result[key] = [idx] return result -""" +''' -group_last_template = """@cython.wraparound(False) +group_last_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(c_type)s, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) %(dest_type2)s val, count @@ -772,17 +772,17 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = %(nan_val)s else: out[i, j] = resx[i, j] -""" +''' -group_nth_template = """@cython.wraparound(False) +group_nth_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(c_type)s, ndim=2] values, ndarray[int64_t] labels, int64_t rank): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) %(dest_type2)s val, count @@ -819,17 +819,17 @@ def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = %(nan_val)s else: out[i, j] = resx[i, j] -""" +''' -group_add_template = """@cython.wraparound(False) +group_add_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_add_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(c_type)s, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) %(dest_type2)s val, count @@ -883,17 +883,17 @@ def group_add_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = NAN else: out[i, j] = sumx[i, j] -""" +''' -group_prod_template = """@cython.wraparound(False) +group_prod_template = '''@cython.wraparound(False) @cython.boundscheck(False) def 
group_prod_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(c_type)s, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) %(dest_type2)s val, count @@ -942,9 +942,9 @@ def group_prod_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = NAN else: out[i, j] = prodx[i, j] -""" +''' -group_var_template = """@cython.wraparound(False) +group_var_template = '''@cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision(True) def group_var_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, @@ -992,7 +992,7 @@ def group_var_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, else: out[i, j] /= (ct - 1) -""" +''' # add passing bin edges, instead of labels @@ -1000,15 +1000,15 @@ def group_var_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, #---------------------------------------------------------------------- # group_min, group_max -group_max_template = """@cython.wraparound(False) +group_max_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(dest_type2)s, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) %(dest_type2)s val, count @@ -1061,17 +1061,17 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = %(nan_val)s else: out[i, j] = maxx[i, j] -""" +''' -group_min_template = """@cython.wraparound(False) +group_min_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(dest_type2)s, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) %(dest_type2)s val, count @@ -1125,10 +1125,10 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = %(nan_val)s else: out[i, j] = minx[i, j] -""" +''' -group_mean_template = """@cython.wraparound(False) +group_mean_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_mean_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, @@ -1181,17 +1181,17 @@ def group_mean_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = NAN else: out[i, j] = sumx[i, j] / count -""" +''' -group_ohlc_template = """@cython.wraparound(False) +group_ohlc_template = '''@cython.wraparound(False) @cython.boundscheck(False) def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(dest_type2)s, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab %(dest_type2)s val, count @@ -1227,9 +1227,9 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[lab, 1] = max(out[lab, 1], val) out[lab, 2] = min(out[lab, 2], val) out[lab, 3] = val -""" +''' -arrmap_template = """@cython.wraparound(False) +arrmap_template = '''@cython.wraparound(False) @cython.boundscheck(False) def arrmap_%(name)s(ndarray[%(c_type)s] index, object func): cdef Py_ssize_t length = index.shape[0] @@ -1243,14 +1243,14 @@ def arrmap_%(name)s(ndarray[%(c_type)s] index, object func): result[i] = func(index[i]) return maybe_convert_objects(result) -""" +''' #---------------------------------------------------------------------- # Joins on ordered, unique indices # right might contain non-unique values 
-left_join_unique_template = """@cython.wraparound(False) +left_join_unique_template = '''@cython.wraparound(False) @cython.boundscheck(False) def left_join_indexer_unique_%(name)s(ndarray[%(c_type)s] left, ndarray[%(c_type)s] right): @@ -1294,17 +1294,16 @@ def left_join_indexer_unique_%(name)s(ndarray[%(c_type)s] left, indexer[i] = -1 i += 1 return indexer -""" +''' # @cython.wraparound(False) # @cython.boundscheck(False) -left_join_template = """ -def left_join_indexer_%(name)s(ndarray[%(c_type)s] left, - ndarray[%(c_type)s] right): - ''' +left_join_template = '''def left_join_indexer_%(name)s(ndarray[%(c_type)s] left, + ndarray[%(c_type)s] right): + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count %(c_type)s lval, rval @@ -1400,16 +1399,16 @@ def left_join_indexer_%(name)s(ndarray[%(c_type)s] left, j += 1 return result, lindexer, rindexer -""" +''' -inner_join_template = """@cython.wraparound(False) +inner_join_template = '''@cython.wraparound(False) @cython.boundscheck(False) def inner_join_indexer_%(name)s(ndarray[%(c_type)s] left, ndarray[%(c_type)s] right): - ''' + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count %(c_type)s lval, rval @@ -1495,10 +1494,10 @@ def inner_join_indexer_%(name)s(ndarray[%(c_type)s] left, j += 1 return result, lindexer, rindexer -""" +''' -outer_join_template2 = """@cython.wraparound(False) +outer_join_template2 = '''@cython.wraparound(False) @cython.boundscheck(False) def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, ndarray[%(c_type)s] right): @@ -1626,9 +1625,9 @@ def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, j += 1 return result, lindexer, rindexer -""" +''' -outer_join_template = """@cython.wraparound(False) +outer_join_template = '''@cython.wraparound(False) @cython.boundscheck(False) def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, ndarray[%(c_type)s] right): @@ -1723,7 +1722,7 @@ def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, count += 1 return result, lindexer, rindexer -""" +''' # ensure_dtype functions @@ -1774,14 +1773,13 @@ def put2d_%(name)s_%(dest_type)s(ndarray[%(c_type)s, ndim=2, cast=True] values, #---------------------------------------------------------------------- # other grouping functions not needing a template -grouping_no_template = """ -def group_median_float64(ndarray[float64_t, ndim=2] out, +grouping_no_template = '''def group_median_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, ngroups, size ndarray[int64_t] _counts @@ -1806,15 +1804,16 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, out[j, i] = _median_linear(ptr, size) ptr += size + @cython.boundscheck(False) @cython.wraparound(False) def group_cumprod_float64(float64_t[:,:] out, float64_t[:,:] values, int64_t[:] labels, float64_t[:,:] accum): - ''' + """ Only transforms on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, size float64_t val @@ -1841,9 +1840,9 @@ def group_cumsum(numeric[:,:] out, numeric[:,:] values, int64_t[:] labels, numeric[:,:] accum): - ''' + """ Only transforms on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, size numeric val @@ -1908,7 +1907,7 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels, out[ii] = -1 label_indexer[lab, idxer_slot] = ii -""" +''' 
#------------------------------------------------------------------------- diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index 99031da48dd20..c6dcd609a2c6e 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -128,7 +128,7 @@ cpdef ensure_int64(object arr): @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_float64(ndarray[float64_t] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -136,7 +136,7 @@ cpdef map_indices_float64(ndarray[float64_t] index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -150,7 +150,7 @@ cpdef map_indices_float64(ndarray[float64_t] index): @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_float32(ndarray[float32_t] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -158,7 +158,7 @@ cpdef map_indices_float32(ndarray[float32_t] index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -172,7 +172,7 @@ cpdef map_indices_float32(ndarray[float32_t] index): @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_object(ndarray[object] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -180,7 +180,7 @@ cpdef map_indices_object(ndarray[object] index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -194,7 +194,7 @@ cpdef map_indices_object(ndarray[object] index): @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_int32(ndarray[int32_t] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -202,7 +202,7 @@ cpdef map_indices_int32(ndarray[int32_t] index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -216,7 +216,7 @@ cpdef map_indices_int32(ndarray[int32_t] index): @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_int64(ndarray[int64_t] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -224,7 +224,7 @@ cpdef map_indices_int64(ndarray[int64_t] index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. - ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -238,7 +238,7 @@ cpdef map_indices_int64(ndarray[int64_t] index): @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_bool(ndarray[uint8_t] index): - ''' + """ Produce a dict mapping the values of the input array to their respective locations. @@ -246,7 +246,7 @@ cpdef map_indices_bool(ndarray[uint8_t] index): array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} Better to do this with Cython because of the enormous speed boost. 
- ''' + """ cdef Py_ssize_t i, length cdef dict result = {} @@ -1821,11 +1821,11 @@ def backfill_2d_inplace_bool(ndarray[uint8_t, ndim=2] values, @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_float64(ndarray[float64_t] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n float64_t prev, cur @@ -1875,11 +1875,11 @@ def is_monotonic_float64(ndarray[float64_t] arr, bint timelike): @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_float32(ndarray[float32_t] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n float32_t prev, cur @@ -1929,11 +1929,11 @@ def is_monotonic_float32(ndarray[float32_t] arr, bint timelike): @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_object(ndarray[object] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n object prev, cur @@ -1983,11 +1983,11 @@ def is_monotonic_object(ndarray[object] arr, bint timelike): @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_int32(ndarray[int32_t] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n int32_t prev, cur @@ -2037,11 +2037,11 @@ def is_monotonic_int32(ndarray[int32_t] arr, bint timelike): @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_int64(ndarray[int64_t] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n int64_t prev, cur @@ -2091,11 +2091,11 @@ def is_monotonic_int64(ndarray[int64_t] arr, bint timelike): @cython.boundscheck(False) @cython.wraparound(False) def is_monotonic_bool(ndarray[uint8_t] arr, bint timelike): - ''' + """ Returns ------- is_monotonic_inc, is_monotonic_dec - ''' + """ cdef: Py_ssize_t i, n uint8_t prev, cur @@ -7352,9 +7352,9 @@ def group_add_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count @@ -7415,9 +7415,9 @@ def group_add_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count @@ -7479,9 +7479,9 @@ def group_prod_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count @@ -7537,9 +7537,9 @@ def group_prod_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count @@ -7804,9 +7804,9 @@ def group_ohlc_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab float64_t val, count @@ -7849,9 +7849,9 @@ def group_ohlc_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - 
''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab float32_t val, count @@ -7895,9 +7895,9 @@ def group_last_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count @@ -7940,9 +7940,9 @@ def group_last_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count @@ -7985,9 +7985,9 @@ def group_last_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[int64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) int64_t val, count @@ -8031,9 +8031,9 @@ def group_nth_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels, int64_t rank): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count @@ -8077,9 +8077,9 @@ def group_nth_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels, int64_t rank): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count @@ -8123,9 +8123,9 @@ def group_nth_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[int64_t, ndim=2] values, ndarray[int64_t] labels, int64_t rank): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) int64_t val, count @@ -8170,9 +8170,9 @@ def group_min_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count @@ -8233,9 +8233,9 @@ def group_min_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count @@ -8296,9 +8296,9 @@ def group_min_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[int64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) int64_t val, count @@ -8360,9 +8360,9 @@ def group_max_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count @@ -8422,9 +8422,9 @@ def group_max_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count @@ -8484,9 +8484,9 @@ def group_max_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[int64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = 
len(counts) int64_t val, count @@ -8541,14 +8541,13 @@ def group_max_int64(ndarray[int64_t, ndim=2] out, out[i, j] = maxx[i, j] - def group_median_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' + """ Only aggregates on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, ngroups, size ndarray[int64_t] _counts @@ -8573,15 +8572,16 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, out[j, i] = _median_linear(ptr, size) ptr += size + @cython.boundscheck(False) @cython.wraparound(False) def group_cumprod_float64(float64_t[:,:] out, float64_t[:,:] values, int64_t[:] labels, float64_t[:,:] accum): - ''' + """ Only transforms on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, size float64_t val @@ -8608,9 +8608,9 @@ def group_cumsum(numeric[:,:] out, numeric[:,:] values, int64_t[:] labels, numeric[:,:] accum): - ''' + """ Only transforms on axis=0 - ''' + """ cdef: Py_ssize_t i, j, N, K, size numeric val @@ -8902,12 +8902,11 @@ def left_join_indexer_unique_int64(ndarray[int64_t] left, return indexer - def left_join_indexer_float64(ndarray[float64_t] left, - ndarray[float64_t] right): - ''' + ndarray[float64_t] right): + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count float64_t lval, rval @@ -9004,12 +9003,11 @@ def left_join_indexer_float64(ndarray[float64_t] left, return result, lindexer, rindexer - def left_join_indexer_float32(ndarray[float32_t] left, - ndarray[float32_t] right): - ''' + ndarray[float32_t] right): + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count float32_t lval, rval @@ -9106,12 +9104,11 @@ def left_join_indexer_float32(ndarray[float32_t] left, return result, lindexer, rindexer - def left_join_indexer_object(ndarray[object] left, - ndarray[object] right): - ''' + ndarray[object] right): + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count object lval, rval @@ -9208,12 +9205,11 @@ def left_join_indexer_object(ndarray[object] left, return result, lindexer, rindexer - def left_join_indexer_int32(ndarray[int32_t] left, - ndarray[int32_t] right): - ''' + ndarray[int32_t] right): + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count int32_t lval, rval @@ -9310,12 +9306,11 @@ def left_join_indexer_int32(ndarray[int32_t] left, return result, lindexer, rindexer - def left_join_indexer_int64(ndarray[int64_t] left, - ndarray[int64_t] right): - ''' + ndarray[int64_t] right): + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count int64_t lval, rval @@ -10063,9 +10058,9 @@ def outer_join_indexer_int64(ndarray[int64_t] left, @cython.boundscheck(False) def inner_join_indexer_float64(ndarray[float64_t] left, ndarray[float64_t] right): - ''' + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count float64_t lval, rval @@ -10156,9 +10151,9 @@ def inner_join_indexer_float64(ndarray[float64_t] left, @cython.boundscheck(False) def inner_join_indexer_float32(ndarray[float32_t] left, ndarray[float32_t] right): - ''' + """ Two-pass algorithm for monotonic indexes. 
Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count float32_t lval, rval @@ -10249,9 +10244,9 @@ def inner_join_indexer_float32(ndarray[float32_t] left, @cython.boundscheck(False) def inner_join_indexer_object(ndarray[object] left, ndarray[object] right): - ''' + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count object lval, rval @@ -10342,9 +10337,9 @@ def inner_join_indexer_object(ndarray[object] left, @cython.boundscheck(False) def inner_join_indexer_int32(ndarray[int32_t] left, ndarray[int32_t] right): - ''' + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count int32_t lval, rval @@ -10435,9 +10430,9 @@ def inner_join_indexer_int32(ndarray[int32_t] left, @cython.boundscheck(False) def inner_join_indexer_int64(ndarray[int64_t] left, ndarray[int64_t] right): - ''' + """ Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' + """ cdef: Py_ssize_t i, j, k, nright, nleft, count int64_t lval, rval diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 1a5703eb91053..35c055e5e48cd 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -548,10 +548,10 @@ cdef int64_t iINT64_MIN = INT64_MIN def maybe_convert_numeric(object[:] values, set na_values, bint convert_empty=True, bint coerce_numeric=False): - ''' + """ Type inference function-- convert strings to numeric (potentially) and convert to proper dtype array - ''' + """ cdef: int status, maybe_int Py_ssize_t i, n = values.size @@ -628,9 +628,9 @@ def maybe_convert_numeric(object[:] values, set na_values, def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint safe=0, bint convert_datetime=0, bint convert_timedelta=0): - ''' + """ Type inference function-- convert object array to proper dtype - ''' + """ cdef: Py_ssize_t i, n ndarray[float64_t] floats @@ -1024,7 +1024,7 @@ def maybe_convert_bool(ndarray[object] arr, def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, bint convert=1): - ''' + """ Substitute for np.vectorize with pandas-friendly dtype inference Parameters @@ -1035,7 +1035,7 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, Returns ------- mapped : ndarray - ''' + """ cdef: Py_ssize_t i, n ndarray[object] result @@ -1065,7 +1065,7 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, return result def map_infer(ndarray arr, object f, bint convert=1): - ''' + """ Substitute for np.vectorize with pandas-friendly dtype inference Parameters @@ -1076,7 +1076,7 @@ def map_infer(ndarray arr, object f, bint convert=1): Returns ------- mapped : ndarray - ''' + """ cdef: Py_ssize_t i, n ndarray[object] result diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index 892fee77eb177..c3f8bdfbfd0a6 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -19,10 +19,10 @@ cdef _get_result_array(object obj, cdef class Reducer: - ''' + """ Performs generic reduction operation on a C or Fortran-contiguous ndarray while avoiding ndarray construction overhead - ''' + """ cdef: Py_ssize_t increment, chunksize, nresults object arr, dummy, f, labels, typ, ityp, index @@ -159,9 +159,9 @@ cdef class Reducer: cdef class SeriesBinGrouper: - ''' + """ Performs grouping operation according to bin edges, rather than labels - ''' + """ cdef: Py_ssize_t nresults, ngroups bint passed_dummy @@ -284,10 +284,10 @@ cdef class 
SeriesBinGrouper: cdef class SeriesGrouper: - ''' + """ Performs generic grouping operation while avoiding ndarray construction overhead - ''' + """ cdef: Py_ssize_t nresults, ngroups bint passed_dummy @@ -409,8 +409,8 @@ cdef class SeriesGrouper: cdef inline _extract_result(object res): - ''' extract the result object, it might be a 0-dim ndarray - or a len-1 0-dim, or a scalar ''' + """ extract the result object, it might be a 0-dim ndarray + or a len-1 0-dim, or a scalar """ if hasattr(res,'values'): res = res.values if not np.isscalar(res): @@ -422,9 +422,9 @@ cdef inline _extract_result(object res): return res cdef class Slider: - ''' + """ Only handles contiguous data for now - ''' + """ cdef: ndarray values, buf Py_ssize_t stride, orig_len, orig_stride @@ -452,9 +452,9 @@ cdef class Slider: self.buf.data = self.buf.data + self.stride * k cdef move(self, int start, int end): - ''' + """ For slicing - ''' + """ self.buf.data = self.values.data + self.stride * start self.buf.shape[0] = end - start @@ -526,9 +526,9 @@ def apply_frame_axis0(object frame, object f, object names, return results, mutated cdef class BlockSlider: - ''' + """ Only capable of sliding on axis=0 - ''' + """ cdef public: object frame, dummy, index diff --git a/pandas/src/skiplist.pyx b/pandas/src/skiplist.pyx index 4e00fd276c729..e7db7bd5a4a02 100644 --- a/pandas/src/skiplist.pyx +++ b/pandas/src/skiplist.pyx @@ -44,10 +44,10 @@ cdef class Node: NIL = Node(np.inf, [], []) cdef class IndexableSkiplist: - ''' + """ Sorted collection supporting O(lg n) insertion, removal, and lookup by rank. - ''' + """ cdef: Py_ssize_t size, maxlevels Node head diff --git a/pandas/src/sparse.pyx b/pandas/src/sparse.pyx index 30814ed7b0bc3..29f3d61033f6a 100644 --- a/pandas/src/sparse.pyx +++ b/pandas/src/sparse.pyx @@ -32,15 +32,15 @@ cdef inline int int_min(int a, int b): return a if a <= b else b cdef class SparseIndex: - ''' + """ Abstract superclass for sparse index types - ''' + """ def __init__(self): raise NotImplementedError cdef class IntIndex(SparseIndex): - ''' + """ Object for holding exact integer sparse indexing information Parameters @@ -48,7 +48,7 @@ cdef class IntIndex(SparseIndex): length : integer indices : array-like Contains integers corresponding to - ''' + """ cdef readonly: Py_ssize_t length, npoints ndarray indices @@ -68,10 +68,10 @@ cdef class IntIndex(SparseIndex): return output def check_integrity(self): - ''' + """ Only need be strictly ascending and nothing less than 0 or greater than totall ength - ''' + """ pass def equals(self, other): @@ -305,12 +305,12 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices): # BlockIndex cdef class BlockIndex(SparseIndex): - ''' + """ Object for holding block-based sparse indexing information Parameters ---------- - ''' + """ cdef readonly: Py_ssize_t nblocks, npoints, length ndarray blocs, blengths @@ -354,12 +354,12 @@ cdef class BlockIndex(SparseIndex): return self.length - self.npoints cpdef check_integrity(self): - ''' + """ Check: - Locations are in ascending order - No overlapping blocks - Blocks to not start after end of index, nor extend beyond end - ''' + """ cdef: Py_ssize_t i ndarray[int32_t, ndim=1] blocs, blengths @@ -419,7 +419,7 @@ cdef class BlockIndex(SparseIndex): return IntIndex(self.length, indices) cpdef BlockIndex intersect(self, SparseIndex other): - ''' + """ Intersect two BlockIndex objects Parameters @@ -428,7 +428,7 @@ cdef class BlockIndex(SparseIndex): Returns ------- intersection : BlockIndex - ''' + """ cdef: BlockIndex y 
ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen @@ -497,7 +497,7 @@ cdef class BlockIndex(SparseIndex): return BlockIndex(self.length, out_blocs, out_blengths) cpdef BlockIndex make_union(self, SparseIndex y): - ''' + """ Combine together two BlockIndex objects, accepting indices if contained in one or the other @@ -512,7 +512,7 @@ cdef class BlockIndex(SparseIndex): Returns ------- union : BlockIndex - ''' + """ return BlockUnion(self, y.to_block_index()).result cpdef int lookup(self, Py_ssize_t index): @@ -604,10 +604,10 @@ cdef class BlockIndex(SparseIndex): cdef class BlockMerge(object): - ''' + """ Object-oriented approach makes sharing state between recursive functions a lot easier and reduces code duplication - ''' + """ cdef: BlockIndex x, y, result ndarray xstart, xlen, xend, ystart, ylen, yend @@ -646,16 +646,16 @@ cdef class BlockMerge(object): self.yi = xi cdef class BlockIntersection(BlockMerge): - ''' + """ not done yet - ''' + """ pass cdef class BlockUnion(BlockMerge): - ''' + """ Object-oriented approach makes sharing state between recursive functions a lot easier and reduces code duplication - ''' + """ cdef _make_merged_blocks(self): cdef: @@ -697,12 +697,12 @@ cdef class BlockUnion(BlockMerge): return BlockIndex(self.x.length, out_blocs, out_blengths) cdef int32_t _find_next_block_end(self, bint mode) except -1: - ''' + """ Wow, this got complicated in a hurry mode 0: block started in index x mode 1: block started in index y - ''' + """ cdef: ndarray[int32_t, ndim=1] xstart, xend, ystart, yend int32_t xi, yi, xnblocks, ynblocks, nend @@ -782,9 +782,9 @@ cdef inline tuple sparse_combine(ndarray x, SparseIndex xindex, float64_t xfill, cdef inline tuple block_op(ndarray x_, BlockIndex xindex, float64_t xfill, ndarray y_, BlockIndex yindex, float64_t yfill, double_func op): - ''' + """ Binary operator on BlockIndex objects with fill values - ''' + """ cdef: BlockIndex out_index @@ -1139,10 +1139,10 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map): # cdef class SparseCruncher(object): -# ''' +# """ # Class to acquire float pointer for convenient operations on sparse data # structures -# ''' +# """ # cdef: # SparseIndex index # float64_t* buf diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 8e659d42bab25..725a4e8296dd2 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -265,9 +265,9 @@ class TestOLSMisc(tm.TestCase): _multiprocess_can_split_ = True - ''' + """ For test coverage with faux data - ''' + """ @classmethod def setUpClass(cls): super(TestOLSMisc, cls).setUpClass() diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 154837fc2a3b1..6e0a0175b403f 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -231,12 +231,12 @@ def test_map_na_exclusion(self): assert_series_equal(result, exp) def test_map_dict_with_tuple_keys(self): - ''' + """ Due to new MultiIndex-ing behaviour in v0.14.0, dicts with tuple keys passed to map were being converted to a multi-index, preventing tuple values from being mapped properly. 
- ''' + """ df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]}) label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'} df['labels'] = df['a'].map(label_mappings) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 06046accaa0d9..c5ef969d3b39d 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1360,7 +1360,7 @@ def test_operators_na_handling(self): assert_series_equal(result, expected) def test_divide_decimal(self): - ''' resolves issue #9787 ''' + """ resolves issue #9787 """ from decimal import Decimal expected = Series([Decimal(5)]) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 1f9f7d43e8568..4dd9a2653d687 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -850,13 +850,13 @@ def test_extractall(self): ("rob", "gmail", "com"), ("steve", "gmail", "com"), ("a", "b", "com"), ("c", "d", "com"), ("e", "f", "com"), ] - named_pattern = r''' + named_pattern = r""" (?P[a-z0-9]+) @ (?P[a-z]+) \. (?P[a-z]{2,4}) - ''' + """ expected_columns = ["user", "domain", "tld"] S = Series(subject_list) # extractall should return a DataFrame with one row for each diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 1433ce65b3021..334d5e3b2bd80 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1413,7 +1413,7 @@ def _get_colors(self, num_colors=None, color_kwds='color'): color=self.kwds.get(color_kwds)) def _parse_errorbars(self, label, err): - ''' + """ Look for error keyword arguments and return the actual errorbar data or return the error DataFrame/dict @@ -1424,7 +1424,7 @@ def _parse_errorbars(self, label, err): DataFrame/dict: error values are paired with keys matching the key in the plotted DataFrame str: the name of the column within the plotted DataFrame - ''' + """ if err is None: return None diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 3b7becdf64a10..cef5dad72e50b 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -14,7 +14,7 @@ def match(needles, haystack): def cartesian_product(X): - ''' + """ Numpy version of itertools.product or pandas.compat.product. Sometimes faster (for large inputs)... 
@@ -24,7 +24,7 @@ def cartesian_product(X): [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'), array([1, 2, 1, 2, 1, 2])] - ''' + """ lenX = np.fromiter((len(x) for x in X), dtype=int) cumprodX = np.cumproduct(lenX) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 48dfb5eb014f3..746163069d3e8 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -2148,9 +2148,9 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, def _to_m8(key, tz=None): - ''' + """ Timestamp-like => dt64 - ''' + """ if not isinstance(key, Timestamp): # this also converts strings key = Timestamp(key, tz=tz) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 01ed4b65fbaee..6e1ab20fa6560 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -958,7 +958,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['calendar'] = self.calendar = calendar def get_calendar(self, weekmask, holidays, calendar): - '''Generate busdaycalendar''' + """Generate busdaycalendar""" if isinstance(calendar, np.busdaycalendar): if not holidays: holidays = tuple(calendar.holidays) @@ -2422,12 +2422,12 @@ def _from_name(cls, *args): class Easter(DateOffset): - ''' + """ DateOffset for the Easter holiday using logic defined in dateutil. Right now uses the revised method which is valid in years 1583-4099. - ''' + """ _adjust_dst = True def __init__(self, n=1, **kwds): diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 423ccea7d4673..7d731c28c0f88 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -932,9 +932,9 @@ def _is_convertible_to_td(key): def _to_m8(key): - ''' + """ Timedelta-like => dt64 - ''' + """ if not isinstance(key, Timedelta): # this also converts strings key = Timedelta(key) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index cb0b76f5d81f2..3961a8b99b4dd 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -845,21 +845,21 @@ def setUp(self): tm._skip_if_no_dateutil() def tz(self, tz): - ''' + """ Construct a dateutil timezone. Use tslib.maybe_get_tz so that we get the filename on the tz right on windows. See #7337. - ''' + """ return tslib.maybe_get_tz('dateutil/' + tz) def tzstr(self, tz): - ''' Construct a timezone string from a string. Overridden in subclass - to parameterize tests. ''' + """ Construct a timezone string from a string. Overridden in subclass + to parameterize tests. """ return 'dateutil/' + tz def cmptz(self, tz1, tz2): - ''' Compare two timezones. Overridden in subclass to parameterize - tests. ''' + """ Compare two timezones. Overridden in subclass to parameterize + tests. 
""" return tz1 == tz2 def localize(self, tz, x): diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 50bb03bb274aa..ccd1bdb08ebd0 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -459,7 +459,7 @@ def test_pprint(self): nested_obj = {'foo': 1, 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} result = pprint.pformat(nested_obj, width=50) - expected = r'''{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, @@ -469,7 +469,7 @@ def test_pprint(self): {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], - 'foo': 1}''' + 'foo': 1}""" self.assertEqual(result, expected) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 262d83d6a50b2..bd6c72e1a7a1c 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1393,10 +1393,10 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return convert_to_tsobject(ts, tz, unit) def _test_parse_iso8601(object ts): - ''' + """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used only for testing, actual construction uses `convert_str_to_tsobject` - ''' + """ cdef: _TSObject obj int out_local = 0, out_tzoffset = 0 @@ -1414,9 +1414,9 @@ def _test_parse_iso8601(object ts): return Timestamp(obj.value) cdef inline void _localize_tso(_TSObject obj, object tz): - ''' + """ Take a TSObject in UTC and localizes to timezone tz. - ''' + """ if _is_utc(tz): obj.tzinfo = tz elif _is_tzlocal(tz): @@ -1457,9 +1457,9 @@ cdef inline void _localize_tso(_TSObject obj, object tz): def _localize_pydatetime(object dt, object tz): - ''' + """ Take a datetime/Timestamp in UTC and localizes to timezone tz. - ''' + """ if tz is None: return dt elif isinstance(dt, Timestamp): @@ -1480,7 +1480,7 @@ cdef inline bint _is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) cdef inline object _get_zone(object tz): - ''' + """ We need to do several things here: 1/ Distinguish between pytz and dateutil timezones 2/ Not be over-specific (e.g. US/Eastern with/without DST is same *zone* but a different tz object) @@ -1488,7 +1488,7 @@ cdef inline object _get_zone(object tz): We return a string prefaced with dateutil if it's a dateutil tz, else just the tz name. It needs to be a string so that we can serialize it with UJSON/pytables. maybe_get_tz (below) is the inverse of this process. - ''' + """ if _is_utc(tz): return 'UTC' else: @@ -1510,10 +1510,10 @@ cdef inline object _get_zone(object tz): cpdef inline object maybe_get_tz(object tz): - ''' + """ (Maybe) Construct a timezone object from a string. If tz is a string, use it to construct a timezone object. Otherwise, just return tz. 
- ''' + """ if isinstance(tz, string_types): if tz.startswith('dateutil/'): zone = tz[9:] @@ -3459,10 +3459,10 @@ def cast_to_nanoseconds(ndarray arr): def pydt_to_i8(object pydt): - ''' + """ Convert to int64 representation compatible with numpy datetime64; converts to UTC - ''' + """ cdef: _TSObject ts @@ -3471,9 +3471,9 @@ def pydt_to_i8(object pydt): return ts.value def i8_to_pydt(int64_t i8, object tzinfo = None): - ''' + """ Inverse of pydt_to_i8 - ''' + """ return Timestamp(i8) #---------------------------------------------------------------------- @@ -3649,7 +3649,7 @@ cdef inline bint _treat_tz_as_dateutil(object tz): def _p_tz_cache_key(tz): - ''' Python interface for cache function to facilitate testing.''' + """ Python interface for cache function to facilitate testing.""" return _tz_cache_key(tz) @@ -3741,10 +3741,10 @@ cdef object _get_dst_info(object tz): return dst_cache[cache_key] cdef object _get_utc_trans_times_from_dateutil_tz(object tz): - ''' + """ Transition times in dateutil timezones are stored in local non-dst time. This code converts them to UTC. It's the reverse of the code in dateutil.tz.tzfile.__init__. - ''' + """ new_trans = list(tz._trans_list) last_std_offset = 0 for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): @@ -3951,9 +3951,9 @@ cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): #---------------------------------------------------------------------- def build_field_sarray(ndarray[int64_t] dtindex): - ''' + """ Datetime as int64 representation to a structured array of fields - ''' + """ cdef: Py_ssize_t i, count = 0 int isleap @@ -3993,9 +3993,9 @@ def build_field_sarray(ndarray[int64_t] dtindex): return out def get_time_micros(ndarray[int64_t] dtindex): - ''' + """ Datetime as int64 representation to a structured array of fields - ''' + """ cdef: Py_ssize_t i, n = len(dtindex) pandas_datetimestruct dts @@ -4014,10 +4014,10 @@ def get_time_micros(ndarray[int64_t] dtindex): @cython.wraparound(False) @cython.boundscheck(False) def get_date_field(ndarray[int64_t] dtindex, object field): - ''' + """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. - ''' + """ cdef: _TSObject ts Py_ssize_t i, count = 0 @@ -4179,11 +4179,11 @@ def get_date_field(ndarray[int64_t] dtindex, object field): @cython.wraparound(False) def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=None, int month_kw=12): - ''' + """ Given an int64-based datetime index return array of indicators of whether timestamps are at the start/end of the month/quarter/year (defined by frequency). - ''' + """ cdef: _TSObject ts Py_ssize_t i @@ -4398,10 +4398,10 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N @cython.wraparound(False) @cython.boundscheck(False) def get_date_name_field(ndarray[int64_t] dtindex, object field): - ''' + """ Given a int64-based datetime index, return array of strings of date name based on requested field (e.g. weekday_name) - ''' + """ cdef: _TSObject ts Py_ssize_t i, count = 0 @@ -4587,14 +4587,14 @@ cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month-1] cpdef normalize_date(object dt): - ''' + """ Normalize datetime.datetime value to midnight. 
Returns datetime.date as a datetime.datetime at midnight Returns ------- normalized : datetime.datetime or Timestamp - ''' + """ if PyDateTime_Check(dt): return dt.replace(hour=0, minute=0, second=0, microsecond=0) elif PyDate_Check(dt): @@ -4605,19 +4605,19 @@ cpdef normalize_date(object dt): cdef inline int _year_add_months(pandas_datetimestruct dts, int months) nogil: - '''new year number after shifting pandas_datetimestruct number of months''' + """new year number after shifting pandas_datetimestruct number of months""" return dts.year + (dts.month + months - 1) / 12 cdef inline int _month_add_months(pandas_datetimestruct dts, int months) nogil: - '''new month number after shifting pandas_datetimestruct number of months''' + """new month number after shifting pandas_datetimestruct number of months""" cdef int new_month = (dts.month + months) % 12 return 12 if new_month == 0 else new_month @cython.wraparound(False) @cython.boundscheck(False) def shift_months(int64_t[:] dtindex, int months, object day=None): - ''' + """ Given an int64-based datetime index, shift all elements specified number of months using DateOffset semantics @@ -4625,7 +4625,7 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): * None: day of month * 'start' 1st day of month * 'end' last day of month - ''' + """ cdef: Py_ssize_t i pandas_datetimestruct dts diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 0a59c2e8eb1c3..58cd0c13d8ec7 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -205,7 +205,7 @@ def wrapped(*args, **kwargs): class KnownFailureTest(Exception): - '''Raise this exception to mark a test as a known failing test.''' + """Raise this exception to mark a test as a known failing test.""" pass
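
Note on the convention being applied: PEP 257 ("Docstring Conventions") states "For consistency, always use \"\"\"triple double quotes\"\"\" around docstrings", which is presumably the PEP the subject line refers to. The change is purely cosmetic: Python and Cython treat the two triple-quote delimiters identically at runtime, so a patch of this shape cannot alter behavior. A minimal sketch demonstrating that (the function names and bodies here are illustrative, not pandas code; the docstring text is borrowed from the `median` hunk in pandas/algos.pyx):

    def median_old(arr):
        '''
        A faster median
        '''


    def median_new(arr):
        """
        A faster median
        """


    # Both delimiters produce byte-identical docstring objects; only the
    # source style differs, which is why PEP 257 can mandate """ purely
    # for consistency.
    assert median_old.__doc__ == median_new.__doc__

The one place needing more than a mechanical swap is pandas/src/generate_code.py: the code templates there are themselves triple-quoted strings, so templates whose generated functions gain """ docstrings (is_monotonic_template, group_last_template, left_join_template, and the rest) have their outer delimiters flipped from """ to ''', since an unescaped """ docstring cannot be nested inside a """-quoted template.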