Skip to content

CLN: cython and docstring cleanups #29089

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,11 @@ dtypes = [('float64', 'float64_t', 'float64_t'),
def get_dispatch(dtypes):

for name, c_type, dest_type, in dtypes:
dest_name = dest_type[:-2] # i.e. strip "_t"
yield name, c_type, dest_type, dest_name
yield name, c_type, dest_type

}}

{{for name, c_type, dest_type, dest_name
{{for name, c_type, dest_type
in get_dispatch(dtypes)}}


Expand Down
62 changes: 33 additions & 29 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,28 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in

{{py:

# name, dest, c_type_in, c_type_out, preval, postval, can_copy, nogil
# c_type_in, c_type_out, preval, postval
dtypes = [
('bool', 'bool', 'uint8_t', 'uint8_t', '', '', True),
('bool', 'object', 'uint8_t', 'object',
'True if ', ' > 0 else False', False),
('int8', 'int8', 'int8_t', 'int8_t', '', '', True),
('int8', 'int32', 'int8_t', 'int32_t', '', '', False),
('int8', 'int64', 'int8_t', 'int64_t', '', '', False),
('int8', 'float64', 'int8_t', 'float64_t', '', '', False),
('int16', 'int16', 'int16_t', 'int16_t', '', '', True),
('int16', 'int32', 'int16_t', 'int32_t', '', '', False),
('int16', 'int64', 'int16_t', 'int64_t', '', '', False),
('int16', 'float64', 'int16_t', 'float64_t', '', '', False),
('int32', 'int32', 'int32_t', 'int32_t', '', '', True),
('int32', 'int64', 'int32_t', 'int64_t', '', '', False),
('int32', 'float64', 'int32_t', 'float64_t', '', '', False),
('int64', 'int64', 'int64_t', 'int64_t', '', '', True),
('int64', 'float64', 'int64_t', 'float64_t', '', '', False),
('float32', 'float32', 'float32_t', 'float32_t', '', '', True),
('float32', 'float64', 'float32_t', 'float64_t', '', '', False),
('float64', 'float64', 'float64_t', 'float64_t', '', '', True),
('object', 'object', 'object', 'object', '', '', False)]
('uint8_t', 'uint8_t', '', ''),
('uint8_t', 'object', 'True if ', ' > 0 else False'),
('int8_t', 'int8_t', '', ''),
('int8_t', 'int32_t', '', ''),
('int8_t', 'int64_t', '', ''),
('int8_t', 'float64_t', '', ''),
('int16_t', 'int16_t', '', ''),
('int16_t', 'int32_t', '', ''),
('int16_t', 'int64_t', '', ''),
('int16_t', 'float64_t', '', ''),
('int32_t', 'int32_t', '', ''),
('int32_t', 'int64_t', '', ''),
('int32_t', 'float64_t', '', ''),
('int64_t', 'int64_t', '', ''),
('int64_t', 'float64_t', '', ''),
('float32_t', 'float32_t', '', ''),
('float32_t', 'float64_t', '', ''),
('float64_t', 'float64_t', '', ''),
('object', 'object', '', ''),
]


def get_dispatch(dtypes):
Expand Down Expand Up @@ -117,9 +117,9 @@ def get_dispatch(dtypes):
out[i, j] = %(preval)svalues[i, idx]%(postval)s
"""

for (name, dest, c_type_in, c_type_out, preval, postval,
can_copy) in dtypes:
for (c_type_in, c_type_out, preval, postval) in dtypes:

can_copy = c_type_in == c_type_out != "object"
nogil = c_type_out != "object"
if nogil:
nogil_str = "with nogil:"
Expand All @@ -128,6 +128,16 @@ def get_dispatch(dtypes):
nogil_str = ''
tab = ''

def get_name(dtype_name):
    """
    Map a C type name to the short name used for ``name`` / ``dest``.

    Parameters
    ----------
    dtype_name : str
        A C type name from the ``dtypes`` table, e.g. "int64_t", or "object".

    Returns
    -------
    str
        "object" is returned unchanged, "uint8_t" maps to "bool", and any
        other name has its trailing "_t" removed ("int64_t" -> "int64").
    """
    if dtype_name == "object":
        return "object"
    if dtype_name == "uint8_t":
        # the uint8_t entries in the dtypes table are the boolean ones
        return "bool"
    if dtype_name.endswith("_t"):
        # strip the "_t" suffix
        return dtype_name[:-2]
    # No "_t" suffix: return the name unchanged instead of blindly
    # chopping off its last two characters.
    return dtype_name

name = get_name(c_type_in)
dest = get_name(c_type_out)

args = dict(name=name, dest=dest, c_type_in=c_type_in,
c_type_out=c_type_out, preval=preval, postval=postval,
can_copy=can_copy, nogil_str=nogil_str, tab=tab)
Expand Down Expand Up @@ -291,9 +301,3 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
for j in range(K):
result[i, j] = values[i, indexer[i, j]]
return result


_take_2d_object = _take_2d[object]
_take_2d_float64 = _take_2d[float64_t]
_take_2d_int64 = _take_2d[int64_t]
_take_2d_uint64 = _take_2d[uint64_t]
6 changes: 4 additions & 2 deletions pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,14 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):

if keep == 'last':
{{if dtype == 'object'}}
for i from n > i >= 0:
for i in range(n - 1, -1, -1):
# equivalent: range(n)[::-1], which cython doesn't like in nogil
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
out[i] = ret == 0
{{else}}
with nogil:
for i from n > i >= 0:
for i in range(n - 1, -1, -1):
# equivalent: range(n)[::-1], which cython doesn't like in nogil
kh_put_{{ttype}}(table, values[i], &ret)
out[i] = ret == 0
{{endif}}
Expand Down
24 changes: 9 additions & 15 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@ from pandas._libs.algos import (
)


@cython.boundscheck(False)
def inner_join(const int64_t[:] left, const int64_t[:] right,
Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
ndarray[int64_t] left_indexer, right_indexer
int64_t lc, rc
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
Py_ssize_t offset

# NA group in location 0

Expand All @@ -34,11 +37,6 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
if rc > 0 and lc > 0:
count += lc * rc

# group 0 is the NA group
cdef:
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
Py_ssize_t offset

# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]
Expand All @@ -64,6 +62,7 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
_get_result_indexer(right_sorter, right_indexer))


@cython.boundscheck(False)
def left_outer_join(const int64_t[:] left, const int64_t[:] right,
Py_ssize_t max_groups, sort=True):
cdef:
Expand All @@ -72,6 +71,8 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
ndarray rev
ndarray[int64_t] left_indexer, right_indexer
int64_t lc, rc
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
Py_ssize_t offset

# NA group in location 0

Expand All @@ -85,11 +86,6 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
else:
count += left_count[i]

# group 0 is the NA group
cdef:
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
Py_ssize_t offset

# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]
Expand Down Expand Up @@ -137,13 +133,16 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
return left_indexer, right_indexer


@cython.boundscheck(False)
def full_outer_join(const int64_t[:] left, const int64_t[:] right,
Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
ndarray[int64_t] left_indexer, right_indexer
int64_t lc, rc
int64_t left_pos = 0, right_pos = 0
Py_ssize_t offset, position = 0

# NA group in location 0

Expand All @@ -160,11 +159,6 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right,
else:
count += lc + rc

# group 0 is the NA group
cdef:
int64_t left_pos = 0, right_pos = 0
Py_ssize_t offset, position = 0

# exclude the NA group
left_pos = left_count[0]
right_pos = right_count[0]
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/sparse_op_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def get_op(tup):
'and': '{0} & {1}', # logical op
'or': '{0} | {1}'}

return ops_dict[opname].format(lval, rval, dtype)
return ops_dict[opname].format(lval, rval)


def get_dispatch(dtypes):
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1296,7 +1296,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values,
# The original impl didn't deal with variable window sizes
# So the code was optimized for that

for i from starti[0] <= i < endi[0]:
for i in range(starti[0], endi[0]):
ai = init_mm(values[i], &nobs, is_max)

# Discard previous entries if we find new min or max
Expand Down Expand Up @@ -1644,7 +1644,7 @@ def roll_generic(object obj,
else:

# truncated windows at the beginning, through first full-length window
for i from 0 <= i < (int_min(win, N) - offset):
for i in range((int_min(win, N) - offset)):
if counts[i] >= minp:
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
else:
Expand All @@ -1654,7 +1654,7 @@ def roll_generic(object obj,
buf = <float64_t *>arr.data
bufarr = np.empty(win, dtype=float)
oldbuf = <float64_t *>bufarr.data
for i from (win - offset) <= i < (N - offset):
for i in range((win - offset), (N - offset)):
buf = buf + 1
bufarr.data = <char *>buf
if counts[i] >= minp:
Expand All @@ -1664,7 +1664,7 @@ def roll_generic(object obj,
bufarr.data = <char *>oldbuf

# truncated windows at the end
for i from int_max(N - offset, 0) <= i < N:
for i in range(int_max(N - offset, 0), N):
if counts[i] >= minp:
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
*args,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ def _create_method(cls, op, coerce_to_dtype=True):
----------
op : function
An operator that takes arguments op(a, b)
coerce_to_dtype : bool, default True
coerce_to_dtype : bool, default True
boolean indicating whether to attempt to convert
the result to the underlying ExtensionArray dtype.
If it's not possible to create a new ExtensionArray with the
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2481,7 +2481,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
like searching / selecting subsets of the data.
append : bool, default False
For Table formats, append the input data to the existing.
data_columns : list of columns or True, optional
data_columns : list of columns or True, optional
List of columns to create as indexed data columns for on-disk
queries, or True to use all columns. By default only the axes
of the object are indexed. See :ref:`io.hdf5-query-data-columns`.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2080,7 +2080,7 @@ def rank(
* dense: like 'min', but rank always increases by 1 between groups
ascending : bool, default True
False for ranks by high (1) to low (N).
na_option : {'keep', 'top', 'bottom'}, default 'keep'
na_option : {'keep', 'top', 'bottom'}, default 'keep'
* keep: leave NA values where they are
* top: smallest rank if ascending
* bottom: smallest rank if descending
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2031,7 +2031,7 @@ def fillna(self, value=None, downcast=None):

Parameters
----------
how : {'any', 'all'}, default 'any'
how : {'any', 'all'}, default 'any'
If the Index is a MultiIndex, drop the value when any or all levels
are NaN.

Expand Down Expand Up @@ -5016,12 +5016,11 @@ def _validate_indexer(self, form, key, kind):

Returns
-------
label : object
label : object

Notes
-----
Value of `side` parameter should be validated in caller.

"""

@Appender(_index_shared_docs["_maybe_cast_slice_bound"])
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,12 +1079,11 @@ def _maybe_cast_slice_bound(self, label, side, kind):

Returns
-------
label : object
label : object

Notes
-----
Value of `side` parameter should be validated in caller.

"""
assert kind in ["ix", "loc", "getitem", None]

Expand Down
4 changes: 1 addition & 3 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,6 @@ def _maybe_cast_slice_bound(self, label, side, kind):
"""
If label is a string, cast it to timedelta according to resolution.
Parameters
----------
label : object
Expand All @@ -559,8 +558,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
Returns
-------
label : object
label : object
"""
assert kind in ["ix", "loc", "getitem", None]

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
----------
xi : array_like
A sorted list of x-coordinates, of length N.
yi : array_like
yi : array_like
A 1-D array of real values. `yi`'s length along the interpolation
axis must be equal to the length of `xi`. If N-D array, use axis
parameter to select correct axis.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/ops/docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def _make_flex_doc(op_name, typ):
----------
other : scalar, sequence, Series, or DataFrame
Any single or multiple element data structure, or list-like object.
axis : {{0 or 'index', 1 or 'columns'}}
axis : {{0 or 'index', 1 or 'columns'}}
Whether to compare by the index (0 or 'index') or columns
(1 or 'columns'). For Series input, axis to match Series index on.
level : int or label
Expand Down Expand Up @@ -541,7 +541,7 @@ def _make_flex_doc(op_name, typ):
----------
other : scalar, sequence, Series, or DataFrame
Any single or multiple element data structure, or list-like object.
axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
Whether to compare by the index (0 or 'index') or columns
(1 or 'columns').
level : int or label
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1343,7 +1343,7 @@ def str_pad(arr, width, side="left", fillchar=" "):
character. Equivalent to ``Series.str.pad(side='right')``.
Series.str.center : Fills both sides of strings with an arbitrary
character. Equivalent to ``Series.str.pad(side='both')``.
Series.str.zfill : Pad strings in the Series/Index by prepending '0'
Series.str.zfill : Pad strings in the Series/Index by prepending '0'
character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``.

Examples
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@ def append(
/ selecting subsets of the data
append : bool, default True
Append the input data to the existing.
data_columns : list of columns, or True, default None
data_columns : list of columns, or True, default None
List of columns to create as indexed data columns for on-disk
queries, or True to use all columns. By default only the axes
of the object are indexed. See `here
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/plotting/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def _check_ax_scales(self, axes, xaxis="linear", yaxis="linear"):
axes : matplotlib Axes object, or its list-like
xaxis : {'linear', 'log'}
expected xaxis scale
yaxis : {'linear', 'log'}
yaxis : {'linear', 'log'}
expected yaxis scale
"""
axes = self._flatten_visible(axes)
Expand All @@ -329,7 +329,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None):
axes_num : number
expected number of axes. Unnecessary axes should be set to
invisible.
layout : tuple
layout : tuple
expected layout, (expected number of rows , columns)
figsize : tuple
expected figsize. default is matplotlib default
Expand Down