CLN: Assorted cleanups #29175


Merged (4 commits, Oct 23, 2019)

40 changes: 22 additions & 18 deletions pandas/_libs/hashtable_class_helper.pxi.in
@@ -13,7 +13,7 @@ from pandas._libs.tslibs.util cimport get_c_string

{{py:

-# name, dtype, arg
+# name, dtype, c_type
# the generated StringVector is not actually used
# but is included for completeness (rather ObjectVector is used
# for uniques in hashtables)
@@ -24,13 +24,13 @@ dtypes = [('Float64', 'float64', 'float64_t'),
('UInt64', 'uint64', 'uint64_t')]
}}

-{{for name, dtype, arg in dtypes}}
+{{for name, dtype, c_type in dtypes}}


{{if dtype != 'int64'}}

ctypedef struct {{name}}VectorData:
-{{arg}} *data
+{{c_type}} *data
Py_ssize_t n, m

{{endif}}
@@ -39,7 +39,7 @@ ctypedef struct {{name}}VectorData:
@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline void append_data_{{dtype}}({{name}}VectorData *data,
-{{arg}} x) nogil:
+{{c_type}} x) nogil:

data.data[data.n] = x
data.n += 1
@@ -61,14 +61,14 @@ cdef inline bint needs_resize(vector_data *data) nogil:

{{py:

-# name, dtype, arg, idtype
-dtypes = [('Float64', 'float64', 'float64_t', 'np.float64'),
-          ('UInt64', 'uint64', 'uint64_t', 'np.uint64'),
-          ('Int64', 'int64', 'int64_t', 'np.int64')]
+# name, dtype, c_type
+dtypes = [('Float64', 'float64', 'float64_t'),
+          ('UInt64', 'uint64', 'uint64_t'),
+          ('Int64', 'int64', 'int64_t')]

}}

-{{for name, dtype, arg, idtype in dtypes}}
+{{for name, dtype, c_type in dtypes}}

cdef class {{name}}Vector:

@@ -87,13 +87,13 @@ cdef class {{name}}Vector:
self.external_view_exists = False
self.data.n = 0
self.data.m = _INIT_VEC_CAP
-self.ao = np.empty(self.data.m, dtype={{idtype}})
-self.data.data = <{{arg}}*>self.ao.data
+self.ao = np.empty(self.data.m, dtype=np.{{dtype}})
+self.data.data = <{{c_type}}*>self.ao.data

cdef resize(self):
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
self.ao.resize(self.data.m, refcheck=False)
-self.data.data = <{{arg}}*>self.ao.data
+self.data.data = <{{c_type}}*>self.ao.data

def __dealloc__(self):
if self.data is not NULL:
@@ -113,7 +113,7 @@ cdef class {{name}}Vector:
self.external_view_exists = True
return self.ao

-cdef inline void append(self, {{arg}} x):
+cdef inline void append(self, {{c_type}} x):

if needs_resize(self.data):
if self.external_view_exists:
@@ -123,7 +123,7 @@

append_data_{{dtype}}(self.data, x)

-cdef extend(self, const {{arg}}[:] x):
+cdef extend(self, const {{c_type}}[:] x):
for i in range(len(x)):
self.append(x[i])

@@ -279,7 +279,8 @@ cdef class {{name}}HashTable(HashTable):
self.table = NULL

def __contains__(self, object key):
-cdef khiter_t k
+cdef:
+    khiter_t k
k = kh_get_{{dtype}}(self.table, key)
return k != self.table.n_buckets

@@ -290,7 +291,8 @@ cdef class {{name}}HashTable(HashTable):
sizeof(uint32_t)) # flags

cpdef get_item(self, {{dtype}}_t val):
-cdef khiter_t k
+cdef:
+    khiter_t k
k = kh_get_{{dtype}}(self.table, val)
if k != self.table.n_buckets:
return self.table.vals[k]
@@ -899,7 +901,8 @@ cdef class PyObjectHashTable(HashTable):
return self.table.size

def __contains__(self, object key):
-cdef khiter_t k
+cdef:
+    khiter_t k
hash(key)

k = kh_get_pymap(self.table, <PyObject*>key)
@@ -912,7 +915,8 @@
sizeof(uint32_t)) # flags

cpdef get_item(self, object val):
-cdef khiter_t k
+cdef:
+    khiter_t k

k = kh_get_pymap(self.table, <PyObject*>val)
if k != self.table.n_buckets:
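A note on how these templates work: the `.pxi.in` files are expanded by a Tempita-style preprocessor at build time, so `{{for name, dtype, c_type in dtypes}}` stamps out one copy of the block per dtype tuple, substituting `{{name}}`, `{{dtype}}` and `{{c_type}}` as it goes. As a rough sketch (an illustrative expansion, not part of this diff), the `('Float64', 'float64', 'float64_t')` entry generates roughly:

cimport cython
from numpy cimport float64_t

ctypedef struct Float64VectorData:
    float64_t *data    # raw buffer backing the vector
    Py_ssize_t n, m    # current length and allocated capacity

@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline void append_data_float64(Float64VectorData *data,
                                     float64_t x) nogil:
    # append without bounds checks; the caller ensures capacity first
    data.data[data.n] = x
    data.n += 1

So the rename from `arg` to `c_type` is purely cosmetic: it names the template variable after the C-level type it substitutes.
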
29 changes: 8 additions & 21 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -4,21 +4,17 @@ Template for each `dtype` helper function for hashtable
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

-# ----------------------------------------------------------------------
-# VectorData
-# ----------------------------------------------------------------------

{{py:

-# dtype, ttype
+# dtype, ttype, c_type
dtypes = [('float64', 'float64', 'float64_t'),
('uint64', 'uint64', 'uint64_t'),
('object', 'pymap', 'object'),
('int64', 'int64', 'int64_t')]

}}

-{{for dtype, ttype, scalar in dtypes}}
+{{for dtype, ttype, c_type in dtypes}}


@cython.wraparound(False)
@@ -34,7 +30,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
khiter_t k
Py_ssize_t i, n = len(values)

-{{scalar}} val
+{{c_type}} val

int ret = 0

@@ -77,7 +73,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
{{if dtype == 'object'}}
cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
{{else}}
-cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
+cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna):
{{endif}}
cdef:
Py_ssize_t i = 0
@@ -127,13 +123,9 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
-
-
def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
{{else}}
-
-
-def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
+def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'):
{{endif}}
cdef:
int ret = 0
@@ -212,15 +204,10 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
-
-
-def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values):
+def ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values):
{{else}}
-
-
-def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
+def ismember_{{dtype}}({{c_type}}[:] arr, {{c_type}}[:] values):
{{endif}}
-
"""
Return boolean of values in arr on an
element by-element basis
@@ -238,7 +225,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
Py_ssize_t i, n, k
int ret = 0
ndarray[uint8_t] result
-{{scalar}} val
+{{c_type}} val
kh_{{ttype}}_t *table = kh_init_{{ttype}}()

# construct the table
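The renames in this file are likewise behavior-neutral: the generated module keeps exposing the same per-dtype helpers. A quick usage sketch against `pandas._libs.hashtable` (signatures as of this era of the codebase; treat the exact names and return types as assumptions):

import numpy as np
from pandas._libs import hashtable as htable

arr = np.array([1, 2, 1, 3, 2], dtype=np.int64)

# duplicated_int64: keep='first' marks every later occurrence as a duplicate
htable.duplicated_int64(arr, keep='first')
# -> array([False, False, True, False, True])

# ismember_int64: element-wise membership of arr's values in `values`
htable.ismember_int64(arr, np.array([1, 3], dtype=np.int64))
# -> array([ True, False, True, True, False])
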
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyx
@@ -284,7 +284,7 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
return start, stop, step, length


-def slice_getitem(slice slc not None, ind):
+cdef slice_getitem(slice slc, ind):
Review thread on this line:

Member: Do we have a not None check elsewhere?

Member Author: not None is not allowed in a cdef function. we only call this from one place in this module, and it is immediately after checking that we do in fact have a slice
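A minimal sketch of the constraint being discussed (hypothetical names, not the pandas source): Cython accepts `not None` only on `def` functions, so a `cdef` function has to rely on its caller for the check.

# def functions can reject None declaratively in the signature:
def slice_getitem_py(slice slc not None, ind):
    return slice_getitem_c(slc, ind)

# cdef signatures reject the `not None` syntax; here the single caller
# has already verified isinstance(obj, slice), so None cannot reach us:
cdef slice_getitem_c(slice slc, ind):
    return (slc, ind)
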

cdef:
Py_ssize_t s_start, s_stop, s_step, s_len
Py_ssize_t ind_start, ind_stop, ind_step, ind_len
2 changes: 1 addition & 1 deletion pandas/_libs/interval.pyx
@@ -18,7 +18,7 @@ cnp.import_array()

cimport pandas._libs.util as util

-from pandas._libs.hashtable cimport Int64Vector, Int64VectorData
+from pandas._libs.hashtable cimport Int64Vector
from pandas._libs.tslibs.util cimport is_integer_object, is_float_object

from pandas._libs.tslibs import Timestamp
5 changes: 1 addition & 4 deletions pandas/_libs/lib.pyx
@@ -9,12 +9,9 @@ import warnings
import cython
from cython import Py_ssize_t

-from cpython.list cimport PyList_New
-from cpython.object cimport (PyObject_Str, PyObject_RichCompareBool, Py_EQ,
-                             Py_SIZE)
+from cpython.object cimport PyObject_RichCompareBool, Py_EQ
from cpython.ref cimport Py_INCREF
-from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New
-from cpython.unicode cimport PyUnicode_Join

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyTime_Check, PyDelta_Check,
2 changes: 1 addition & 1 deletion pandas/core/generic.py
@@ -10852,7 +10852,7 @@ def transform(self, func, *args, **kwargs):
Also returns None for empty %(klass)s.
"""

-def _find_valid_index(self, how):
+def _find_valid_index(self, how: str):
"""
Retrieves the index of the first valid value.

6 changes: 3 additions & 3 deletions pandas/core/util/hashing.py
@@ -5,8 +5,8 @@

import numpy as np

+from pandas._libs import Timestamp
import pandas._libs.hashing as hashing
-import pandas._libs.tslibs as tslibs

from pandas.core.dtypes.cast import infer_dtype_from_scalar
from pandas.core.dtypes.common import (
@@ -337,8 +337,8 @@ def _hash_scalar(val, encoding: str = "utf8", hash_key=None):
# for tz-aware datetimes, we need the underlying naive UTC value and
# not the tz aware object or pd extension type (as
# infer_dtype_from_scalar would do)
-if not isinstance(val, tslibs.Timestamp):
-    val = tslibs.Timestamp(val)
+if not isinstance(val, Timestamp):
+    val = Timestamp(val)
val = val.tz_convert(None)

dtype, val = infer_dtype_from_scalar(val)
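On the surrounding code: `Timestamp.tz_convert(None)` converts a tz-aware value to UTC and then drops the timezone, which is exactly the naive UTC value that `_hash_scalar` wants to hash. A small illustration using the public pandas API:

import pandas as pd

ts = pd.Timestamp("2019-10-23 09:00", tz="US/Eastern")
# convert to UTC, then drop the tz: 09:00 EDT == 13:00 UTC
naive_utc = ts.tz_convert(None)
print(naive_utc)  # 2019-10-23 13:00:00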