Skip to content

Cleanup cimports, implement bits of numpy_helper in util.pxd #21878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
# -*- coding: utf-8 -*-
# cython: profile=False

cimport numpy as cnp
import numpy as np

cimport cython
from cython cimport Py_ssize_t

cnp.import_array()
from libc.stdlib cimport malloc, free

import numpy as np
from numpy cimport (ndarray,
double_t,
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float32_t, float64_t)

from libc.stdlib cimport malloc, free

from util cimport numeric, get_nat

from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
Expand Down Expand Up @@ -74,8 +73,8 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
double_t x, t

l = 0
m = n -1
while (l<m):
m = n - 1
while l < m:
x = a[k]
i = l
j = m
Expand Down
10 changes: 4 additions & 6 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@ from cpython.slice cimport PySlice_Check
import numpy as np
cimport numpy as cnp
from numpy cimport (ndarray, float64_t, int32_t,
int64_t, uint8_t, uint64_t, intp_t)
int64_t, uint8_t, uint64_t, intp_t,
# Note: NPY_DATETIME, NPY_TIMEDELTA are only available
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can probably remove this comment (doing so in a future PR is fine).

# for cimport in cython>=0.27.3
NPY_DATETIME, NPY_TIMEDELTA)
cnp.import_array()

cdef extern from "numpy/arrayobject.h":
# These can be cimported directly from numpy in cython>=0.27.3
cdef enum NPY_TYPES:
NPY_DATETIME
NPY_TIMEDELTA

cimport util

Expand Down
19 changes: 12 additions & 7 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
cimport numpy as cnp
import numpy as np
# -*- coding: utf-8 -*-
import numbers

from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
PyObject_RichCompare)

cimport util
cimport cython
import cython
from cython cimport Py_ssize_t

import numpy as np
from numpy cimport ndarray


cimport util

from tslibs import Timestamp
from tslibs.timezones cimport tz_compare

from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
PyObject_RichCompare)

import numbers
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])


Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -753,4 +753,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys,
return result


include "inference.pyx"
include "src/inference.pyx"
18 changes: 0 additions & 18 deletions pandas/_libs/src/numpy_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,6 @@ PANDAS_INLINE PyObject* get_value_1d(PyArrayObject* ap, Py_ssize_t i) {
return PyArray_Scalar(item, PyArray_DESCR(ap), (PyObject*)ap);
}

// Returns an ASCII or UTF8 (py3) view on a Python str.
// On Python 3 (PY_VERSION_HEX >= 0x03000000) this calls PyUnicode_AsUTF8;
// otherwise it calls PyString_AsString.
// The Python object owns the memory; the result should not be freed.
PANDAS_INLINE const char* get_c_string(PyObject* obj) {
#if PY_VERSION_HEX >= 0x03000000
return PyUnicode_AsUTF8(obj);
#else
return PyString_AsString(obj);
#endif
}

// Builds a Python string object from the C string `data`.
// On Python 3 (PY_VERSION_HEX >= 0x03000000) this calls
// PyUnicode_FromString; otherwise it calls PyString_FromString.
PANDAS_INLINE PyObject* char_to_string(const char* data) {
#if PY_VERSION_HEX >= 0x03000000
return PyUnicode_FromString(data);
#else
return PyString_FromString(data);
#endif
}


void set_array_not_contiguous(PyArrayObject* ao) {
ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS);
Expand Down
27 changes: 25 additions & 2 deletions pandas/_libs/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ cnp.import_array()

cimport cpython
from cpython cimport PyTypeObject
from cpython.string cimport PyString_FromString, PyString_AsString

DEF PY3 = bytes != str

cdef extern from "Python.h":
# Note: importing extern-style allows us to declare these as nogil
Expand All @@ -14,6 +17,8 @@ cdef extern from "Python.h":
bint PyFloat_Check(object obj) nogil
bint PyComplex_Check(object obj) nogil
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
char* PyUnicode_AsUTF8(object unicode)
object PyUnicode_FromString(const char* u) nogil


cdef extern from "numpy/arrayobject.h":
Expand Down Expand Up @@ -69,8 +74,6 @@ cdef extern from "numpy_helper.h":
int assign_value_1d(ndarray, Py_ssize_t, object) except -1
cnp.int64_t get_nat()
object get_value_1d(ndarray, Py_ssize_t)
char *get_c_string(object) except NULL
object char_to_string(char*)

ctypedef fused numeric:
cnp.int8_t
Expand Down Expand Up @@ -101,6 +104,26 @@ cdef extern from "headers/stdint.h":
enum: INT64_MIN


cdef inline const char* get_c_string(object obj) except NULL:
"""
Return a C-level view of the contents of a Python str.

When PY3 is set at compile time this delegates to PyUnicode_AsUTF8;
otherwise it delegates to PyString_AsString.  The function is declared
``except NULL``, so a NULL result is treated as a raised exception.

returns ASCII or UTF8 (py3) view on python str
python object owns memory, should not be freed
"""
# TODO: the last two docstring lines are copied verbatim from the version
# that was directly in numpy_helper.h; are they still accurate?
IF PY3:
return PyUnicode_AsUTF8(obj)
ELSE:
return PyString_AsString(obj)


cdef inline object char_to_string(const char* data):
"""
Build a Python string object from the C string `data`.

When PY3 is set at compile time this delegates to PyUnicode_FromString;
otherwise it delegates to PyString_FromString.
"""
IF PY3:
return PyUnicode_FromString(data)
ELSE:
return PyString_FromString(data)


cdef inline object get_value_at(ndarray arr, object loc):
cdef:
Py_ssize_t i, sz
Expand Down
36 changes: 18 additions & 18 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
# -*- coding: utf-8 -*-
# cython: profile=False
cimport cython
from cython cimport Py_ssize_t

from cpython cimport PyFloat_Check, PyUnicode_Check

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyDateTime_CheckExact,
PyDateTime_IMPORT,
timedelta, datetime, date, time)
# import datetime C API
PyDateTime_IMPORT


cimport numpy as cnp
from numpy cimport int64_t, ndarray, float64_t
import numpy as np
cnp.import_array()

import pytz

from cpython cimport PyFloat_Check, PyUnicode_Check

from util cimport (is_integer_object, is_float_object, is_string_object,
is_datetime64_object)

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyDateTime_CheckExact,
PyDateTime_IMPORT,
timedelta, datetime, date, time)
# import datetime C API
PyDateTime_IMPORT


from tslibs.np_datetime cimport (check_dts_bounds,
pandas_datetimestruct,
Expand All @@ -30,13 +35,6 @@ from tslibs.np_datetime import OutOfBoundsDatetime

from tslibs.parsing import parse_datetime_string

cimport cython
from cython cimport Py_ssize_t


import pytz


from tslibs.timedeltas cimport cast_from_unit
from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_pytz, get_dst_info)
Expand All @@ -54,7 +52,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts,
_NS_UPPER_BOUND, _NS_LOWER_BOUND)
from tslibs.timestamps import Timestamp

cdef bint PY2 = str == bytes

DEF PY2 = str == bytes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should define this in util.pxd instead (doing so in a future PR is fine).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I doubt that works for DEF values, but not much downside.



cdef inline object create_datetime_from_ts(
Expand Down Expand Up @@ -556,8 +555,9 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
continue
if PyUnicode_Check(val) and PY2:
val = val.encode('utf-8')
if PY2:
if PyUnicode_Check(val):
val = val.encode('utf-8')

try:
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,6 @@ class _BaseOffset(object):
# other is not a DateOffset object
return False

return self._params == other._params
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this didn't break anything?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was unreachable; there was a compile-time warning about it.


def __ne__(self, other):
return not self == other

Expand Down
22 changes: 8 additions & 14 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cimport cython
from cython cimport Py_ssize_t


from datetime import datetime
from cpython.datetime cimport datetime
import time

import numpy as np
Expand All @@ -37,7 +37,7 @@ from dateutil.parser import DEFAULTPARSER
from dateutil.parser import parse as du_parse

from ccalendar import MONTH_NUMBERS
from nattype import nat_strings
from nattype import nat_strings, NaT

# ----------------------------------------------------------------------
# Constants
Expand All @@ -54,9 +54,6 @@ cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')

cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}

NAT_SENTINEL = object()
# This allows us to reference NaT without having to import it

# ----------------------------------------------------------------------


Expand Down Expand Up @@ -136,9 +133,6 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
res = parse_datetime_string_with_reso(arg, freq=freq,
dayfirst=dayfirst,
yearfirst=yearfirst)
if res[0] is NAT_SENTINEL:
from pandas._libs.tslib import NaT
res = (NaT,) + res[1:]
return res


Expand Down Expand Up @@ -206,7 +200,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
# should be NaT???

if date_string in nat_strings:
return NAT_SENTINEL, NAT_SENTINEL, ''
return NaT, NaT, ''

date_string = date_string.upper()
date_len = len(date_string)
Expand Down Expand Up @@ -407,7 +401,7 @@ def try_parse_dates(ndarray[object] values, parser=None,

# EAFP here
try:
for i from 0 <= i < n:
for i in range(n):
if values[i] == '':
result[i] = np.nan
else:
Expand All @@ -419,7 +413,7 @@ def try_parse_dates(ndarray[object] values, parser=None,
parse_date = parser

try:
for i from 0 <= i < n:
for i in range(n):
if values[i] == '':
result[i] = np.nan
else:
Expand Down Expand Up @@ -459,7 +453,7 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
else:
parse_time = time_parser

for i from 0 <= i < n:
for i in range(n):
d = parse_date(str(dates[i]))
t = parse_time(str(times[i]))
result[i] = datetime(d.year, d.month, d.day,
Expand All @@ -479,7 +473,7 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
raise ValueError('Length of years/months/days must all be equal')
result = np.empty(n, dtype='O')

for i from 0 <= i < n:
for i in range(n):
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))

return result
Expand All @@ -505,7 +499,7 @@ def try_parse_datetime_components(ndarray[object] years,
raise ValueError('Length of all datetime components must be equal')
result = np.empty(n, dtype='O')

for i from 0 <= i < n:
for i in range(n):
float_secs = float(seconds[i])
secs = int(float_secs)

Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ from conversion cimport tz_convert_utc_to_tzlocal
from frequencies cimport (get_freq_code, get_base_alias,
get_to_timestamp_base, get_freq_str,
get_rule_month)
from parsing import parse_time_string, NAT_SENTINEL
from parsing import parse_time_string
from resolution import Resolution
from nattype import nat_strings, NaT, iNaT
from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike
from offsets cimport to_offset
from offsets import _Tick

cdef bint PY2 = str == bytes
DEF PY2 = str == bytes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same



cdef extern from "period_helper.h":
Expand Down Expand Up @@ -729,7 +729,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt):

result = result.replace(str_extra_fmts[i], repl)

if PY2:
IF PY2:
result = result.decode('utf-8', 'ignore')

return result
Expand Down Expand Up @@ -1820,7 +1820,7 @@ class Period(_Period):
value = str(value)
value = value.upper()
dt, _, reso = parse_time_string(value, freq)
if dt is NAT_SENTINEL:
if dt is NaT:
ordinal = iNaT

if freq is None:
Expand Down
Loading