Skip to content

Commit 76de7b4

Browse files
jbrockmendel
authored and
victor
committed
Cleanup cimports, implement bits of numpy_helper in util.pxd (pandas-dev#21878)
1 parent b035b1c commit 76de7b4

File tree

11 files changed

+124
-129
lines changed

11 files changed

+124
-129
lines changed

pandas/_libs/groupby.pyx

+6-7
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
33

4-
cimport numpy as cnp
5-
import numpy as np
6-
74
cimport cython
5+
from cython cimport Py_ssize_t
86

9-
cnp.import_array()
7+
from libc.stdlib cimport malloc, free
108

9+
import numpy as np
1110
from numpy cimport (ndarray,
1211
double_t,
1312
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
1413
uint32_t, uint64_t, float32_t, float64_t)
1514

16-
from libc.stdlib cimport malloc, free
1715

1816
from util cimport numeric, get_nat
17+
1918
from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
2019
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
2120
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
@@ -74,8 +73,8 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
7473
double_t x, t
7574

7675
l = 0
77-
m = n -1
78-
while (l<m):
76+
m = n - 1
77+
while l < m:
7978
x = a[k]
8079
i = l
8180
j = m

pandas/_libs/index.pyx

+4-6
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@ from cpython.slice cimport PySlice_Check
99
import numpy as np
1010
cimport numpy as cnp
1111
from numpy cimport (ndarray, float64_t, int32_t,
12-
int64_t, uint8_t, uint64_t, intp_t)
12+
int64_t, uint8_t, uint64_t, intp_t,
13+
# Note: NPY_DATETIME, NPY_TIMEDELTA are only available
14+
# for cimport in cython>=0.27.3
15+
NPY_DATETIME, NPY_TIMEDELTA)
1316
cnp.import_array()
1417

15-
cdef extern from "numpy/arrayobject.h":
16-
# These can be cimported directly from numpy in cython>=0.27.3
17-
cdef enum NPY_TYPES:
18-
NPY_DATETIME
19-
NPY_TIMEDELTA
2018

2119
cimport util
2220

pandas/_libs/interval.pyx

+12-7
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
1-
cimport numpy as cnp
2-
import numpy as np
1+
# -*- coding: utf-8 -*-
2+
import numbers
3+
4+
from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
5+
PyObject_RichCompare)
36

4-
cimport util
57
cimport cython
6-
import cython
8+
from cython cimport Py_ssize_t
9+
10+
import numpy as np
711
from numpy cimport ndarray
12+
13+
14+
cimport util
15+
816
from tslibs import Timestamp
917
from tslibs.timezones cimport tz_compare
1018

11-
from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
12-
PyObject_RichCompare)
1319

14-
import numbers
1520
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])
1621

1722

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -753,4 +753,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys,
753753
return result
754754

755755

756-
include "inference.pyx"
756+
include "src/inference.pyx"

pandas/_libs/src/numpy_helper.h

-18
Original file line numberDiff line numberDiff line change
@@ -30,24 +30,6 @@ PANDAS_INLINE PyObject* get_value_1d(PyArrayObject* ap, Py_ssize_t i) {
3030
return PyArray_Scalar(item, PyArray_DESCR(ap), (PyObject*)ap);
3131
}
3232

33-
// returns ASCII or UTF8 (py3) view on python str
34-
// python object owns memory, should not be freed
35-
PANDAS_INLINE const char* get_c_string(PyObject* obj) {
36-
#if PY_VERSION_HEX >= 0x03000000
37-
return PyUnicode_AsUTF8(obj);
38-
#else
39-
return PyString_AsString(obj);
40-
#endif
41-
}
42-
43-
PANDAS_INLINE PyObject* char_to_string(const char* data) {
44-
#if PY_VERSION_HEX >= 0x03000000
45-
return PyUnicode_FromString(data);
46-
#else
47-
return PyString_FromString(data);
48-
#endif
49-
}
50-
5133

5234
void set_array_not_contiguous(PyArrayObject* ao) {
5335
ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS);

pandas/_libs/src/util.pxd

+25-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ cnp.import_array()
44

55
cimport cpython
66
from cpython cimport PyTypeObject
7+
from cpython.string cimport PyString_FromString, PyString_AsString
8+
9+
DEF PY3 = bytes != str
710

811
cdef extern from "Python.h":
912
# Note: importing extern-style allows us to declare these as nogil
@@ -14,6 +17,8 @@ cdef extern from "Python.h":
1417
bint PyFloat_Check(object obj) nogil
1518
bint PyComplex_Check(object obj) nogil
1619
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
20+
char* PyUnicode_AsUTF8(object unicode)
21+
object PyUnicode_FromString(const char* u) nogil
1722

1823

1924
cdef extern from "numpy/arrayobject.h":
@@ -69,8 +74,6 @@ cdef extern from "numpy_helper.h":
6974
int assign_value_1d(ndarray, Py_ssize_t, object) except -1
7075
cnp.int64_t get_nat()
7176
object get_value_1d(ndarray, Py_ssize_t)
72-
char *get_c_string(object) except NULL
73-
object char_to_string(char*)
7477

7578
ctypedef fused numeric:
7679
cnp.int8_t
@@ -101,6 +104,26 @@ cdef extern from "headers/stdint.h":
101104
enum: INT64_MIN
102105

103106

107+
cdef inline const char* get_c_string(object obj) except NULL:
108+
"""
109+
returns ASCII or UTF8 (py3) view on python str
110+
python object owns memory, should not be freed
111+
"""
112+
# TODO: this docstring is copied verbatim from version that was
113+
# directly in numpy_helper.h; is it still accurate?
114+
IF PY3:
115+
return PyUnicode_AsUTF8(obj)
116+
ELSE:
117+
return PyString_AsString(obj)
118+
119+
120+
cdef inline object char_to_string(const char* data):
121+
IF PY3:
122+
return PyUnicode_FromString(data)
123+
ELSE:
124+
return PyString_FromString(data)
125+
126+
104127
cdef inline object get_value_at(ndarray arr, object loc):
105128
cdef:
106129
Py_ssize_t i, sz

pandas/_libs/tslib.pyx

+18-18
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,29 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
3+
cimport cython
4+
from cython cimport Py_ssize_t
5+
6+
from cpython cimport PyFloat_Check, PyUnicode_Check
7+
8+
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
9+
PyDateTime_CheckExact,
10+
PyDateTime_IMPORT,
11+
timedelta, datetime, date, time)
12+
# import datetime C API
13+
PyDateTime_IMPORT
14+
315

416
cimport numpy as cnp
517
from numpy cimport int64_t, ndarray, float64_t
618
import numpy as np
719
cnp.import_array()
820

21+
import pytz
922

10-
from cpython cimport PyFloat_Check, PyUnicode_Check
1123

1224
from util cimport (is_integer_object, is_float_object, is_string_object,
1325
is_datetime64_object)
1426

15-
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
16-
PyDateTime_CheckExact,
17-
PyDateTime_IMPORT,
18-
timedelta, datetime, date, time)
19-
# import datetime C API
20-
PyDateTime_IMPORT
21-
2227

2328
from tslibs.np_datetime cimport (check_dts_bounds,
2429
pandas_datetimestruct,
@@ -30,13 +35,6 @@ from tslibs.np_datetime import OutOfBoundsDatetime
3035

3136
from tslibs.parsing import parse_datetime_string
3237

33-
cimport cython
34-
from cython cimport Py_ssize_t
35-
36-
37-
import pytz
38-
39-
4038
from tslibs.timedeltas cimport cast_from_unit
4139
from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
4240
treat_tz_as_pytz, get_dst_info)
@@ -54,7 +52,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts,
5452
_NS_UPPER_BOUND, _NS_LOWER_BOUND)
5553
from tslibs.timestamps import Timestamp
5654

57-
cdef bint PY2 = str == bytes
55+
56+
DEF PY2 = str == bytes
5857

5958

6059
cdef inline object create_datetime_from_ts(
@@ -556,8 +555,9 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
556555
if len(val) == 0 or val in nat_strings:
557556
iresult[i] = NPY_NAT
558557
continue
559-
if PyUnicode_Check(val) and PY2:
560-
val = val.encode('utf-8')
558+
if PY2:
559+
if PyUnicode_Check(val):
560+
val = val.encode('utf-8')
561561

562562
try:
563563
_string_to_dts(val, &dts, &out_local, &out_tzoffset)

pandas/_libs/tslibs/offsets.pyx

-2
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,6 @@ class _BaseOffset(object):
334334
# other is not a DateOffset object
335335
return False
336336

337-
return self._params == other._params
338-
339337
def __ne__(self, other):
340338
return not self == other
341339

pandas/_libs/tslibs/parsing.pyx

+8-14
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ cimport cython
1010
from cython cimport Py_ssize_t
1111

1212

13-
from datetime import datetime
13+
from cpython.datetime cimport datetime
1414
import time
1515

1616
import numpy as np
@@ -37,7 +37,7 @@ from dateutil.parser import DEFAULTPARSER
3737
from dateutil.parser import parse as du_parse
3838

3939
from ccalendar import MONTH_NUMBERS
40-
from nattype import nat_strings
40+
from nattype import nat_strings, NaT
4141

4242
# ----------------------------------------------------------------------
4343
# Constants
@@ -54,9 +54,6 @@ cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')
5454

5555
cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
5656

57-
NAT_SENTINEL = object()
58-
# This allows us to reference NaT without having to import it
59-
6057
# ----------------------------------------------------------------------
6158

6259

@@ -136,9 +133,6 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
136133
res = parse_datetime_string_with_reso(arg, freq=freq,
137134
dayfirst=dayfirst,
138135
yearfirst=yearfirst)
139-
if res[0] is NAT_SENTINEL:
140-
from pandas._libs.tslib import NaT
141-
res = (NaT,) + res[1:]
142136
return res
143137

144138

@@ -206,7 +200,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default,
206200
# should be NaT???
207201

208202
if date_string in nat_strings:
209-
return NAT_SENTINEL, NAT_SENTINEL, ''
203+
return NaT, NaT, ''
210204

211205
date_string = date_string.upper()
212206
date_len = len(date_string)
@@ -407,7 +401,7 @@ def try_parse_dates(ndarray[object] values, parser=None,
407401

408402
# EAFP here
409403
try:
410-
for i from 0 <= i < n:
404+
for i in range(n):
411405
if values[i] == '':
412406
result[i] = np.nan
413407
else:
@@ -419,7 +413,7 @@ def try_parse_dates(ndarray[object] values, parser=None,
419413
parse_date = parser
420414

421415
try:
422-
for i from 0 <= i < n:
416+
for i in range(n):
423417
if values[i] == '':
424418
result[i] = np.nan
425419
else:
@@ -459,7 +453,7 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
459453
else:
460454
parse_time = time_parser
461455

462-
for i from 0 <= i < n:
456+
for i in range(n):
463457
d = parse_date(str(dates[i]))
464458
t = parse_time(str(times[i]))
465459
result[i] = datetime(d.year, d.month, d.day,
@@ -479,7 +473,7 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
479473
raise ValueError('Length of years/months/days must all be equal')
480474
result = np.empty(n, dtype='O')
481475

482-
for i from 0 <= i < n:
476+
for i in range(n):
483477
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))
484478

485479
return result
@@ -505,7 +499,7 @@ def try_parse_datetime_components(ndarray[object] years,
505499
raise ValueError('Length of all datetime components must be equal')
506500
result = np.empty(n, dtype='O')
507501

508-
for i from 0 <= i < n:
502+
for i in range(n):
509503
float_secs = float(seconds[i])
510504
secs = int(float_secs)
511505

pandas/_libs/tslibs/period.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@ from conversion cimport tz_convert_utc_to_tzlocal
4646
from frequencies cimport (get_freq_code, get_base_alias,
4747
get_to_timestamp_base, get_freq_str,
4848
get_rule_month)
49-
from parsing import parse_time_string, NAT_SENTINEL
49+
from parsing import parse_time_string
5050
from resolution import Resolution
5151
from nattype import nat_strings, NaT, iNaT
5252
from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike
5353
from offsets cimport to_offset
5454
from offsets import _Tick
5555

56-
cdef bint PY2 = str == bytes
56+
DEF PY2 = str == bytes
5757

5858

5959
cdef extern from "period_helper.h":
@@ -729,7 +729,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt):
729729

730730
result = result.replace(str_extra_fmts[i], repl)
731731

732-
if PY2:
732+
IF PY2:
733733
result = result.decode('utf-8', 'ignore')
734734

735735
return result
@@ -1820,7 +1820,7 @@ class Period(_Period):
18201820
value = str(value)
18211821
value = value.upper()
18221822
dt, _, reso = parse_time_string(value, freq)
1823-
if dt is NAT_SENTINEL:
1823+
if dt is NaT:
18241824
ordinal = iNaT
18251825

18261826
if freq is None:

0 commit comments

Comments
 (0)