Skip to content

Commit d010469

Browse files
jbrockmendel authored and jreback committed
use memoryviews instead of ndarrays (#22147)
1 parent 599631c commit d010469

12 files changed

+100 -98 lines changed

pandas/_libs/hashing.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import cython
66

77
import numpy as np
8-
from numpy cimport ndarray, uint8_t, uint32_t, uint64_t
8+
from numpy cimport uint8_t, uint32_t, uint64_t
99

1010
from util cimport _checknull
1111
from cpython cimport (PyBytes_Check,
@@ -17,7 +17,7 @@ DEF dROUNDS = 4
1717

1818

1919
@cython.boundscheck(False)
20-
def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
20+
def hash_object_array(object[:] arr, object key, object encoding='utf8'):
2121
"""
2222
Parameters
2323
----------
@@ -37,7 +37,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
3737
"""
3838
cdef:
3939
Py_ssize_t i, l, n
40-
ndarray[uint64_t] result
40+
uint64_t[:] result
4141
bytes data, k
4242
uint8_t *kb
4343
uint64_t *lens
@@ -89,7 +89,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
8989

9090
free(vecs)
9191
free(lens)
92-
return result
92+
return result.base # .base to retrieve underlying np.ndarray
9393

9494

9595
cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:

pandas/_libs/tslib.pyx

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
3-
cimport cython
43
from cython cimport Py_ssize_t
54

65
from cpython cimport PyFloat_Check, PyUnicode_Check
@@ -37,8 +36,7 @@ from tslibs.np_datetime import OutOfBoundsDatetime
3736
from tslibs.parsing import parse_datetime_string
3837

3938
from tslibs.timedeltas cimport cast_from_unit
40-
from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
41-
treat_tz_as_pytz, get_dst_info)
39+
from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info
4240
from tslibs.conversion cimport (tz_convert_single, _TSObject,
4341
convert_datetime_to_tsobject,
4442
get_datetime64_nanos,
@@ -77,8 +75,7 @@ cdef inline object create_time_from_ts(
7775
return time(dts.hour, dts.min, dts.sec, dts.us)
7876

7977

80-
def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
81-
box="datetime"):
78+
def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"):
8279
"""
8380
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp
8481
@@ -102,7 +99,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
10299

103100
cdef:
104101
Py_ssize_t i, n = len(arr)
105-
ndarray[int64_t] trans, deltas
102+
ndarray[int64_t] trans
103+
int64_t[:] deltas
104+
Py_ssize_t pos
106105
npy_datetimestruct dts
107106
object dt
108107
int64_t value, delta

pandas/_libs/tslibs/conversion.pyx

+27-23
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def ensure_datetime64ns(ndarray arr, copy=True):
9191
"""
9292
cdef:
9393
Py_ssize_t i, n = arr.size
94-
ndarray[int64_t] ivalues, iresult
94+
int64_t[:] ivalues, iresult
9595
NPY_DATETIMEUNIT unit
9696
npy_datetimestruct dts
9797

@@ -139,7 +139,7 @@ def ensure_timedelta64ns(ndarray arr, copy=True):
139139
return arr.astype(TD_DTYPE, copy=copy)
140140

141141

142-
def datetime_to_datetime64(ndarray[object] values):
142+
def datetime_to_datetime64(object[:] values):
143143
"""
144144
Convert ndarray of datetime-like objects to int64 array representing
145145
nanosecond timestamps.
@@ -156,7 +156,7 @@ def datetime_to_datetime64(ndarray[object] values):
156156
cdef:
157157
Py_ssize_t i, n = len(values)
158158
object val, inferred_tz = None
159-
ndarray[int64_t] iresult
159+
int64_t[:] iresult
160160
npy_datetimestruct dts
161161
_TSObject _ts
162162

@@ -525,7 +525,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
525525
Sets obj.tzinfo inplace, alters obj.dts inplace.
526526
"""
527527
cdef:
528-
ndarray[int64_t] trans, deltas
528+
ndarray[int64_t] trans
529+
int64_t[:] deltas
529530
int64_t local_val
530531
Py_ssize_t pos
531532

@@ -631,15 +632,16 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz,
631632
cdef:
632633
Py_ssize_t n = len(values)
633634
Py_ssize_t i, j, pos
634-
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
635-
ndarray[int64_t] tt, trans, deltas
636-
ndarray[Py_ssize_t] posn
635+
int64_t[:] result = np.empty(n, dtype=np.int64)
636+
ndarray[int64_t] tt, trans
637+
int64_t[:] deltas
638+
Py_ssize_t[:] posn
637639
int64_t v
638640

639641
trans, deltas, typ = get_dst_info(tz)
640642
if not to_utc:
641643
# We add `offset` below instead of subtracting it
642-
deltas = -1 * deltas
644+
deltas = -1 * np.array(deltas, dtype='i8')
643645

644646
tt = values[values != NPY_NAT]
645647
if not len(tt):
@@ -728,7 +730,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
728730
converted: int64
729731
"""
730732
cdef:
731-
ndarray[int64_t] trans, deltas
733+
int64_t[:] deltas
732734
Py_ssize_t pos
733735
int64_t v, offset, utc_date
734736
npy_datetimestruct dts
@@ -756,7 +758,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
756758
else:
757759
# Convert UTC to other timezone
758760
arr = np.array([utc_date])
759-
# Note: at least with cython 0.28.3, doing a looking `[0]` in the next
761+
# Note: at least with cython 0.28.3, doing a lookup `[0]` in the next
760762
# line is sensitive to the declared return type of _tz_convert_dst;
761763
# if it is declared as returning ndarray[int64_t], a compile-time error
762764
# is raised.
@@ -781,10 +783,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
781783
"""
782784

783785
cdef:
784-
ndarray[int64_t] utc_dates, tt, result, trans, deltas
786+
ndarray[int64_t] utc_dates, result
785787
Py_ssize_t i, j, pos, n = len(vals)
786-
int64_t v, offset, delta
787-
npy_datetimestruct dts
788+
int64_t v
788789

789790
if len(vals) == 0:
790791
return np.array([], dtype=np.int64)
@@ -843,7 +844,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
843844
localized : ndarray[int64_t]
844845
"""
845846
cdef:
846-
ndarray[int64_t] trans, deltas, idx_shifted
847+
ndarray[int64_t] trans
848+
int64_t[:] deltas, idx_shifted
847849
ndarray ambiguous_array
848850
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
849851
int64_t *tdata
@@ -1069,7 +1071,7 @@ def normalize_date(object dt):
10691071

10701072
@cython.wraparound(False)
10711073
@cython.boundscheck(False)
1072-
def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
1074+
def normalize_i8_timestamps(int64_t[:] stamps, tz=None):
10731075
"""
10741076
Normalize each of the (nanosecond) timestamps in the given array by
10751077
rounding down to the beginning of the day (i.e. midnight). If `tz`
@@ -1087,7 +1089,7 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
10871089
cdef:
10881090
Py_ssize_t i, n = len(stamps)
10891091
npy_datetimestruct dts
1090-
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
1092+
int64_t[:] result = np.empty(n, dtype=np.int64)
10911093

10921094
if tz is not None:
10931095
tz = maybe_get_tz(tz)
@@ -1101,12 +1103,12 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
11011103
dt64_to_dtstruct(stamps[i], &dts)
11021104
result[i] = _normalized_stamp(&dts)
11031105

1104-
return result
1106+
return result.base # .base to access underlying np.ndarray
11051107

11061108

11071109
@cython.wraparound(False)
11081110
@cython.boundscheck(False)
1109-
cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
1111+
cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz):
11101112
"""
11111113
Normalize each of the (nanosecond) timestamps in the given array by
11121114
rounding down to the beginning of the day (i.e. midnight) for the
@@ -1123,8 +1125,9 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
11231125
"""
11241126
cdef:
11251127
Py_ssize_t n = len(stamps)
1126-
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
1127-
ndarray[int64_t] trans, deltas
1128+
int64_t[:] result = np.empty(n, dtype=np.int64)
1129+
ndarray[int64_t] trans
1130+
int64_t[:] deltas
11281131
Py_ssize_t[:] pos
11291132
npy_datetimestruct dts
11301133
int64_t delta
@@ -1190,7 +1193,7 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil:
11901193
return dtstruct_to_dt64(dts)
11911194

11921195

1193-
def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
1196+
def is_date_array_normalized(int64_t[:] stamps, tz=None):
11941197
"""
11951198
Check if all of the given (nanosecond) timestamps are normalized to
11961199
midnight, i.e. hour == minute == second == 0. If the optional timezone
@@ -1206,8 +1209,9 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
12061209
is_normalized : bool True if all stamps are normalized
12071210
"""
12081211
cdef:
1209-
Py_ssize_t i, n = len(stamps)
1210-
ndarray[int64_t] trans, deltas
1212+
Py_ssize_t pos, i, n = len(stamps)
1213+
ndarray[int64_t] trans
1214+
int64_t[:] deltas
12111215
npy_datetimestruct dts
12121216
int64_t local_val, delta
12131217

pandas/_libs/tslibs/fields.pyx

+2-3
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,7 @@ def build_field_sarray(ndarray[int64_t] dtindex):
8585

8686
@cython.wraparound(False)
8787
@cython.boundscheck(False)
88-
def get_date_name_field(ndarray[int64_t] dtindex, object field,
89-
object locale=None):
88+
def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
9089
"""
9190
Given a int64-based datetime index, return array of strings of date
9291
name based on requested field (e.g. weekday_name)
@@ -134,7 +133,7 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field,
134133

135134

136135
@cython.wraparound(False)
137-
def get_start_end_field(ndarray[int64_t] dtindex, object field,
136+
def get_start_end_field(int64_t[:] dtindex, object field,
138137
object freqstr=None, int month_kw=12):
139138
"""
140139
Given an int64-based datetime index return array of indicators

pandas/_libs/tslibs/parsing.pyx

+18-19
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ from cpython.datetime cimport datetime
1414
import time
1515

1616
import numpy as np
17-
from numpy cimport ndarray
1817

1918
# Avoid import from outside _libs
2019
if sys.version_info.major == 2:
@@ -381,11 +380,11 @@ cpdef object _get_rule_month(object source, object default='DEC'):
381380
# Parsing for type-inference
382381

383382

384-
def try_parse_dates(ndarray[object] values, parser=None,
383+
def try_parse_dates(object[:] values, parser=None,
385384
dayfirst=False, default=None):
386385
cdef:
387386
Py_ssize_t i, n
388-
ndarray[object] result
387+
object[:] result
389388

390389
n = len(values)
391390
result = np.empty(n, dtype='O')
@@ -420,15 +419,15 @@ def try_parse_dates(ndarray[object] values, parser=None,
420419
# raise if passed parser and it failed
421420
raise
422421

423-
return result
422+
return result.base # .base to access underlying ndarray
424423

425424

426-
def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
425+
def try_parse_date_and_time(object[:] dates, object[:] times,
427426
date_parser=None, time_parser=None,
428427
dayfirst=False, default=None):
429428
cdef:
430429
Py_ssize_t i, n
431-
ndarray[object] result
430+
object[:] result
432431

433432
n = len(dates)
434433
if len(times) != n:
@@ -457,14 +456,14 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
457456
result[i] = datetime(d.year, d.month, d.day,
458457
t.hour, t.minute, t.second)
459458

460-
return result
459+
return result.base # .base to access underlying ndarray
461460

462461

463-
def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
464-
ndarray[object] days):
462+
def try_parse_year_month_day(object[:] years, object[:] months,
463+
object[:] days):
465464
cdef:
466465
Py_ssize_t i, n
467-
ndarray[object] result
466+
object[:] result
468467

469468
n = len(years)
470469
if len(months) != n or len(days) != n:
@@ -474,19 +473,19 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
474473
for i in range(n):
475474
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))
476475

477-
return result
476+
return result.base # .base to access underlying ndarray
478477

479478

480-
def try_parse_datetime_components(ndarray[object] years,
481-
ndarray[object] months,
482-
ndarray[object] days,
483-
ndarray[object] hours,
484-
ndarray[object] minutes,
485-
ndarray[object] seconds):
479+
def try_parse_datetime_components(object[:] years,
480+
object[:] months,
481+
object[:] days,
482+
object[:] hours,
483+
object[:] minutes,
484+
object[:] seconds):
486485

487486
cdef:
488487
Py_ssize_t i, n
489-
ndarray[object] result
488+
object[:] result
490489
int secs
491490
double float_secs
492491
double micros
@@ -509,7 +508,7 @@ def try_parse_datetime_components(ndarray[object] years,
509508
int(hours[i]), int(minutes[i]), secs,
510509
int(micros))
511510

512-
return result
511+
return result.base # .base to access underlying ndarray
513512

514513

515514
# ----------------------------------------------------------------------

0 commit comments

Comments
 (0)