Skip to content

use memoryviews instead of ndarrays #22147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import cython

import numpy as np
from numpy cimport ndarray, uint8_t, uint32_t, uint64_t
from numpy cimport uint8_t, uint32_t, uint64_t

from util cimport _checknull
from cpython cimport (PyBytes_Check,
Expand All @@ -17,7 +17,7 @@ DEF dROUNDS = 4


@cython.boundscheck(False)
def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
def hash_object_array(object[:] arr, object key, object encoding='utf8'):
"""
Parameters
----------
Expand All @@ -37,7 +37,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
"""
cdef:
Py_ssize_t i, l, n
ndarray[uint64_t] result
uint64_t[:] result
bytes data, k
uint8_t *kb
uint64_t *lens
Expand Down Expand Up @@ -89,7 +89,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):

free(vecs)
free(lens)
return result
return result.base # .base to retrieve underlying np.ndarray


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
Expand Down
11 changes: 5 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
# cython: profile=False
cimport cython
from cython cimport Py_ssize_t

from cpython cimport PyFloat_Check, PyUnicode_Check
Expand Down Expand Up @@ -37,8 +36,7 @@ from tslibs.np_datetime import OutOfBoundsDatetime
from tslibs.parsing import parse_datetime_string

from tslibs.timedeltas cimport cast_from_unit
from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_pytz, get_dst_info)
from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info
from tslibs.conversion cimport (tz_convert_single, _TSObject,
convert_datetime_to_tsobject,
get_datetime64_nanos,
Expand Down Expand Up @@ -77,8 +75,7 @@ cdef inline object create_time_from_ts(
return time(dts.hour, dts.min, dts.sec, dts.us)


def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
box="datetime"):
def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"):
"""
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp

Expand All @@ -102,7 +99,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,

cdef:
Py_ssize_t i, n = len(arr)
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t pos
npy_datetimestruct dts
object dt
int64_t value, delta
Expand Down
50 changes: 27 additions & 23 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def ensure_datetime64ns(ndarray arr, copy=True):
"""
cdef:
Py_ssize_t i, n = arr.size
ndarray[int64_t] ivalues, iresult
int64_t[:] ivalues, iresult
NPY_DATETIMEUNIT unit
npy_datetimestruct dts

Expand Down Expand Up @@ -139,7 +139,7 @@ def ensure_timedelta64ns(ndarray arr, copy=True):
return arr.astype(TD_DTYPE, copy=copy)


def datetime_to_datetime64(ndarray[object] values):
def datetime_to_datetime64(object[:] values):
"""
Convert ndarray of datetime-like objects to int64 array representing
nanosecond timestamps.
Expand All @@ -156,7 +156,7 @@ def datetime_to_datetime64(ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
object val, inferred_tz = None
ndarray[int64_t] iresult
int64_t[:] iresult
npy_datetimestruct dts
_TSObject _ts

Expand Down Expand Up @@ -525,7 +525,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
Sets obj.tzinfo inplace, alters obj.dts inplace.
"""
cdef:
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
int64_t local_val
Py_ssize_t pos

Expand Down Expand Up @@ -631,15 +632,16 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz,
cdef:
Py_ssize_t n = len(values)
Py_ssize_t i, j, pos
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] tt, trans, deltas
ndarray[Py_ssize_t] posn
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] tt, trans
int64_t[:] deltas
Py_ssize_t[:] posn
int64_t v

trans, deltas, typ = get_dst_info(tz)
if not to_utc:
# We add `offset` below instead of subtracting it
deltas = -1 * deltas
deltas = -1 * np.array(deltas, dtype='i8')

tt = values[values != NPY_NAT]
if not len(tt):
Expand Down Expand Up @@ -728,7 +730,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
converted: int64
"""
cdef:
ndarray[int64_t] trans, deltas
int64_t[:] deltas
Py_ssize_t pos
int64_t v, offset, utc_date
npy_datetimestruct dts
Expand Down Expand Up @@ -756,7 +758,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
else:
# Convert UTC to other timezone
arr = np.array([utc_date])
# Note: at least with cython 0.28.3, doing a looking `[0]` in the next
# Note: at least with cython 0.28.3, doing a lookup `[0]` in the next
# line is sensitive to the declared return type of _tz_convert_dst;
# if it is declared as returning ndarray[int64_t], a compile-time error
# is raised.
Expand All @@ -781,10 +783,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
"""

cdef:
ndarray[int64_t] utc_dates, tt, result, trans, deltas
ndarray[int64_t] utc_dates, result
Py_ssize_t i, j, pos, n = len(vals)
int64_t v, offset, delta
npy_datetimestruct dts
int64_t v

if len(vals) == 0:
return np.array([], dtype=np.int64)
Expand Down Expand Up @@ -843,7 +844,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
localized : ndarray[int64_t]
"""
cdef:
ndarray[int64_t] trans, deltas, idx_shifted
ndarray[int64_t] trans
int64_t[:] deltas, idx_shifted
ndarray ambiguous_array
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
int64_t *tdata
Expand Down Expand Up @@ -1069,7 +1071,7 @@ def normalize_date(object dt):

@cython.wraparound(False)
@cython.boundscheck(False)
def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
def normalize_i8_timestamps(int64_t[:] stamps, tz=None):
"""
Normalize each of the (nanosecond) timestamps in the given array by
rounding down to the beginning of the day (i.e. midnight). If `tz`
Expand All @@ -1087,7 +1089,7 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
npy_datetimestruct dts
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
int64_t[:] result = np.empty(n, dtype=np.int64)

if tz is not None:
tz = maybe_get_tz(tz)
Expand All @@ -1101,12 +1103,12 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
dt64_to_dtstruct(stamps[i], &dts)
result[i] = _normalized_stamp(&dts)

return result
return result.base # .base to access underlying np.ndarray


@cython.wraparound(False)
@cython.boundscheck(False)
cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz):
"""
Normalize each of the (nanosecond) timestamps in the given array by
rounding down to the beginning of the day (i.e. midnight) for the
Expand All @@ -1123,8 +1125,9 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
"""
cdef:
Py_ssize_t n = len(stamps)
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans, deltas
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t delta
Expand Down Expand Up @@ -1190,7 +1193,7 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil:
return dtstruct_to_dt64(dts)


def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
def is_date_array_normalized(int64_t[:] stamps, tz=None):
"""
Check if all of the given (nanosecond) timestamps are normalized to
midnight, i.e. hour == minute == second == 0. If the optional timezone
Expand All @@ -1206,8 +1209,9 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
is_normalized : bool True if all stamps are normalized
"""
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans, deltas
Py_ssize_t pos, i, n = len(stamps)
ndarray[int64_t] trans
int64_t[:] deltas
npy_datetimestruct dts
int64_t local_val, delta

Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ def build_field_sarray(ndarray[int64_t] dtindex):

@cython.wraparound(False)
@cython.boundscheck(False)
def get_date_name_field(ndarray[int64_t] dtindex, object field,
object locale=None):
def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
"""
Given an int64-based datetime index, return array of strings of date
name based on requested field (e.g. weekday_name)
Expand Down Expand Up @@ -134,7 +133,7 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field,


@cython.wraparound(False)
def get_start_end_field(ndarray[int64_t] dtindex, object field,
def get_start_end_field(int64_t[:] dtindex, object field,
object freqstr=None, int month_kw=12):
"""
Given an int64-based datetime index return array of indicators
Expand Down
37 changes: 18 additions & 19 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ from cpython.datetime cimport datetime
import time

import numpy as np
from numpy cimport ndarray

# Avoid import from outside _libs
if sys.version_info.major == 2:
Expand Down Expand Up @@ -381,11 +380,11 @@ cpdef object _get_rule_month(object source, object default='DEC'):
# Parsing for type-inference


def try_parse_dates(ndarray[object] values, parser=None,
def try_parse_dates(object[:] values, parser=None,
dayfirst=False, default=None):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(values)
result = np.empty(n, dtype='O')
Expand Down Expand Up @@ -420,15 +419,15 @@ def try_parse_dates(ndarray[object] values, parser=None,
# raise if passed parser and it failed
raise

return result
return result.base # .base to access underlying ndarray


def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
def try_parse_date_and_time(object[:] dates, object[:] times,
date_parser=None, time_parser=None,
dayfirst=False, default=None):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(dates)
if len(times) != n:
Expand Down Expand Up @@ -457,14 +456,14 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
result[i] = datetime(d.year, d.month, d.day,
t.hour, t.minute, t.second)

return result
return result.base # .base to access underlying ndarray


def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
ndarray[object] days):
def try_parse_year_month_day(object[:] years, object[:] months,
object[:] days):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(years)
if len(months) != n or len(days) != n:
Expand All @@ -474,19 +473,19 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
for i in range(n):
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))

return result
return result.base # .base to access underlying ndarray


def try_parse_datetime_components(ndarray[object] years,
ndarray[object] months,
ndarray[object] days,
ndarray[object] hours,
ndarray[object] minutes,
ndarray[object] seconds):
def try_parse_datetime_components(object[:] years,
object[:] months,
object[:] days,
object[:] hours,
object[:] minutes,
object[:] seconds):

cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result
int secs
double float_secs
double micros
Expand All @@ -509,7 +508,7 @@ def try_parse_datetime_components(ndarray[object] years,
int(hours[i]), int(minutes[i]), secs,
int(micros))

return result
return result.base # .base to access underlying ndarray


# ----------------------------------------------------------------------
Expand Down
Loading