Skip to content

REF: extract_ordinals support 2D #45782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 55 additions & 30 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ cimport numpy as cnp
from cpython.object cimport (
Py_EQ,
Py_NE,
PyObject,
PyObject_RichCompare,
PyObject_RichCompareBool,
)
Expand Down Expand Up @@ -1449,45 +1450,69 @@ def from_ordinals(const int64_t[:] values, freq):

# NOTE(review): this span is a rendered diff whose +/- markers and indentation
# were stripped, so superseded and replacement lines are interleaved. The two
# `def extract_ordinals` lines below appear to be the old signature (typed
# ndarray[object]) followed by the new one (untyped ndarray, "may be 2D").
# Confirm against the real period.pyx before treating this as runnable code.
@cython.wraparound(False)
@cython.boundscheck(False)
def extract_ordinals(ndarray[object] values, freq) -> np.ndarray:
# TODO: Change type to const object[:] when Cython supports that.
def extract_ordinals(ndarray values, freq) -> np.ndarray:
# values is object-dtype, may be 2D

cdef:
# NOTE(review): `i, n` and `ordinals` are each declared twice in this listing;
# presumably the len()/int64_t[:] pair is the removed version and the
# .size/ndarray pair is its replacement -- confirm.
Py_ssize_t i, n = len(values)
int64_t[:] ordinals = np.empty(n, dtype=np.int64)
Py_ssize_t i, n = values.size
int64_t ordinal
ndarray ordinals = np.empty((<object>values).shape, dtype=np.int64)
# The multi-iterator is built over (ordinals, values): index 0 addresses the
# output element and index 1 the input element, as used in the loop below.
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values)
object p

if values.descr.type_num != cnp.NPY_OBJECT:
# if we don't raise here, we'll segfault later!
raise TypeError("extract_ordinals values must be object-dtype")

freqstr = Period._maybe_convert_freq(freq).freqstr

for i in range(n):
p = values[i]
# Analogous to: p = values[i]
p = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

# NOTE(review): the null/integer checks from here through `raise TypeError(p)`
# duplicate what `_extract_ordinal` does with `item`; they look like the
# removed inline logic that the helper call now replaces -- confirm.
if checknull_with_nat(p):
ordinals[i] = NPY_NAT
elif util.is_integer_object(p):
if p == NPY_NAT:
ordinals[i] = NPY_NAT
else:
raise TypeError(p)
ordinal = _extract_ordinal(p, freqstr, freq)

# Analogous to: ordinals[i] = ordinal
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ordinal

# Advance both the input and output positions to the next element.
cnp.PyArray_MultiIter_NEXT(mi)

return ordinals


# Convert a single element to its int64 ordinal for `extract_ordinals`.
# `except? -1` marks -1 as the error-signalling return value while still
# allowing -1 to be returned as a legitimate result.
# NOTE(review): this span is a rendered diff with +/- markers and indentation
# stripped; lines that use `ordinals[i]` / `p` (names this helper never
# declares) look like removed lines from the old extract_ordinals body that
# the diff view placed here -- confirm against the real period.pyx.
cdef inline int64_t _extract_ordinal(object item, str freqstr, freq) except? -1:
"""
See extract_ordinals.
"""
cdef:
int64_t ordinal

# Null-like inputs map to the NaT sentinel.
if checknull_with_nat(item):
ordinal = NPY_NAT
elif util.is_integer_object(item):
# The only integer accepted is the NaT sentinel value itself.
if item == NPY_NAT:
ordinal = NPY_NAT
else:
# NOTE(review): from `try:` down to `return ordinals.base` the code writes to
# `ordinals[i]` and reads `p`; presumably superseded lines -- confirm.
try:
ordinals[i] = p.ordinal

if p.freqstr != freqstr:
msg = DIFFERENT_FREQ.format(cls="PeriodIndex",
own_freq=freqstr,
other_freq=p.freqstr)
raise IncompatibleFrequency(msg)

except AttributeError:
p = Period(p, freq=freq)
if p is NaT:
# input may contain NaT-like string
ordinals[i] = NPY_NAT
else:
ordinals[i] = p.ordinal

return ordinals.base # .base to access underlying np.ndarray
raise TypeError(item)
else:
try:
# Period-like objects expose `.ordinal` directly.
ordinal = item.ordinal

# An element whose frequency string differs from the target is rejected.
if item.freqstr != freqstr:
msg = DIFFERENT_FREQ.format(cls="PeriodIndex",
own_freq=freqstr,
other_freq=item.freqstr)
raise IncompatibleFrequency(msg)

except AttributeError:
# No `.ordinal` / `.freqstr` attribute: build a Period from the raw item.
item = Period(item, freq=freq)
if item is NaT:
# input may contain NaT-like string
ordinal = NPY_NAT
else:
ordinal = item.ordinal

return ordinal


def extract_freq(ndarray[object] values) -> BaseOffset:
Expand Down
27 changes: 26 additions & 1 deletion pandas/tests/tslibs/test_period_asfreq.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
import numpy as np
import pytest

from pandas._libs.tslibs import to_offset
from pandas._libs.tslibs import (
iNaT,
to_offset,
)
from pandas._libs.tslibs.period import (
extract_ordinals,
period_asfreq,
period_ordinal,
)

import pandas._testing as tm


def get_freq_code(freqstr: str) -> int:
off = to_offset(freqstr)
Expand Down Expand Up @@ -88,3 +95,21 @@ def test_period_ordinal_business_day(day, expected):
# 5000 is PeriodDtypeCode for BusinessDay
args = (2013, 10, day, 0, 0, 0, 0, 0, 5000)
assert period_ordinal(*args) == expected


class TestExtractOrdinals:
    """Tests for ``extract_ordinals`` dtype validation and 2D support."""

    def test_extract_ordinals_raises(self):
        # with non-object, make sure we raise TypeError, not segfault
        arr = np.arange(5)
        freq = to_offset("D")
        with pytest.raises(TypeError, match="values must be object-dtype"):
            extract_ordinals(arr, freq)

    def test_extract_ordinals_2d(self):
        freq = to_offset("D")
        arr = np.empty(10, dtype=object)
        arr[:] = iNaT

        res = extract_ordinals(arr, freq)
        res2 = extract_ordinals(arr.reshape(5, 2), freq)

        # Check the shape explicitly: comparing only the flattened result
        # would also pass if the 2D input had been collapsed to 1D.
        assert res2.shape == (5, 2)
        tm.assert_numpy_array_equal(res, res2.reshape(-1))

    def test_extract_ordinals_2d_preserves_order(self):
        # An all-NaT input cannot reveal elements written to the wrong
        # position, so use values that produce distinct ordinals.
        freq = to_offset("D")
        arr = np.array(
            [f"1970-01-{day:02d}" for day in range(1, 11)], dtype=object
        )
        # Daily ordinals count days from 1970-01-01.
        expected = np.arange(10, dtype=np.int64)

        res = extract_ordinals(arr, freq)
        tm.assert_numpy_array_equal(res, expected)

        res2 = extract_ordinals(arr.reshape(5, 2), freq)
        tm.assert_numpy_array_equal(res2, expected.reshape(5, 2))