Skip to content

Commit 6b4750c

Browse files
jbrockmendel authored and phofl committed
REF: extract_ordinals support 2D (pandas-dev#45782)
1 parent be2a042 commit 6b4750c

File tree

2 files changed

+81
-31
lines changed

2 files changed

+81
-31
lines changed

pandas/_libs/tslibs/period.pyx

+55-30
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ cimport numpy as cnp
44
from cpython.object cimport (
55
Py_EQ,
66
Py_NE,
7+
PyObject,
78
PyObject_RichCompare,
89
PyObject_RichCompareBool,
910
)
@@ -1449,45 +1450,69 @@ def from_ordinals(const int64_t[:] values, freq):
14491450

14501451
@cython.wraparound(False)
@cython.boundscheck(False)
def extract_ordinals(ndarray values, freq) -> np.ndarray:
    """
    Convert an object-dtype array into an int64 array of period ordinals.

    Parameters
    ----------
    values : ndarray
        Must be object-dtype; may be 2D.
    freq
        Converted with ``Period._maybe_convert_freq`` and forwarded to
        ``_extract_ordinal`` for every element.

    Returns
    -------
    np.ndarray
        int64 array with the same shape as ``values``.

    Raises
    ------
    TypeError
        If ``values`` is not object-dtype.
    """
    cdef:
        Py_ssize_t idx, count = values.size
        int64_t ordinal
        ndarray result = np.empty((<object>values).shape, dtype=np.int64)
        cnp.broadcast it = cnp.PyArray_MultiIterNew2(result, values)
        object item

    if values.descr.type_num != cnp.NPY_OBJECT:
        # if we don't raise here, we'll segfault later!
        raise TypeError("extract_ordinals values must be object-dtype")

    freqstr = Period._maybe_convert_freq(freq).freqstr

    for idx in range(count):
        # Equivalent to: item = values[idx]
        item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(it, 1))[0]

        ordinal = _extract_ordinal(item, freqstr, freq)

        # Equivalent to: result[idx] = ordinal
        (<int64_t*>cnp.PyArray_MultiIter_DATA(it, 0))[0] = ordinal

        # Advance both the input and output positions together.
        cnp.PyArray_MultiIter_NEXT(it)

    return result
1481+
1482+
1483+
cdef inline int64_t _extract_ordinal(object item, str freqstr, freq) except? -1:
    """
    Compute the int64 ordinal for one element; see extract_ordinals.

    Items for which ``checknull_with_nat`` is true, and the integer
    ``NPY_NAT``, map to ``NPY_NAT``; any other integer raises TypeError.
    Items exposing ``ordinal`` and ``freqstr`` must have a ``freqstr`` equal
    to the one given, otherwise IncompatibleFrequency is raised.  Anything
    lacking those attributes is passed through ``Period(item, freq=freq)``.
    """
    cdef:
        int64_t result

    if checknull_with_nat(item):
        return NPY_NAT

    if util.is_integer_object(item):
        # The only integer accepted is the NaT sentinel itself.
        if item == NPY_NAT:
            return NPY_NAT
        raise TypeError(item)

    try:
        result = item.ordinal

        if item.freqstr != freqstr:
            raise IncompatibleFrequency(
                DIFFERENT_FREQ.format(
                    cls="PeriodIndex",
                    own_freq=freqstr,
                    other_freq=item.freqstr,
                )
            )

    except AttributeError:
        # Not Period-like: let the Period constructor interpret it.
        parsed = Period(item, freq=freq)
        if parsed is NaT:
            # input may contain NaT-like string
            return NPY_NAT
        result = parsed.ordinal

    return result
14911516

14921517

14931518
def extract_freq(ndarray[object] values) -> BaseOffset:

pandas/tests/tslibs/test_period_asfreq.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
1+
import numpy as np
12
import pytest
23

3-
from pandas._libs.tslibs import to_offset
4+
from pandas._libs.tslibs import (
5+
iNaT,
6+
to_offset,
7+
)
48
from pandas._libs.tslibs.period import (
9+
extract_ordinals,
510
period_asfreq,
611
period_ordinal,
712
)
813

14+
import pandas._testing as tm
15+
916

1017
def get_freq_code(freqstr: str) -> int:
1118
off = to_offset(freqstr)
@@ -88,3 +95,21 @@ def test_period_ordinal_business_day(day, expected):
8895
# 5000 is PeriodDtypeCode for BusinessDay
8996
args = (2013, 10, day, 0, 0, 0, 0, 0, 5000)
9097
assert period_ordinal(*args) == expected
98+
99+
100+
class TestExtractOrdinals:
    """Tests for ``period.extract_ordinals``."""

    def test_extract_ordinals_raises(self):
        # with non-object, make sure we raise TypeError, not segfault
        arr = np.arange(5)
        freq = to_offset("D")
        with pytest.raises(TypeError, match="values must be object-dtype"):
            extract_ordinals(arr, freq)

    def test_extract_ordinals_2d(self):
        freq = to_offset("D")
        arr = np.empty(10, dtype=object)
        arr[:] = iNaT
        arr2d = arr.reshape(5, 2)

        res = extract_ordinals(arr, freq)
        res2 = extract_ordinals(arr2d, freq)

        # Every element is the integer NaT sentinel, so every ordinal is iNaT.
        expected = np.full(10, iNaT, dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        # Comparing only against res2.reshape(-1) would also pass if the 2D
        # input came back flattened, so pin the output shape explicitly.
        assert res2.shape == arr2d.shape
        tm.assert_numpy_array_equal(res2, expected.reshape(5, 2))
        tm.assert_numpy_array_equal(res, res2.reshape(-1))

0 commit comments

Comments
 (0)