From 36b091e15c06d29f4e1f551003f367e355e4ebc8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Feb 2022 11:17:31 -0800 Subject: [PATCH] REF: extract_ordinals support 2D --- pandas/_libs/tslibs/period.pyx | 85 +++++++++++++++-------- pandas/tests/tslibs/test_period_asfreq.py | 27 ++++++- 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index b2ea2e746b44c..8c331b13f9735 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -4,6 +4,7 @@ cimport numpy as cnp from cpython.object cimport ( Py_EQ, Py_NE, + PyObject, PyObject_RichCompare, PyObject_RichCompareBool, ) @@ -1449,45 +1450,69 @@ def from_ordinals(const int64_t[:] values, freq): @cython.wraparound(False) @cython.boundscheck(False) -def extract_ordinals(ndarray[object] values, freq) -> np.ndarray: - # TODO: Change type to const object[:] when Cython supports that. +def extract_ordinals(ndarray values, freq) -> np.ndarray: + # values is object-dtype, may be 2D cdef: - Py_ssize_t i, n = len(values) - int64_t[:] ordinals = np.empty(n, dtype=np.int64) + Py_ssize_t i, n = values.size + int64_t ordinal + ndarray ordinals = np.empty((<object>values).shape, dtype=np.int64) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) object p + if values.descr.type_num != cnp.NPY_OBJECT: + # if we don't raise here, we'll segfault later! 
+ raise TypeError("extract_ordinals values must be object-dtype") + freqstr = Period._maybe_convert_freq(freq).freqstr for i in range(n): - p = values[i] + # Analogous to: p = values[i] + p = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0] - if checknull_with_nat(p): - ordinals[i] = NPY_NAT - elif util.is_integer_object(p): - if p == NPY_NAT: - ordinals[i] = NPY_NAT - else: - raise TypeError(p) + ordinal = _extract_ordinal(p, freqstr, freq) + + # Analogous to: ordinals[i] = ordinal + (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ordinal + + cnp.PyArray_MultiIter_NEXT(mi) + + return ordinals + + +cdef inline int64_t _extract_ordinal(object item, str freqstr, freq) except? -1: + """ + See extract_ordinals. + """ + cdef: + int64_t ordinal + + if checknull_with_nat(item): + ordinal = NPY_NAT + elif util.is_integer_object(item): + if item == NPY_NAT: + ordinal = NPY_NAT else: - try: - ordinals[i] = p.ordinal - - if p.freqstr != freqstr: - msg = DIFFERENT_FREQ.format(cls="PeriodIndex", - own_freq=freqstr, - other_freq=p.freqstr) - raise IncompatibleFrequency(msg) - - except AttributeError: - p = Period(p, freq=freq) - if p is NaT: - # input may contain NaT-like string - ordinals[i] = NPY_NAT - else: - ordinals[i] = p.ordinal - - return ordinals.base # .base to access underlying np.ndarray + raise TypeError(item) + else: + try: + ordinal = item.ordinal + + if item.freqstr != freqstr: + msg = DIFFERENT_FREQ.format(cls="PeriodIndex", + own_freq=freqstr, + other_freq=item.freqstr) + raise IncompatibleFrequency(msg) + + except AttributeError: + item = Period(item, freq=freq) + if item is NaT: + # input may contain NaT-like string + ordinal = NPY_NAT + else: + ordinal = item.ordinal + + return ordinal def extract_freq(ndarray[object] values) -> BaseOffset: diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 2592fdbb2d361..84c3d07470c38 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py 
@@ -1,11 +1,18 @@ +import numpy as np import pytest -from pandas._libs.tslibs import to_offset +from pandas._libs.tslibs import ( + iNaT, + to_offset, +) from pandas._libs.tslibs.period import ( + extract_ordinals, period_asfreq, period_ordinal, ) +import pandas._testing as tm + def get_freq_code(freqstr: str) -> int: off = to_offset(freqstr) @@ -88,3 +95,21 @@ def test_period_ordinal_business_day(day, expected): # 5000 is PeriodDtypeCode for BusinessDay args = (2013, 10, day, 0, 0, 0, 0, 0, 5000) assert period_ordinal(*args) == expected + + +class TestExtractOrdinals: + def test_extract_ordinals_raises(self): + # with non-object, make sure we raise TypeError, not segfault + arr = np.arange(5) + freq = to_offset("D") + with pytest.raises(TypeError, match="values must be object-dtype"): + extract_ordinals(arr, freq) + + def test_extract_ordinals_2d(self): + freq = to_offset("D") + arr = np.empty(10, dtype=object) + arr[:] = iNaT + + res = extract_ordinals(arr, freq) + res2 = extract_ordinals(arr.reshape(5, 2), freq) + tm.assert_numpy_array_equal(res, res2.reshape(-1))