Skip to content

Commit 1e73327

Browse files
Merge remote-tracking branch 'upstream/main' into td64-tests
2 parents 33203b3 + d86e200 commit 1e73327

33 files changed

+392
-208
lines changed

asv_bench/benchmarks/tslibs/fields.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ class TimeGetStartEndField:
6666

6767
def setup(self, size, side, period, freqstr, month_kw):
6868
arr = np.random.randint(0, 10, size=size, dtype="i8")
69-
self.i8data = arr
69+
self.dt64data = arr.view("M8[ns]")
7070

7171
self.attrname = f"is_{period}_{side}"
7272

7373
def time_get_start_end_field(self, size, side, period, freqstr, month_kw):
74-
get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw)
74+
get_start_end_field(self.dt64data, self.attrname, freqstr, month_kw=month_kw)

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ Other Deprecations
527527
- Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`)
528528
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
529529
- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
530+
- Deprecated passing arguments as positional in :meth:`Index.join` except for ``other``; pass them as keyword arguments instead (:issue:`46518`)
530531

531532
.. ---------------------------------------------------------------------------
532533
.. _whatsnew_150.performance:
@@ -708,6 +709,7 @@ Groupby/resample/rolling
708709
- Bug in :meth:`Rolling.var` and :meth:`Rolling.std` would give non-zero result with window of same values (:issue:`42064`)
709710
- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
710711
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
712+
- Bug in :meth:`DataFrame.rolling` raising ``ValueError`` when ``center=True``, ``axis=1`` and ``win_type`` is specified (:issue:`46135`)
711713

712714
Reshaping
713715
^^^^^^^^^

pandas/_libs/groupby.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -1325,8 +1325,8 @@ def group_rank(
13251325
mask=sub_mask,
13261326
)
13271327
for i in range(len(result)):
1328-
# TODO: why can't we do out[:, k] = result?
1329-
out[i, k] = result[i]
1328+
if labels[i] >= 0:
1329+
out[i, k] = result[i]
13301330

13311331

13321332
# ----------------------------------------------------------------------

pandas/_libs/join.pyx

+9-70
Original file line numberDiff line numberDiff line change
@@ -839,11 +839,16 @@ def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values,
839839
by_t[:] left_by_values,
840840
by_t[:] right_by_values,
841841
bint allow_exact_matches=True,
842-
tolerance=None):
842+
tolerance=None,
843+
bint use_hashtable=True):
843844

844845
cdef:
845846
ndarray[intp_t] bli, bri, fli, fri
846847

848+
ndarray[intp_t] left_indexer, right_indexer
849+
Py_ssize_t left_size, i
850+
numeric_t bdiff, fdiff
851+
847852
# search both forward and backward
848853
bli, bri = asof_join_backward_on_X_by_Y(
849854
left_values,
@@ -852,6 +857,7 @@ def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values,
852857
right_by_values,
853858
allow_exact_matches,
854859
tolerance,
860+
use_hashtable
855861
)
856862
fli, fri = asof_join_forward_on_X_by_Y(
857863
left_values,
@@ -860,26 +866,11 @@ def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values,
860866
right_by_values,
861867
allow_exact_matches,
862868
tolerance,
869+
use_hashtable
863870
)
864871

865-
return _choose_smaller_timestamp(left_values, right_values, bli, bri, fli, fri)
866-
867-
868-
cdef _choose_smaller_timestamp(
869-
numeric_t[:] left_values,
870-
numeric_t[:] right_values,
871-
ndarray[intp_t] bli,
872-
ndarray[intp_t] bri,
873-
ndarray[intp_t] fli,
874-
ndarray[intp_t] fri,
875-
):
876-
cdef:
877-
ndarray[intp_t] left_indexer, right_indexer
878-
Py_ssize_t left_size, i
879-
numeric_t bdiff, fdiff
880-
872+
# choose the smaller timestamp
881873
left_size = len(left_values)
882-
883874
left_indexer = np.empty(left_size, dtype=np.intp)
884875
right_indexer = np.empty(left_size, dtype=np.intp)
885876

@@ -894,55 +885,3 @@ cdef _choose_smaller_timestamp(
894885
left_indexer[i] = bli[i]
895886

896887
return left_indexer, right_indexer
897-
898-
899-
# ----------------------------------------------------------------------
900-
# asof_join
901-
# ----------------------------------------------------------------------
902-
903-
def asof_join_backward(numeric_t[:] left_values,
904-
numeric_t[:] right_values,
905-
bint allow_exact_matches=True,
906-
tolerance=None):
907-
908-
return asof_join_backward_on_X_by_Y(
909-
left_values,
910-
right_values,
911-
None,
912-
None,
913-
allow_exact_matches=allow_exact_matches,
914-
tolerance=tolerance,
915-
use_hashtable=False,
916-
)
917-
918-
919-
def asof_join_forward(numeric_t[:] left_values,
920-
numeric_t[:] right_values,
921-
bint allow_exact_matches=True,
922-
tolerance=None):
923-
return asof_join_forward_on_X_by_Y(
924-
left_values,
925-
right_values,
926-
None,
927-
None,
928-
allow_exact_matches=allow_exact_matches,
929-
tolerance=tolerance,
930-
use_hashtable=False,
931-
)
932-
933-
934-
def asof_join_nearest(numeric_t[:] left_values,
935-
numeric_t[:] right_values,
936-
bint allow_exact_matches=True,
937-
tolerance=None):
938-
939-
cdef:
940-
ndarray[intp_t] bli, bri, fli, fri
941-
942-
# search both forward and backward
943-
bli, bri = asof_join_backward(left_values, right_values,
944-
allow_exact_matches, tolerance)
945-
fli, fri = asof_join_forward(left_values, right_values,
946-
allow_exact_matches, tolerance)
947-
948-
return _choose_smaller_timestamp(left_values, right_values, bli, bri, fli, fri)

pandas/_libs/tslibs/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"Tick",
2626
"BaseOffset",
2727
"tz_compare",
28+
"is_unitless",
2829
]
2930

3031
from pandas._libs.tslibs import dtypes
@@ -39,6 +40,7 @@
3940
from pandas._libs.tslibs.np_datetime import (
4041
OutOfBoundsDatetime,
4142
OutOfBoundsTimedelta,
43+
is_unitless,
4244
)
4345
from pandas._libs.tslibs.offsets import (
4446
BaseOffset,

pandas/_libs/tslibs/fields.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def get_date_name_field(
1212
locale: str | None = ...,
1313
) -> npt.NDArray[np.object_]: ...
1414
def get_start_end_field(
15-
dtindex: npt.NDArray[np.int64], # const int64_t[:]
15+
dt64values: npt.NDArray[np.datetime64],
1616
field: str,
1717
freqstr: str | None = ...,
1818
month_kw: int = ...,

pandas/_libs/tslibs/fields.pyx

+24-6
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ objects and arrays
55
from locale import LC_TIME
66

77
from _strptime import LocaleTime
8+
89
cimport cython
910
from cython cimport Py_ssize_t
11+
1012
import numpy as np
1113

1214
cimport numpy as cnp
@@ -41,8 +43,11 @@ from pandas._libs.tslibs.ccalendar cimport (
4143
)
4244
from pandas._libs.tslibs.nattype cimport NPY_NAT
4345
from pandas._libs.tslibs.np_datetime cimport (
46+
NPY_DATETIMEUNIT,
4447
dt64_to_dtstruct,
48+
get_unit_from_dtype,
4549
npy_datetimestruct,
50+
pandas_datetime_to_datetimestruct,
4651
pandas_timedeltastruct,
4752
td64_to_tdstruct,
4853
)
@@ -196,22 +201,35 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:
196201

197202
@cython.wraparound(False)
198203
@cython.boundscheck(False)
199-
def get_start_end_field(const int64_t[:] dtindex, str field,
204+
def get_start_end_field(ndarray dt64values, str field,
200205
str freqstr=None, int month_kw=12):
201206
"""
202207
Given a datetime64 array (any resolution), return an array of indicators
203208
of whether timestamps are at the start/end of the month/quarter/year
204209
(defined by frequency).
210+
211+
Parameters
212+
----------
213+
dt64values : ndarray[datetime64], any resolution
214+
field : str
215+
freqstr : str or None, default None
216+
month_kw : int, default 12
217+
218+
Returns
219+
-------
220+
ndarray[bool]
205221
"""
206222
cdef:
207223
Py_ssize_t i
208-
int count = len(dtindex)
224+
int count = dt64values.size
209225
bint is_business = 0
210226
int end_month = 12
211227
int start_month = 1
212228
ndarray[int8_t] out
213229
npy_datetimestruct dts
214230
int compare_month, modby
231+
ndarray dtindex = dt64values.view("i8")
232+
NPY_DATETIMEUNIT reso = get_unit_from_dtype(dt64values.dtype)
215233

216234
out = np.zeros(count, dtype='int8')
217235

@@ -251,7 +269,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
251269
out[i] = 0
252270
continue
253271

254-
dt64_to_dtstruct(dtindex[i], &dts)
272+
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)
255273

256274
if _is_on_month(dts.month, compare_month, modby) and (
257275
dts.day == get_firstbday(dts.year, dts.month)):
@@ -263,7 +281,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
263281
out[i] = 0
264282
continue
265283

266-
dt64_to_dtstruct(dtindex[i], &dts)
284+
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)
267285

268286
if _is_on_month(dts.month, compare_month, modby) and dts.day == 1:
269287
out[i] = 1
@@ -275,7 +293,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
275293
out[i] = 0
276294
continue
277295

278-
dt64_to_dtstruct(dtindex[i], &dts)
296+
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)
279297

280298
if _is_on_month(dts.month, compare_month, modby) and (
281299
dts.day == get_lastbday(dts.year, dts.month)):
@@ -287,7 +305,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
287305
out[i] = 0
288306
continue
289307

290-
dt64_to_dtstruct(dtindex[i], &dts)
308+
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)
291309

292310
if _is_on_month(dts.month, compare_month, modby) and (
293311
dts.day == get_days_in_month(dts.year, dts.month)):

pandas/_libs/tslibs/np_datetime.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
99
def astype_overflowsafe(
1010
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
1111
) -> np.ndarray: ...
12+
def is_unitless(dtype: np.dtype) -> bool: ...

pandas/_libs/tslibs/np_datetime.pyx

+12
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,18 @@ def py_get_unit_from_dtype(dtype):
9292
return get_unit_from_dtype(dtype)
9393

9494

95+
def is_unitless(dtype: cnp.dtype) -> bool:
96+
"""
97+
Check if a datetime64 or timedelta64 dtype has no attached unit.
98+
"""
99+
if dtype.type_num not in [cnp.NPY_DATETIME, cnp.NPY_TIMEDELTA]:
100+
raise ValueError("is_unitless dtype must be datetime64 or timedelta64")
101+
cdef:
102+
NPY_DATETIMEUNIT unit = get_unit_from_dtype(dtype)
103+
104+
return unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
105+
106+
95107
# ----------------------------------------------------------------------
96108
# Comparison
97109

pandas/_libs/tslibs/timedeltas.pyi

+2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ class Timedelta(timedelta):
8888
# GH 46171
8989
# While Timedelta can return pd.NaT, having the constructor return
9090
# a Union with NaTType makes things awkward for users of pandas
91+
@classmethod
92+
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
9193
@property
9294
def days(self) -> int: ...
9395
@property

pandas/_libs/tslibs/timedeltas.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ def array_to_timedelta64(
347347
raise ValueError(
348348
"unit must not be specified if the input contains a str"
349349
)
350+
cnp.PyArray_ITER_NEXT(it)
350351

351352
# Usually, we have all strings. If so, we hit the fast path.
352353
# If this path fails, we try conversion a different way, and

pandas/_libs/tslibs/timestamps.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -489,9 +489,7 @@ cdef class _Timestamp(ABCTimestamp):
489489
dict kwds
490490
ndarray[uint8_t, cast=True] out
491491
int month_kw
492-
493-
if self._reso != NPY_FR_ns:
494-
raise NotImplementedError(self._reso)
492+
str unit
495493

496494
if freq:
497495
kwds = freq.kwds
@@ -502,7 +500,9 @@ cdef class _Timestamp(ABCTimestamp):
502500
freqstr = None
503501

504502
val = self._maybe_convert_value_to_local()
505-
out = get_start_end_field(np.array([val], dtype=np.int64),
503+
504+
unit = npy_unit_to_abbrev(self._reso)
505+
out = get_start_end_field(np.array([val], dtype=f"M8[{unit}]"),
506506
field, freqstr, month_kw)
507507
return out[0]
508508

pandas/_libs/tslibs/vectorized.pyi

+6-6
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,24 @@ from pandas._libs.tslibs.offsets import BaseOffset
1111
from pandas._typing import npt
1212

1313
def dt64arr_to_periodarr(
14-
stamps: npt.NDArray[np.int64], # const int64_t[:]
14+
stamps: npt.NDArray[np.int64],
1515
freq: int,
1616
tz: tzinfo | None,
17-
) -> npt.NDArray[np.int64]: ... # np.ndarray[np.int64, ndim=1]
17+
) -> npt.NDArray[np.int64]: ...
1818
def is_date_array_normalized(
19-
stamps: npt.NDArray[np.int64], # const int64_t[:]
19+
stamps: npt.NDArray[np.int64],
2020
tz: tzinfo | None = ...,
2121
) -> bool: ...
2222
def normalize_i8_timestamps(
23-
stamps: npt.NDArray[np.int64], # const int64_t[:]
23+
stamps: npt.NDArray[np.int64],
2424
tz: tzinfo | None,
2525
) -> npt.NDArray[np.int64]: ...
2626
def get_resolution(
27-
stamps: npt.NDArray[np.int64], # const int64_t[:]
27+
stamps: npt.NDArray[np.int64],
2828
tz: tzinfo | None = ...,
2929
) -> Resolution: ...
3030
def ints_to_pydatetime(
31-
arr: npt.NDArray[np.int64], # const int64_t[:}]
31+
arr: npt.NDArray[np.int64],
3232
tz: tzinfo | None = ...,
3333
freq: BaseOffset | None = ...,
3434
fold: bool = ...,

0 commit comments

Comments
 (0)