
Commit 4a65264

CLN: Assorted cleanups (#46243)
1 parent 02d5356 commit 4a65264

14 files changed: +103 -116 lines

pandas/_libs/groupby.pyx (+51 -21)

@@ -30,8 +30,8 @@ from numpy.math cimport NAN
 
 cnp.import_array()
 
+from pandas._libs cimport util
 from pandas._libs.algos cimport kth_smallest_c
-from pandas._libs.util cimport get_nat
 
 from pandas._libs.algos import (
     ensure_platform_int,
@@ -49,7 +49,7 @@ from pandas._libs.dtypes cimport (
 from pandas._libs.missing cimport checknull
 
 
-cdef int64_t NPY_NAT = get_nat()
+cdef int64_t NPY_NAT = util.get_nat()
 _int64_max = np.iinfo(np.int64).max
 
 cdef float64_t NaN = <float64_t>np.NaN
@@ -248,13 +248,7 @@ def group_cumsum(
     accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)
     compensation = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)
 
-    if numeric_t == float32_t or numeric_t == float64_t:
-        na_val = NaN
-    elif numeric_t is int64_t and is_datetimelike:
-        na_val = NPY_NAT
-    else:
-        # Will not be used, but define to avoid unitialized warning.
-        na_val = 0
+    na_val = _get_na_val(<numeric_t>0, is_datetimelike)
 
     with nogil:
         for i in range(N):
@@ -995,6 +989,47 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
         return False
 
 
+cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max):
+    """
+    Find either the min or the max supported by numeric_t; 'val' is a placeholder
+    to effectively make numeric_t an argument.
+    """
+    if numeric_t is int64_t:
+        return -_int64_max if compute_max else util.INT64_MAX
+    elif numeric_t is int32_t:
+        return util.INT32_MIN if compute_max else util.INT32_MAX
+    elif numeric_t is int16_t:
+        return util.INT16_MIN if compute_max else util.INT16_MAX
+    elif numeric_t is int8_t:
+        return util.INT8_MIN if compute_max else util.INT8_MAX
+
+    elif numeric_t is uint64_t:
+        return 0 if compute_max else util.UINT64_MAX
+    elif numeric_t is uint32_t:
+        return 0 if compute_max else util.UINT32_MAX
+    elif numeric_t is uint16_t:
+        return 0 if compute_max else util.UINT16_MAX
+    elif numeric_t is uint8_t:
+        return 0 if compute_max else util.UINT8_MAX
+
+    else:
+        return -np.inf if compute_max else np.inf
+
+
+cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):
+    cdef:
+        numeric_t na_val
+
+    if numeric_t == float32_t or numeric_t == float64_t:
+        na_val = NaN
+    elif numeric_t is int64_t and is_datetimelike:
+        na_val = NPY_NAT
+    else:
+        # Will not be used, but define to avoid unitialized warning.
+        na_val = 0
+    return na_val
+
+
 # TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can
 # use `const iu_64_floating_obj_t[:, :] values`
 @cython.wraparound(False)
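
For intuition only (this sketch is not from the commit), the pattern the new helpers centralize can be written in plain NumPy: a grouped reduction seeds its accumulator with the dtype-appropriate extreme value so that any real observation overwrites it. The helper name _sentinel_for below is hypothetical.

import numpy as np

def _sentinel_for(dtype, compute_max):
    # Hypothetical analogue of _get_min_or_max: the "worst" representable value
    # for the requested reduction, so any observed value replaces it.
    if np.issubdtype(dtype, np.integer):
        info = np.iinfo(dtype)
        return info.min if compute_max else info.max
    return -np.inf if compute_max else np.inf

def group_max(values, labels, ngroups):
    # Seed every group's slot with the sentinel, then fold observations in.
    out = np.full(ngroups, _sentinel_for(values.dtype, True), dtype=values.dtype)
    for val, lab in zip(values, labels):
        if lab >= 0 and val > out[lab]:
            out[lab] = val
    return out

print(group_max(np.array([3, 1, 4, 1, 5]), np.array([0, 0, 1, 1, 1]), ngroups=2))  # [3 5]
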
@@ -1359,16 +1394,17 @@ cdef group_min_max(
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
 
     group_min_or_max = np.empty_like(out)
+    group_min_or_max[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max)
+
     if iu_64_floating_t is int64_t:
-        group_min_or_max[:] = -_int64_max if compute_max else _int64_max
+        # TODO: only if is_datetimelike?
         nan_val = NPY_NAT
     elif iu_64_floating_t is uint64_t:
         # NB: We do not define nan_val because there is no such thing
         # for uint64_t. We carefully avoid having to reference it in this
         # case.
-        group_min_or_max[:] = 0 if compute_max else np.iinfo(np.uint64).max
+        pass
     else:
-        group_min_or_max[:] = -np.inf if compute_max else np.inf
         nan_val = NAN
 
     N, K = (<object>values).shape
@@ -1527,26 +1563,20 @@ cdef group_cummin_max(
         bint isna_entry
 
     accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
-    if iu_64_floating_t is int64_t:
-        accum[:] = -_int64_max if compute_max else _int64_max
-    elif iu_64_floating_t is uint64_t:
-        accum[:] = 0 if compute_max else np.iinfo(np.uint64).max
-    else:
-        accum[:] = -np.inf if compute_max else np.inf
+    accum[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max)
+
+    na_val = _get_na_val(<iu_64_floating_t>0, is_datetimelike)
 
     if uses_mask:
         na_possible = True
         # Will never be used, just to avoid uninitialized warning
         na_val = 0
     elif iu_64_floating_t is float64_t or iu_64_floating_t is float32_t:
-        na_val = NaN
         na_possible = True
     elif is_datetimelike:
-        na_val = NPY_NAT
         na_possible = True
     else:
         # Will never be used, just to avoid uninitialized warning
-        na_val = 0
         na_possible = False
 
     if na_possible:
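
To see how the sentinel and the NA value interact in the cumulative variant, here is an illustrative float-only sketch (again not the Cython code itself): the accumulator starts at the sentinel, and NA entries propagate na_val without updating the accumulator.

import numpy as np

def group_cummax(values, labels, ngroups):
    accum = np.full(ngroups, -np.inf)   # float analogue of _get_min_or_max(..., compute_max=True)
    na_val = np.nan                     # float analogue of _get_na_val
    out = np.empty(len(values))
    for i, (val, lab) in enumerate(zip(values, labels)):
        if np.isnan(val):               # analogue of _treat_as_na
            out[i] = na_val
            continue
        accum[lab] = max(accum[lab], val)
        out[i] = accum[lab]
    return out

print(group_cummax(np.array([1.0, 3.0, np.nan, 2.0]), np.array([0, 0, 0, 0]), ngroups=1))
# [ 1.  3. nan  3.]
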

pandas/_libs/tslibs/conversion.pyx (+3 -9)

@@ -757,7 +757,7 @@ cdef inline bint _infer_tsobject_fold(
     _TSObject obj,
     const int64_t[:] trans,
     const int64_t[:] deltas,
-    int32_t pos,
+    intp_t pos,
 ):
     """
     Infer _TSObject fold property from value by assuming 0 and then setting
@@ -770,7 +770,7 @@
         ndarray of offset transition points in nanoseconds since epoch.
     deltas : int64_t[:]
         array of offsets corresponding to transition points in trans.
-    pos : int32_t
+    pos : intp_t
         Position of the last transition point before taking fold into account.
 
     Returns
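
Likely context for the int32_t -> intp_t change (an inference, not stated in the commit): `pos` comes from numpy's searchsorted, which returns platform-sized intp indices, so a fixed 32-bit type could truncate on 64-bit builds.

import numpy as np

trans = np.array([0, 100, 200], dtype=np.int64)
pos = trans.searchsorted(150, side="right") - 1
print(pos, np.asarray(pos).dtype)  # 1 int64 (intp on a typical 64-bit platform)
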
@@ -828,13 +828,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
         return dt
     elif isinstance(dt, ABCTimestamp):
         return dt.tz_localize(tz)
-    elif is_utc(tz):
-        return _localize_pydatetime(dt, tz)
-    try:
-        # datetime.replace with pytz may be incorrect result
-        return tz.localize(dt)
-    except AttributeError:
-        return dt.replace(tzinfo=tz)
+    return _localize_pydatetime(dt, tz)
 
 
 # ----------------------------------------------------------------------
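
The deleted branch handled the pytz-vs-stdlib split inline; localize_pydatetime now defers to _localize_pydatetime, which is expected to cover the same cases. A plain-Python sketch of that fallback, with an illustrative function name:

from datetime import datetime, timezone, tzinfo

def localize_naive(dt: datetime, tz: tzinfo) -> datetime:
    try:
        # pytz timezones need .localize() for correct DST handling
        return tz.localize(dt)
    except AttributeError:
        # stdlib and dateutil timezones: datetime.replace is sufficient
        return dt.replace(tzinfo=tz)

print(localize_naive(datetime(2022, 3, 5, 12, 0), timezone.utc))
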

pandas/_libs/tslibs/tzconversion.pyx (+12 -43)

@@ -513,12 +513,13 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
 
 # OSError may be thrown by tzlocal on windows at or close to 1970-01-01
 # see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
-cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz,
-                                                   bint to_utc,
-                                                   bint *fold=NULL) except? -1:
+cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
+                                     bint* fold=NULL) except? -1:
     """
-    Calculate offset in nanoseconds needed to convert the i8 representation of
-    a datetime from a tzlocal timezone to UTC, or vice-versa.
+    Convert the i8 representation of a datetime from a tzlocal timezone to
+    UTC, or vice-versa.
+
+    Private, not intended for use outside of tslibs.conversion
 
     Parameters
     ----------
@@ -529,10 +530,11 @@ cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz,
     fold : bint*, default NULL
         pointer to fold: whether datetime ends up in a fold or not
        after adjustment
+        Only passed with to_utc=False.
 
     Returns
     -------
-    delta : int64_t
+    result : int64_t
 
     Notes
     -----
@@ -553,45 +555,12 @@ cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz,
         dt = dt.replace(tzinfo=tzutc())
         dt = dt.astimezone(tz)
 
-    if fold is not NULL:
-        fold[0] = dt.fold
+        if fold is not NULL:
+            # NB: fold is only passed with to_utc=False
+            fold[0] = dt.fold
 
     td = tz.utcoffset(dt)
-    return int(td.total_seconds() * 1_000_000_000)
-
-
-# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
-# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
-cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
-                                     bint* fold=NULL) except? -1:
-    """
-    Convert the i8 representation of a datetime from a tzlocal timezone to
-    UTC, or vice-versa.
-
-    Private, not intended for use outside of tslibs.conversion
-
-    Parameters
-    ----------
-    val : int64_t
-    tz : tzinfo
-    to_utc : bint
-        True if converting tzlocal _to_ UTC, False if going the other direction
-    fold : bint*
-        pointer to fold: whether datetime ends up in a fold or not
-        after adjustment
-
-    Returns
-    -------
-    result : int64_t
-
-    Notes
-    -----
-    Sets fold by pointer
-    """
-    cdef:
-        int64_t delta
-
-    delta = _tzlocal_get_offset_components(val, tz, to_utc, fold)
+    delta = int(td.total_seconds() * 1_000_000_000)
 
     if to_utc:
         return val - delta
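
A rough pure-Python sketch of what the merged function computes on i8 (nanosecond) values, using dateutil's tzlocal as a stand-in; this is illustrative only and skips the fold pointer:

from datetime import datetime, timezone
from dateutil.tz import tzlocal

def tz_convert_tzlocal_utc(val_ns: int, to_utc: bool = True) -> int:
    tz = tzlocal()
    dt = datetime.utcfromtimestamp(val_ns / 1_000_000_000)
    if not to_utc:
        # interpret val as UTC and recover the local wall time first
        dt = dt.replace(tzinfo=timezone.utc).astimezone(tz)
    delta = int(tz.utcoffset(dt).total_seconds() * 1_000_000_000)
    return val_ns - delta if to_utc else val_ns + delta

print(tz_convert_tzlocal_utc(1_646_485_200_000_000_000, to_utc=False))
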

pandas/_libs/tslibs/vectorized.pyi (+1 -1)

@@ -30,7 +30,7 @@ def get_resolution(
 def ints_to_pydatetime(
     arr: npt.NDArray[np.int64],  # const int64_t[:}]
     tz: tzinfo | None = ...,
-    freq: str | BaseOffset | None = ...,
+    freq: BaseOffset | None = ...,
     fold: bool = ...,
     box: str = ...,
 ) -> npt.NDArray[np.object_]: ...

pandas/_libs/tslibs/vectorized.pyx (+4 -9)

@@ -27,7 +27,7 @@ from .np_datetime cimport (
     dt64_to_dtstruct,
     npy_datetimestruct,
 )
-from .offsets cimport to_offset
+from .offsets cimport BaseOffset
 from .period cimport get_period_ordinal
 from .timestamps cimport create_timestamp_from_ts
 from .timezones cimport (
@@ -87,7 +87,7 @@ cdef inline object create_time_from_ts(
 def ints_to_pydatetime(
     const int64_t[:] stamps,
     tzinfo tz=None,
-    object freq=None,
+    BaseOffset freq=None,
     bint fold=False,
     str box="datetime"
 ) -> np.ndarray:
@@ -99,7 +99,7 @@
     stamps : array of i8
     tz : str, optional
         convert to this timezone
-    freq : str/Offset, optional
+    freq : BaseOffset, optional
         freq to convert
     fold : bint, default is 0
         Due to daylight saving time, one wall clock time can occur twice
@@ -138,9 +138,6 @@
         func_create = create_date_from_ts
     elif box == "timestamp":
         func_create = create_timestamp_from_ts
-
-        if isinstance(freq, str):
-            freq = to_offset(freq)
     elif box == "time":
         func_create = create_time_from_ts
     elif box == "datetime":
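
With the string-handling removed, resolving a frequency string is assumed to be the caller's job. A hedged usage sketch against this internal (non-public) API, assuming the signature shown in this commit:

import numpy as np
from pandas.tseries.frequencies import to_offset
from pandas._libs.tslibs.vectorized import ints_to_pydatetime

stamps = np.array([0, 86_400_000_000_000], dtype="i8")  # nanoseconds since the epoch
freq = to_offset("D")  # resolve the string before handing it to the Cython layer
print(ints_to_pydatetime(stamps, freq=freq, box="timestamp"))
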
@@ -311,7 +308,6 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
         pos = trans.searchsorted(stamps, side="right") - 1
 
     for i in range(n):
-        # TODO: reinstate nogil for use_utc case?
         if stamps[i] == NPY_NAT:
             result[i] = NPY_NAT
             continue
@@ -393,7 +389,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
 @cython.boundscheck(False)
 def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz):
     cdef:
-        Py_ssize_t n = len(stamps)
+        Py_ssize_t i, n = len(stamps)
         int64_t[:] result = np.empty(n, dtype=np.int64)
         ndarray[int64_t] trans
         int64_t[:] deltas
@@ -416,7 +412,6 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz):
         pos = trans.searchsorted(stamps, side="right") - 1
 
     for i in range(n):
-        # TODO: reinstate nogil for use_utc case?
         if stamps[i] == NPY_NAT:
             result[i] = NPY_NAT
             continue

pandas/core/arrays/categorical.py (+1 -12)

@@ -49,10 +49,7 @@
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_bool_kwarg
 
-from pandas.core.dtypes.cast import (
-    coerce_indexer_dtype,
-    maybe_cast_to_extension_array,
-)
+from pandas.core.dtypes.cast import coerce_indexer_dtype
 from pandas.core.dtypes.common import (
     ensure_int64,
     ensure_platform_int,
@@ -64,7 +61,6 @@
     is_hashable,
     is_integer_dtype,
     is_list_like,
-    is_object_dtype,
     is_scalar,
     is_timedelta64_dtype,
     needs_i8_conversion,
@@ -2758,13 +2754,6 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
         codes = _get_codes_for_values(flat, categories)
         return codes.reshape(values.shape)
 
-    if isinstance(categories.dtype, ExtensionDtype) and is_object_dtype(values):
-        # Support inferring the correct extension dtype from an array of
-        # scalar objects. e.g.
-        # Categorical(array[Period, Period], categories=PeriodIndex(...))
-        cls = categories.dtype.construct_array_type()
-        values = maybe_cast_to_extension_array(cls, values)
-
     codes = categories.get_indexer_for(values)
     return coerce_indexer_dtype(codes, categories)
 

pandas/core/dtypes/cast.py (+6 -6)

@@ -1268,9 +1268,9 @@ def maybe_cast_to_datetime(
 
     if is_timedelta64_dtype(dtype):
         # TODO: _from_sequence would raise ValueError in cases where
-        #  ensure_nanosecond_dtype raises TypeError
+        #  _ensure_nanosecond_dtype raises TypeError
         dtype = cast(np.dtype, dtype)
-        dtype = ensure_nanosecond_dtype(dtype)
+        dtype = _ensure_nanosecond_dtype(dtype)
         res = TimedeltaArray._from_sequence(value, dtype=dtype)
         return res
 
@@ -1281,7 +1281,7 @@
         vdtype = getattr(value, "dtype", None)
 
         if is_datetime64 or is_datetime64tz:
-            dtype = ensure_nanosecond_dtype(dtype)
+            dtype = _ensure_nanosecond_dtype(dtype)
 
             value = np.array(value, copy=False)
 
@@ -1399,14 +1399,14 @@ def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarra
     return values
 
 
-def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
+def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
     """
     Convert dtypes with granularity less than nanosecond to nanosecond
 
-    >>> ensure_nanosecond_dtype(np.dtype("M8[s]"))
+    >>> _ensure_nanosecond_dtype(np.dtype("M8[s]"))
     dtype('<M8[ns]')
 
-    >>> ensure_nanosecond_dtype(np.dtype("m8[ps]"))
+    >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
     Traceback (most recent call last):
         ...
     TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
