Skip to content

Commit 5e95673

Browse files
authored
CLN: assorted (#50644)
* CLN: assorted
* troubleshoot
1 parent 502eca6 commit 5e95673

25 files changed

+54
-134
lines changed

pandas/_libs/tslib.pyx

-3
Original file line numberDiff line numberDiff line change
@@ -546,9 +546,6 @@ cpdef array_to_datetime(
546546
else:
547547
# coerce
548548
# we now need to parse this as if unit='ns'
549-
# we can ONLY accept integers at this point
550-
# if we have previously (or in future accept
551-
# datetimes/strings, then we must coerce)
552549
try:
553550
iresult[i] = cast_from_unit(val, "ns")
554551
except OverflowError:

pandas/_libs/tslibs/conversion.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ cdef tzinfo convert_timezone(
5252
)
5353

5454
cdef int64_t parse_pydatetime(
55-
object val,
55+
datetime val,
5656
npy_datetimestruct *dts,
5757
bint utc_convert,
5858
) except? -1

pandas/_libs/tslibs/conversion.pyx

+7-8
Original file line numberDiff line numberDiff line change
@@ -413,8 +413,8 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
413413
414414
Parameters
415415
----------
416-
dts: npy_datetimestruct
417-
tzoffset: int
416+
dts : npy_datetimestruct
417+
tzoffset : int
418418
tz : tzinfo or None
419419
timezone for the timezone-aware output.
420420
reso : NPY_DATETIMEUNIT, default NPY_FR_ns
@@ -463,7 +463,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
463463
return obj
464464

465465

466-
cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
466+
cdef _TSObject _convert_str_to_tsobject(str ts, tzinfo tz, str unit,
467467
bint dayfirst=False,
468468
bint yearfirst=False):
469469
"""
@@ -499,7 +499,6 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
499499
NPY_DATETIMEUNIT out_bestunit, reso
500500

501501
if len(ts) == 0 or ts in nat_strings:
502-
ts = NaT
503502
obj = _TSObject()
504503
obj.value = NPY_NAT
505504
obj.tzinfo = tz
@@ -727,16 +726,16 @@ cdef tzinfo convert_timezone(
727726

728727

729728
cdef int64_t parse_pydatetime(
730-
object val,
731-
npy_datetimestruct *dts,
732-
bint utc_convert,
729+
datetime val,
730+
npy_datetimestruct *dts,
731+
bint utc_convert,
733732
) except? -1:
734733
"""
735734
Convert pydatetime to datetime64.
736735
737736
Parameters
738737
----------
739-
val
738+
val : datetime
740739
Element being processed.
741740
dts : *npy_datetimestruct
742741
Needed to use in pydatetime_to_dt64, which writes to it.

pandas/_libs/tslibs/parsing.pyi

-12
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ def parse_datetime_string(
1111
date_string: str,
1212
dayfirst: bool = ...,
1313
yearfirst: bool = ...,
14-
**kwargs,
1514
) -> datetime: ...
1615
def parse_time_string(
1716
arg: str,
@@ -24,28 +23,17 @@ def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
2423
def try_parse_dates(
2524
values: npt.NDArray[np.object_], # object[:]
2625
parser,
27-
dayfirst: bool = ...,
28-
default: datetime | None = ...,
2926
) -> npt.NDArray[np.object_]: ...
3027
def try_parse_year_month_day(
3128
years: npt.NDArray[np.object_], # object[:]
3229
months: npt.NDArray[np.object_], # object[:]
3330
days: npt.NDArray[np.object_], # object[:]
3431
) -> npt.NDArray[np.object_]: ...
35-
def try_parse_datetime_components(
36-
years: npt.NDArray[np.object_], # object[:]
37-
months: npt.NDArray[np.object_], # object[:]
38-
days: npt.NDArray[np.object_], # object[:]
39-
hours: npt.NDArray[np.object_], # object[:]
40-
minutes: npt.NDArray[np.object_], # object[:]
41-
seconds: npt.NDArray[np.object_], # object[:]
42-
) -> npt.NDArray[np.object_]: ...
4332
def guess_datetime_format(
4433
dt_str,
4534
dayfirst: bool | None = ...,
4635
) -> str | None: ...
4736
def concat_date_cols(
4837
date_cols: tuple,
49-
keep_trivial_numbers: bool = ...,
5038
) -> npt.NDArray[np.object_]: ...
5139
def get_rule_month(source: str) -> str: ...

pandas/_libs/tslibs/parsing.pyx

+8-55
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
209209
raise DateParseError(f"Invalid date specified ({month}/{day})")
210210

211211

212-
cdef bint does_string_look_like_time(str parse_string):
212+
cdef bint _does_string_look_like_time(str parse_string):
213213
"""
214214
Check whether the given string is a time: it has to start with either
215215
H:MM or HH:MM, and the hour and minute values must be valid.
@@ -249,7 +249,6 @@ def parse_datetime_string(
249249
str date_string,
250250
bint dayfirst=False,
251251
bint yearfirst=False,
252-
**kwargs,
253252
) -> datetime:
254253
"""
255254
Parse datetime string, only returns datetime.
@@ -266,10 +265,10 @@ def parse_datetime_string(
266265
if not _does_string_look_like_datetime(date_string):
267266
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
268267

269-
if does_string_look_like_time(date_string):
268+
if _does_string_look_like_time(date_string):
270269
# use the current datetime as the default; do not pass _DEFAULT_DATETIME
271270
dt = du_parse(date_string, dayfirst=dayfirst,
272-
yearfirst=yearfirst, **kwargs)
271+
yearfirst=yearfirst)
273272
return dt
274273

275274
dt, _ = _parse_delimited_date(date_string, dayfirst)
@@ -294,7 +293,7 @@ def parse_datetime_string(
294293

295294
try:
296295
dt = du_parse(date_string, default=_DEFAULT_DATETIME,
297-
dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
296+
dayfirst=dayfirst, yearfirst=yearfirst)
298297
except TypeError:
299298
# following may be raised from dateutil
300299
# TypeError: 'NoneType' object is not iterable
@@ -667,9 +666,7 @@ cdef dateutil_parse(
667666
# Parsing for type-inference
668667

669668

670-
def try_parse_dates(
671-
object[:] values, parser, bint dayfirst=False, default=None,
672-
) -> np.ndarray:
669+
def try_parse_dates(object[:] values, parser) -> np.ndarray:
673670
cdef:
674671
Py_ssize_t i, n
675672
object[::1] result
@@ -705,47 +702,6 @@ def try_parse_year_month_day(
705702
return result.base # .base to access underlying ndarray
706703

707704

708-
def try_parse_datetime_components(object[:] years,
709-
object[:] months,
710-
object[:] days,
711-
object[:] hours,
712-
object[:] minutes,
713-
object[:] seconds) -> np.ndarray:
714-
715-
cdef:
716-
Py_ssize_t i, n
717-
object[::1] result
718-
int secs
719-
double float_secs
720-
double micros
721-
722-
n = len(years)
723-
# TODO(cython3): Use len instead of `shape[0]`
724-
if (
725-
months.shape[0] != n
726-
or days.shape[0] != n
727-
or hours.shape[0] != n
728-
or minutes.shape[0] != n
729-
or seconds.shape[0] != n
730-
):
731-
raise ValueError("Length of all datetime components must be equal")
732-
result = np.empty(n, dtype="O")
733-
734-
for i in range(n):
735-
float_secs = float(seconds[i])
736-
secs = int(float_secs)
737-
738-
micros = float_secs - secs
739-
if micros > 0:
740-
micros = micros * 1000000
741-
742-
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]),
743-
int(hours[i]), int(minutes[i]), secs,
744-
int(micros))
745-
746-
return result.base # .base to access underlying ndarray
747-
748-
749705
# ----------------------------------------------------------------------
750706
# Miscellaneous
751707

@@ -1001,6 +957,7 @@ cdef str _fill_token(token: str, padding: int):
1001957
token_filled = f"{seconds}.{nanoseconds}"
1002958
return token_filled
1003959

960+
1004961
cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
1005962
"""Warn if guessed datetime format doesn't respect dayfirst argument."""
1006963
cdef:
@@ -1062,16 +1019,13 @@ cdef object convert_to_unicode(object item, bint keep_trivial_numbers):
10621019

10631020
@cython.wraparound(False)
10641021
@cython.boundscheck(False)
1065-
def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndarray:
1022+
def concat_date_cols(tuple date_cols) -> np.ndarray:
10661023
"""
10671024
Concatenates elements from numpy arrays in `date_cols` into strings.
10681025

10691026
Parameters
10701027
----------
10711028
date_cols : tuple[ndarray]
1072-
keep_trivial_numbers : bool, default True
1073-
if True and len(date_cols) == 1, then
1074-
conversion (to string from integer/float zero) is not performed
10751029

10761030
Returns
10771031
-------
@@ -1110,8 +1064,7 @@ def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndar
11101064
it = <flatiter>PyArray_IterNew(array)
11111065
for row_idx in range(rows_count):
11121066
item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
1113-
result_view[row_idx] = convert_to_unicode(item,
1114-
keep_trivial_numbers)
1067+
result_view[row_idx] = convert_to_unicode(item, True)
11151068
PyArray_ITER_NEXT(it)
11161069
else:
11171070
# create fixed size list - more efficient memory allocation

pandas/_libs/tslibs/period.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
764764
765765
Parameters
766766
----------
767-
dts: npy_datetimestruct*
767+
dts : npy_datetimestruct*
768768
freq : int
769769
770770
Returns

pandas/core/apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
955955
result.index = res_index
956956

957957
# infer dtypes
958-
result = result.infer_objects()
958+
result = result.infer_objects(copy=False)
959959

960960
return result
961961

pandas/core/arrays/categorical.py

-4
Original file line numberDiff line numberDiff line change
@@ -471,10 +471,6 @@ def _internal_fill_value(self) -> int:
471471
dtype = self._ndarray.dtype
472472
return dtype.type(-1)
473473

474-
@property
475-
def _constructor(self) -> type[Categorical]:
476-
return Categorical
477-
478474
@classmethod
479475
def _from_sequence(
480476
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False

pandas/core/arrays/datetimelike.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1256,7 +1256,7 @@ def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_
12561256
return new_data
12571257

12581258
@final
1259-
def _addsub_object_array(self, other: np.ndarray, op):
1259+
def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
12601260
"""
12611261
Add or subtract array-like of DateOffset objects
12621262
@@ -1267,10 +1267,14 @@ def _addsub_object_array(self, other: np.ndarray, op):
12671267
12681268
Returns
12691269
-------
1270-
result : same class as self
1270+
np.ndarray[object]
1271+
Except in fastpath case with length 1 where we operate on the
1272+
contained scalar.
12711273
"""
12721274
assert op in [operator.add, operator.sub]
12731275
if len(other) == 1 and self.ndim == 1:
1276+
# Note: without this special case, we could annotate return type
1277+
# as ndarray[object]
12741278
# If both 1D then broadcasting is unambiguous
12751279
return op(self, other[0])
12761280

pandas/core/dtypes/cast.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -793,8 +793,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
793793
if val is NaT or val.tz is None: # type: ignore[comparison-overlap]
794794
val = val.to_datetime64()
795795
dtype = val.dtype
796-
# TODO(2.0): this should be dtype = val.dtype
797-
# to get the correct M8 resolution
798796
# TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
799797
else:
800798
if pandas_dtype:
@@ -1677,8 +1675,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
16771675
arr._validate_setitem_value(element)
16781676
return True
16791677
except (ValueError, TypeError):
1680-
# TODO(2.0): stop catching ValueError for tzaware, see
1681-
# _catch_deprecated_value_error
1678+
# TODO: re-use _catch_deprecated_value_error to ensure we are
1679+
# strict about what exceptions we allow through here.
16821680
return False
16831681

16841682
# This is technically incorrect, but maintains the behavior of

pandas/core/frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -9436,7 +9436,9 @@ def _append(
94369436
row_df = other.to_frame().T
94379437
# infer_objects is needed for
94389438
# test_append_empty_frame_to_series_with_dateutil_tz
9439-
other = row_df.infer_objects().rename_axis(index.names, copy=False)
9439+
other = row_df.infer_objects(copy=False).rename_axis(
9440+
index.names, copy=False
9441+
)
94409442
elif isinstance(other, list):
94419443
if not other:
94429444
pass

pandas/core/groupby/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1836,7 +1836,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
18361836
result = result.T
18371837

18381838
# Note: we really only care about inferring numeric dtypes here
1839-
return self._reindex_output(result).infer_objects()
1839+
return self._reindex_output(result).infer_objects(copy=False)
18401840

18411841
def _iterate_column_groupbys(self, obj: DataFrame | Series):
18421842
for i, colname in enumerate(obj.columns):

pandas/core/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3385,7 +3385,7 @@ def _reorder_indexer(
33853385
new_order = np.arange(n)[::-1][indexer]
33863386
elif isinstance(k, slice) and k.start is None and k.stop is None:
33873387
# slice(None) should not determine order GH#31330
3388-
new_order = np.ones((n,))[indexer]
3388+
new_order = np.ones((n,), dtype=np.intp)[indexer]
33893389
else:
33903390
# For all other cases, use the same order as the level
33913391
new_order = np.arange(n)[indexer]

pandas/core/indexes/numeric.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -231,13 +231,11 @@ def _should_fallback_to_positional(self) -> bool:
231231

232232
@doc(Index._convert_slice_indexer)
233233
def _convert_slice_indexer(self, key: slice, kind: str):
234-
# TODO(2.0): once #45324 deprecation is enforced we should be able
234+
# TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
235235
# to simplify this.
236236
if is_float_dtype(self.dtype):
237237
assert kind in ["loc", "getitem"]
238238

239-
# TODO: can we write this as a condition based on
240-
# e.g. _should_fallback_to_positional?
241239
# We always treat __getitem__ slicing as label-based
242240
# translate to locations
243241
return self.slice_indexer(key.start, key.stop, key.step)

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2166,7 +2166,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
21662166
if not has_dtype:
21672167
# i.e. if we already had a Series or ndarray, keep that
21682168
# dtype. But if we had a list or dict, then do inference
2169-
df = df.infer_objects()
2169+
df = df.infer_objects(copy=False)
21702170
self.obj._mgr = df._mgr
21712171
else:
21722172
self.obj._mgr = self.obj._append(value)._mgr

pandas/io/parsers/base_parser.py

-1
Original file line numberDiff line numberDiff line change
@@ -1117,7 +1117,6 @@ def converter(*date_cols):
11171117
parsing.try_parse_dates(
11181118
parsing.concat_date_cols(date_cols),
11191119
parser=date_parser,
1120-
dayfirst=dayfirst,
11211120
),
11221121
errors="ignore",
11231122
)

pandas/plotting/_matplotlib/core.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ def _compute_plot_data(self):
603603

604604
# GH16953, infer_objects is needed as fallback, for ``Series``
605605
# with ``dtype == object``
606-
data = data.infer_objects()
606+
data = data.infer_objects(copy=False)
607607
include_type = [np.number, "datetime", "datetimetz", "timedelta"]
608608

609609
# GH23719, allow plotting boolean

pandas/plotting/_matplotlib/hist.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def _args_adjust(self) -> None:
8080

8181
def _calculate_bins(self, data: DataFrame) -> np.ndarray:
8282
"""Calculate bins given data"""
83-
nd_values = data.infer_objects()._get_numeric_data()
83+
nd_values = data.infer_objects(copy=False)._get_numeric_data()
8484
values = np.ravel(nd_values)
8585
values = values[~isna(values)]
8686

0 commit comments

Comments
 (0)