Skip to content

Commit cbe1b32

Browse files
authored
CLN: assorted (#56431)
* CLN: assorted
* CLN: assorted
* revert bit
* revert bit again
* mypy fixup
* revert
1 parent db6fd22 commit cbe1b32

22 files changed

+50
-54
lines changed

pandas/_libs/tslib.pyx

+14-15
Original file line number | Diff line number | Diff line change
@@ -338,7 +338,7 @@ def array_with_unit_to_datetime(
338338
f"unit='{unit}' not valid with non-numerical val='{val}'"
339339
)
340340

341-
except (ValueError, OutOfBoundsDatetime, TypeError) as err:
341+
except (ValueError, TypeError) as err:
342342
if is_raise:
343343
err.args = (f"{err}, at position {i}",)
344344
raise
@@ -435,15 +435,15 @@ cpdef array_to_datetime(
435435
Parameters
436436
----------
437437
values : ndarray of object
438-
date-like objects to convert
438+
date-like objects to convert
439439
errors : str, default 'raise'
440-
error behavior when parsing
440+
error behavior when parsing
441441
dayfirst : bool, default False
442-
dayfirst parsing behavior when encountering datetime strings
442+
dayfirst parsing behavior when encountering datetime strings
443443
yearfirst : bool, default False
444-
yearfirst parsing behavior when encountering datetime strings
444+
yearfirst parsing behavior when encountering datetime strings
445445
utc : bool, default False
446-
indicator whether the dates should be UTC
446+
indicator whether the dates should be UTC
447447
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
448448
Set to NPY_FR_GENERIC to infer a resolution.
449449
@@ -464,7 +464,7 @@ cpdef array_to_datetime(
464464
bint is_ignore = errors == "ignore"
465465
bint is_coerce = errors == "coerce"
466466
bint is_same_offsets
467-
_TSObject _ts
467+
_TSObject tsobj
468468
float tz_offset
469469
set out_tzoffset_vals = set()
470470
tzinfo tz, tz_out = None
@@ -550,29 +550,28 @@ cpdef array_to_datetime(
550550
creso = state.creso
551551
continue
552552

553-
_ts = convert_str_to_tsobject(
553+
tsobj = convert_str_to_tsobject(
554554
val, None, dayfirst=dayfirst, yearfirst=yearfirst
555555
)
556556

557-
if _ts.value == NPY_NAT:
557+
if tsobj.value == NPY_NAT:
558558
# e.g. "NaT" string or empty string, we do not consider
559559
# this as either tzaware or tznaive. See
560560
# test_to_datetime_with_empty_str_utc_false_format_mixed
561561
# We also do not update resolution inference based on this,
562562
# see test_infer_with_nat_int_float_str
563-
iresult[i] = _ts.value
563+
iresult[i] = tsobj.value
564564
continue
565565

566-
item_reso = _ts.creso
566+
item_reso = tsobj.creso
567567
state.update_creso(item_reso)
568568
if infer_reso:
569569
creso = state.creso
570570

571-
_ts.ensure_reso(creso, val)
572-
573-
iresult[i] = _ts.value
571+
tsobj.ensure_reso(creso, val)
572+
iresult[i] = tsobj.value
574573

575-
tz = _ts.tzinfo
574+
tz = tsobj.tzinfo
576575
if tz is not None:
577576
# dateutil timezone objects cannot be hashed, so
578577
# store the UTC offsets in seconds instead

pandas/_libs/tslibs/conversion.pyx

+2-3
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ from cpython.datetime cimport (
2929
import_datetime()
3030

3131
from pandas._libs.missing cimport checknull_with_nat_and_na
32-
from pandas._libs.tslibs.base cimport ABCTimestamp
3332
from pandas._libs.tslibs.dtypes cimport (
3433
abbrev_to_npy_unit,
3534
get_supported_reso,
@@ -492,7 +491,7 @@ cdef _TSObject convert_datetime_to_tsobject(
492491
pydatetime_to_dtstruct(ts, &obj.dts)
493492
obj.tzinfo = ts.tzinfo
494493

495-
if isinstance(ts, ABCTimestamp):
494+
if isinstance(ts, _Timestamp):
496495
obj.dts.ps = ts.nanosecond * 1000
497496

498497
if nanos:
@@ -766,7 +765,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
766765
"""
767766
if tz is None:
768767
return dt
769-
elif isinstance(dt, ABCTimestamp):
768+
elif isinstance(dt, _Timestamp):
770769
return dt.tz_localize(tz)
771770
return _localize_pydatetime(dt, tz)
772771

pandas/_libs/tslibs/dtypes.pyi

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ OFFSET_TO_PERIOD_FREQSTR: dict[str, str]
44

55
def periods_per_day(reso: int = ...) -> int: ...
66
def periods_per_second(reso: int) -> int: ...
7-
def abbrev_to_npy_unit(abbrev: str) -> int: ...
7+
def abbrev_to_npy_unit(abbrev: str | None) -> int: ...
88
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...
99

1010
class PeriodDtypeBase:

pandas/_libs/tslibs/np_datetime.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -596,7 +596,7 @@ cdef int64_t get_conversion_factor(
596596
):
597597
raise ValueError("unit-less resolutions are not supported")
598598
if from_unit > to_unit:
599-
raise ValueError
599+
raise ValueError("from_unit must be <= to_unit")
600600

601601
if from_unit == to_unit:
602602
return 1

pandas/_libs/tslibs/strptime.pyx

+4-4
Original file line number | Diff line number | Diff line change
@@ -319,14 +319,14 @@ def array_strptime(
319319
Py_ssize_t i, n = len(values)
320320
npy_datetimestruct dts
321321
int64_t[::1] iresult
322-
object val, tz
322+
object val
323323
bint seen_datetime_offset = False
324324
bint is_raise = errors=="raise"
325325
bint is_ignore = errors=="ignore"
326326
bint is_coerce = errors=="coerce"
327327
bint is_same_offsets
328328
set out_tzoffset_vals = set()
329-
tzinfo tz_out = None
329+
tzinfo tz, tz_out = None
330330
bint iso_format = format_is_iso(fmt)
331331
NPY_DATETIMEUNIT out_bestunit, item_reso
332332
int out_local = 0, out_tzoffset = 0
@@ -484,7 +484,7 @@ def array_strptime(
484484
tz = None
485485
out_tzoffset_vals.add("naive")
486486

487-
except (ValueError, OutOfBoundsDatetime) as ex:
487+
except ValueError as ex:
488488
ex.args = (
489489
f"{str(ex)}, at position {i}. You might want to try:\n"
490490
" - passing `format` if your strings have a consistent format;\n"
@@ -1084,7 +1084,7 @@ cdef tzinfo parse_timezone_directive(str z):
10841084
cdef:
10851085
int hours, minutes, seconds, pad_number, microseconds
10861086
int total_minutes
1087-
object gmtoff_remainder, gmtoff_remainder_padding
1087+
str gmtoff_remainder, gmtoff_remainder_padding
10881088

10891089
if z == "Z":
10901090
return timezone(timedelta(0))

pandas/_libs/tslibs/timedeltas.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -499,9 +499,9 @@ cdef int64_t parse_timedelta_string(str ts) except? -1:
499499
"""
500500

501501
cdef:
502-
unicode c
502+
str c
503503
bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0
504-
object current_unit = None
504+
str current_unit = None
505505
int64_t result = 0, m = 0, r
506506
list number = [], frac = [], unit = []
507507

pandas/core/dtypes/dtypes.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1453,7 +1453,7 @@ class NumpyEADtype(ExtensionDtype):
14531453

14541454
def __init__(self, dtype: npt.DTypeLike | NumpyEADtype | None) -> None:
14551455
if isinstance(dtype, NumpyEADtype):
1456-
# make constructor univalent
1456+
# make constructor idempotent
14571457
dtype = dtype.numpy_dtype
14581458
self._dtype = np.dtype(dtype)
14591459

pandas/core/frame.py

+4-5
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,6 @@
143143
from pandas.core.arrays.sparse import SparseFrameAccessor
144144
from pandas.core.construction import (
145145
ensure_wrapped_if_datetimelike,
146-
extract_array,
147146
sanitize_array,
148147
sanitize_masked_array,
149148
)
@@ -8784,11 +8783,11 @@ def combine_first(self, other: DataFrame) -> DataFrame:
87848783
"""
87858784
from pandas.core.computation import expressions
87868785

8787-
def combiner(x, y):
8788-
mask = extract_array(isna(x))
8786+
def combiner(x: Series, y: Series):
8787+
mask = x.isna()._values
87898788

8790-
x_values = extract_array(x, extract_numpy=True)
8791-
y_values = extract_array(y, extract_numpy=True)
8789+
x_values = x._values
8790+
y_values = y._values
87928791

87938792
# If the column y in other DataFrame is not in first DataFrame,
87948793
# just return y_values.

pandas/core/tools/datetimes.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1201,7 +1201,7 @@ def coerce(values):
12011201
values = to_numeric(values, errors=errors)
12021202

12031203
# prevent overflow in case of int8 or int16
1204-
if is_integer_dtype(values):
1204+
if is_integer_dtype(values.dtype):
12051205
values = values.astype("int64", copy=False)
12061206
return values
12071207

pandas/io/sas/sas7bdat.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series:
8686
----------
8787
sas_datetimes : {Series, Sequence[float]}
8888
Dates or datetimes in SAS
89-
unit : {str}
89+
unit : {'d', 's'}
9090
"d" if the floats represent dates, "s" for datetimes
9191
9292
Returns

pandas/io/stata.py

-3
Original file line number | Diff line number | Diff line change
@@ -234,9 +234,6 @@
234234
stata_epoch: Final = datetime(1960, 1, 1)
235235

236236

237-
# TODO: Add typing. As of January 2020 it is not possible to type this function since
238-
# mypy doesn't understand that a Series and an int can be combined using mathematical
239-
# operations. (+, -).
240237
def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
241238
"""
242239
Convert from SIF to datetime. https://www.stata.com/help.cgi?datetime

pandas/plotting/_matplotlib/core.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1495,7 +1495,7 @@ def _is_ts_plot(self) -> bool:
14951495
return not self.x_compat and self.use_index and self._use_dynamic_x()
14961496

14971497
@final
1498-
def _use_dynamic_x(self):
1498+
def _use_dynamic_x(self) -> bool:
14991499
return use_dynamic_x(self._get_ax(0), self.data)
15001500

15011501
def _make_plot(self, fig: Figure) -> None:
@@ -1537,7 +1537,7 @@ def _make_plot(self, fig: Figure) -> None:
15371537
errors = self._get_errorbars(label=label, index=i)
15381538
kwds = dict(kwds, **errors)
15391539

1540-
label = pprint_thing(label) # .encode('utf-8')
1540+
label = pprint_thing(label)
15411541
label = self._mark_right_label(label, index=i)
15421542
kwds["label"] = label
15431543

pandas/tests/arithmetic/test_datetime64.py

+2
Original file line number | Diff line number | Diff line change
@@ -1082,6 +1082,8 @@ def test_dt64arr_addsub_intlike(
10821082
self, request, dtype, index_or_series_or_array, freq, tz_naive_fixture
10831083
):
10841084
# GH#19959, GH#19123, GH#19012
1085+
# GH#55860 use index_or_series_or_array instead of box_with_array
1086+
# bc DataFrame alignment makes it inapplicable
10851087
tz = tz_naive_fixture
10861088

10871089
if freq is None:

pandas/tests/arithmetic/test_timedelta64.py

+1
Original file line number | Diff line number | Diff line change
@@ -497,6 +497,7 @@ def test_addition_ops(self):
497497
tdi + Index([1, 2, 3], dtype=np.int64)
498498

499499
# this is a union!
500+
# FIXME: don't leave commented-out
500501
# pytest.raises(TypeError, lambda : Index([1,2,3]) + tdi)
501502

502503
result = tdi + dti # name will be reset

pandas/tests/arrays/numpy_/test_numpy.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def test_constructor_from_string():
8787
assert result == expected
8888

8989

90-
def test_dtype_univalent(any_numpy_dtype):
90+
def test_dtype_idempotent(any_numpy_dtype):
9191
dtype = NumpyEADtype(any_numpy_dtype)
9292

9393
result = NumpyEADtype(dtype)

pandas/tests/arrays/string_/test_string.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -463,7 +463,7 @@ def test_min_max_numpy(method, box, dtype, request, arrow_string_storage):
463463
assert result == expected
464464

465465

466-
def test_fillna_args(dtype, request, arrow_string_storage):
466+
def test_fillna_args(dtype, arrow_string_storage):
467467
# GH 37987
468468

469469
arr = pd.array(["a", pd.NA], dtype=dtype)

pandas/tests/arrays/test_datetimelike.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -183,9 +183,7 @@ def test_take_fill_raises(self, fill_value, arr1d):
183183
arr1d.take([0, 1], allow_fill=True, fill_value=fill_value)
184184

185185
def test_take_fill(self, arr1d):
186-
np.arange(10, dtype="i8") * 24 * 3600 * 10**9
187-
188-
arr = arr1d # self.array_cls(data, freq="D")
186+
arr = arr1d
189187

190188
result = arr.take([-1, 1], allow_fill=True, fill_value=None)
191189
assert result[0] is NaT

pandas/tests/indexes/datetimes/test_constructors.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -959,7 +959,7 @@ def test_dti_tz_constructors(self, tzstr):
959959
for other in [idx2, idx3, idx4]:
960960
tm.assert_index_equal(idx1, other)
961961

962-
def test_dti_construction_univalent(self, unit):
962+
def test_dti_construction_idempotent(self, unit):
963963
rng = date_range(
964964
"03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern", unit=unit
965965
)

pandas/tests/io/parser/common/test_data_list.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,10 @@
1616
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
1717
)
1818

19-
skip_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
19+
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
2020

2121

22-
@skip_pyarrow
22+
@xfail_pyarrow
2323
def test_read_data_list(all_parsers):
2424
parser = all_parsers
2525
kwargs = {"index_col": 0}

pandas/tests/io/sas/test_sas7bdat.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
import pandas as pd
1515
import pandas._testing as tm
1616

17+
from pandas.io.sas.sas7bdat import SAS7BDATReader
18+
1719

1820
@pytest.fixture
1921
def dirpath(datapath):
@@ -127,8 +129,6 @@ def test_encoding_options(datapath):
127129
pass
128130
tm.assert_frame_equal(df1, df2)
129131

130-
from pandas.io.sas.sas7bdat import SAS7BDATReader
131-
132132
with contextlib.closing(SAS7BDATReader(fname, convert_header_text=False)) as rdr:
133133
df3 = rdr.read()
134134
for x, y in zip(df1.columns, df3.columns):
@@ -189,10 +189,9 @@ def test_date_time(datapath):
189189
fname, parse_dates=["Date1", "Date2", "DateTime", "DateTimeHi", "Taiw"]
190190
)
191191
# GH 19732: Timestamps imported from sas will incur floating point errors
192-
# 2023-11-16 we don't know the correct "expected" result bc we do not have
193-
# access to SAS to read the sas7bdat file. We are really just testing
194-
# that we are "close". This only seems to be an issue near the
195-
# implementation bounds.
192+
# See GH#56014 for discussion of the correct "expected" results
193+
# We are really just testing that we are "close". This only seems to be
194+
# an issue near the implementation bounds.
196195

197196
df[df.columns[3]] = df.iloc[:, 3].dt.round("us")
198197
df0["Date1"] = df0["Date1"].astype("M8[s]")
@@ -271,6 +270,7 @@ def test_max_sas_date(datapath):
271270
# NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999
272271
# but this is read as 29DEC9999:23:59:59.998993 by a buggy
273272
# sas7bdat module
273+
# See also GH#56014 for discussion of the correct "expected" results.
274274
fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat")
275275
df = pd.read_sas(fname, encoding="iso-8859-1")
276276

pandas/tests/io/test_stata.py

+1
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,7 @@ def test_read_dta2(self, datapath):
197197
# datapath("io", "data", "stata", "stata2_113.dta")
198198
# )
199199

200+
# FIXME: don't leave commented-out
200201
# buggy test because of the NaT comparison on certain platforms
201202
# Format 113 test fails since it does not support tc and tC formats
202203
# tm.assert_frame_equal(parsed_113, expected)

pandas/tests/tools/test_to_datetime.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -3107,7 +3107,7 @@ class TestDatetimeParsingWrappers:
31073107
("Thu Sep 25 2003", datetime(2003, 9, 25)),
31083108
("Sep 25 2003", datetime(2003, 9, 25)),
31093109
("January 1 2014", datetime(2014, 1, 1)),
3110-
# GHE10537
3110+
# GH#10537
31113111
("2014-06", datetime(2014, 6, 1)),
31123112
("06-2014", datetime(2014, 6, 1)),
31133113
("2014-6", datetime(2014, 6, 1)),

0 commit comments

Comments
 (0)