
Commit f7df8bf

CLN: assorted (#51775)
* CLN: assorted
* more specific
* more accurate
1 parent 4d10233 commit f7df8bf

32 files changed, +111 -99 lines changed

doc/source/whatsnew/v2.0.0.rst (-1)

@@ -790,7 +790,6 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecated parsing datetime strings with system-local timezone to ``tzlocal``, pass a ``tz`` keyword or explicitly call ``tz_localize`` instead (:issue:`50791`)
-- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
 - Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings, in a future version these will be parsed as datetimes (matching unit-less behavior) instead of cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`)
 - Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`)

doc/source/whatsnew/v2.1.0.rst (+1)

@@ -92,6 +92,7 @@ Other API changes

 Deprecations
 ~~~~~~~~~~~~
+- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)

pandas/_libs/arrays.pyx (+3 -2)

@@ -126,11 +126,12 @@ cdef class NDArrayBacked:

     @property
     def size(self) -> int:
-        return self._ndarray.size
+        # TODO(cython3): use self._ndarray.size
+        return cnp.PyArray_SIZE(self._ndarray)

     @property
     def nbytes(self) -> int:
-        return self._ndarray.nbytes
+        return cnp.PyArray_NBYTES(self._ndarray)

     def copy(self, order="C"):
         cdef:
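
The NumPy C-API calls above compute the same quantities as the Python-level attributes while skipping Python attribute lookup. A quick Python-level check of that equivalence (illustrative only, not the Cython code itself):

    import numpy as np

    arr = np.empty((3, 4), dtype=np.int64)
    assert arr.size == 3 * 4            # what cnp.PyArray_SIZE(arr) returns at the C level
    assert arr.nbytes == arr.size * 8   # what cnp.PyArray_NBYTES(arr) returns at the C level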

pandas/_libs/parsers.pyi (+7)

@@ -67,3 +67,10 @@ class TextReader:
     def close(self) -> None: ...
     def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ...
     def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ...
+
+# _maybe_upcast, na_values are only exposed for testing
+na_values: dict
+
+def _maybe_upcast(
+    arr, use_nullable_dtypes: bool = ..., dtype_backend: str = ...
+) -> np.ndarray: ...

pandas/_libs/tslib.pyx (+1 -1)

@@ -695,7 +695,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
         if ts is NaT:
             ival = NPY_NAT
         else:
-            if ts.tz is not None:
+            if ts.tzinfo is not None:
                 ts = ts.tz_convert(tz)
             else:
                 # datetime64, tznaive pydatetime, int, float
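
On a Timestamp, .tz is simply a property alias for the underlying .tzinfo attribute, so checking .tzinfo directly is equivalent and a bit more direct. A minimal illustration:

    import pandas as pd

    aware = pd.Timestamp("2023-01-01", tz="US/Eastern")
    naive = pd.Timestamp("2023-01-01")
    assert aware.tzinfo is not None and aware.tz is aware.tzinfo
    assert naive.tzinfo is None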

pandas/_libs/tslibs/conversion.pyx (+3)

@@ -176,6 +176,9 @@ cpdef inline (int64_t, int) precision_from_unit(
         multiplier = periods_per_second(out_reso)
         m = multiplier * 2629746
     else:
+        # Careful: if get_conversion_factor raises, the exception does
+        # not propagate, instead we get a warning about an ignored exception.
+        # https://github.com/pandas-dev/pandas/pull/51483#discussion_r1115198951
         m = get_conversion_factor(reso, out_reso)

     p = <int>log10(m)  # number of digits in 'm' minus 1

pandas/_libs/tslibs/offsets.pyx (+1 -1)

@@ -4096,7 +4096,7 @@ cpdef to_offset(freq):

     Returns
     -------
-    DateOffset or None
+    BaseOffset subclass or None

     Raises
     ------
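
The docstring fix reflects what to_offset actually returns: a concrete BaseOffset subclass (DateOffset is only one of them), or None when given None. A quick check:

    from pandas.tseries.frequencies import to_offset
    from pandas.tseries.offsets import BaseOffset, Minute

    off = to_offset("5min")
    assert isinstance(off, Minute) and isinstance(off, BaseOffset)
    assert to_offset(None) is None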

pandas/_libs/tslibs/src/datetime/np_datetime.c (-23)

@@ -224,29 +224,6 @@ static npy_int64 days_to_yearsdays(npy_int64 *days_) {
     return year + 2000;
 }

-/*
- * Adjusts a datetimestruct based on a seconds offset. Assumes
- * the current values are valid.
- */
-NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts,
-                                                 int seconds) {
-    int minutes;
-
-    dts->sec += seconds;
-    if (dts->sec < 0) {
-        minutes = dts->sec / 60;
-        dts->sec = dts->sec % 60;
-        if (dts->sec < 0) {
-            --minutes;
-            dts->sec += 60;
-        }
-        add_minutes_to_datetimestruct(dts, minutes);
-    } else if (dts->sec >= 60) {
-        minutes = dts->sec / 60;
-        dts->sec = dts->sec % 60;
-        add_minutes_to_datetimestruct(dts, minutes);
-    }
-}

 /*
  * Fills in the year, month, day in 'dts' based on the days

pandas/_libs/tslibs/tzconversion.pyx (-1)

@@ -153,7 +153,6 @@ cdef int64_t tz_localize_to_utc_single(
         return val

     elif is_utc(tz) or tz is None:
-        # TODO: test with non-nano
         return val

     elif is_tzlocal(tz):

pandas/_testing/asserters.py (+2 -2)

@@ -1179,8 +1179,8 @@ def assert_frame_equal(

     # compare by blocks
     if by_blocks:
-        rblocks = right._to_dict_of_blocks()
-        lblocks = left._to_dict_of_blocks()
+        rblocks = right._to_dict_of_blocks(copy=False)
+        lblocks = left._to_dict_of_blocks(copy=False)
         for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
             assert dtype in lblocks
             assert dtype in rblocks
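
Passing copy=False only changes how the internal blocks are materialized for the comparison; the public by_blocks behavior is unchanged. A minimal sketch of the code path being exercised:

    import pandas as pd
    import pandas._testing as tm

    left = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
    right = left.copy()
    # compares the underlying dtype blocks rather than column-by-column
    tm.assert_frame_equal(left, right, by_blocks=True)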

pandas/conftest.py (+5 -3)

@@ -128,6 +128,10 @@ def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:


 def pytest_collection_modifyitems(items, config) -> None:
+    is_doctest = config.getoption("--doctest-modules") or config.getoption(
+        "--doctest-cython", default=False
+    )
+
     # Warnings from doctests that can be ignored; place reason in comment above.
     # Each entry specifies (path, message) - see the ignore_doctest_warning function
     ignored_doctest_warnings = [
@@ -136,9 +140,7 @@ def pytest_collection_modifyitems(items, config) -> None:
     ]

     for item in items:
-        if config.getoption("--doctest-modules") or config.getoption(
-            "--doctest-cython", default=False
-        ):
+        if is_doctest:
             # autouse=True for the add_doctest_imports can lead to expensive teardowns
             # since doctest_namespace is a session fixture
             item.add_marker(pytest.mark.usefixtures("add_doctest_imports"))
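
The cleanup hoists a loop-invariant option lookup out of the per-item loop, which matters because pytest_collection_modifyitems visits every collected test. A generic sketch of the same pattern, using a hypothetical --run-slow option rather than pandas' real flags:

    import pytest

    def pytest_collection_modifyitems(items, config) -> None:
        # Evaluate loop-invariant configuration once, not once per collected item.
        # "--run-slow" is a made-up option for illustration.
        run_slow = config.getoption("--run-slow", default=False)
        for item in items:
            if not run_slow and "slow" in item.keywords:
                item.add_marker(pytest.mark.skip(reason="need --run-slow to run"))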

pandas/core/arrays/period.py (+2)

@@ -760,6 +760,8 @@ def _add_timedelta_arraylike(

         dtype = np.dtype(f"m8[{freq._td64_unit}]")

+        # Similar to _check_timedeltalike_freq_compat, but we raise with a
+        # more specific exception message if necessary.
         try:
             delta = astype_overflowsafe(
                 np.asarray(other), dtype=dtype, copy=False, round_ok=False

pandas/core/arrays/timedeltas.py (+4 -2)

@@ -63,12 +63,14 @@
 )
 from pandas.core.dtypes.missing import isna

-from pandas.core import nanops
+from pandas.core import (
+    nanops,
+    roperator,
+)
 from pandas.core.array_algos import datetimelike_accumulations
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
 import pandas.core.common as com
-from pandas.core.ops import roperator
 from pandas.core.ops.common import unpack_zerodim_and_defer

 if TYPE_CHECKING:
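
This commit consistently switches roperator imports from pandas.core.ops to pandas.core (the same change recurs in array_ops.py, methods.py, missing.py and test_datetime64.py below), so these modules no longer need to go through the pandas.core.ops package for the helpers. The module itself is just the reversed-operator functions, e.g.:

    from pandas.core import roperator

    roperator.radd(2, 10)      # 10 + 2 -> 12
    roperator.rsub(2, 10)      # 10 - 2 -> 8
    roperator.rtruediv(2, 10)  # 10 / 2 -> 5.0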

pandas/core/groupby/generic.py (+1 -1)

@@ -503,7 +503,7 @@ def _cython_transform(

     def _transform_general(self, func: Callable, *args, **kwargs) -> Series:
         """
-        Transform with a callable func`.
+        Transform with a callable `func`.
         """
         assert callable(func)
         klass = type(self.obj)

pandas/core/indexing.py (+1 -1)

@@ -1615,7 +1615,7 @@ def _get_list_axis(self, key, axis: AxisInt):
         try:
             return self.obj._take_with_is_copy(key, axis=axis)
         except IndexError as err:
-            # re-raise with different error message
+            # re-raise with different error message, e.g. test_getitem_ndarray_3d
            raise IndexError("positional indexers are out-of-bounds") from err

     def _getitem_axis(self, key, axis: AxisInt):

pandas/core/ops/__init__.py (+1 -1)

@@ -263,7 +263,7 @@ def to_series(right):
         # We need to pass dtype=right.dtype to retain object dtype
         # otherwise we lose consistency with Index and array ops
         dtype = None
-        if getattr(right, "dtype", None) == object:
+        if right.dtype == object:
             # can't pass right.dtype unconditionally as that would break on e.g.
             # datetime64[h] ndarray
             dtype = object
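
Dropping the getattr guard reflects that right is expected to always carry a dtype on this path (the "more accurate" part of the commit message); the check itself is plain ndarray dtype equality, for example:

    import numpy as np

    np.array(["a", None]).dtype == object   # True  -> dtype=object is passed through
    np.array([1.5, 2.0]).dtype == object    # False -> dtype stays None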

pandas/core/ops/array_ops.py (+2 -4)

@@ -47,12 +47,10 @@
     notna,
 )

+from pandas.core import roperator
 from pandas.core.computation import expressions
 from pandas.core.construction import ensure_wrapped_if_datetimelike
-from pandas.core.ops import (
-    missing,
-    roperator,
-)
+from pandas.core.ops import missing
 from pandas.core.ops.dispatch import should_extension_dispatch
 from pandas.core.ops.invalid import invalid_comparison

pandas/core/ops/methods.py (+1 -1)

@@ -10,7 +10,7 @@
     ABCSeries,
 )

-from pandas.core.ops import roperator
+from pandas.core import roperator


 def _get_method_wrappers(cls):

pandas/core/ops/missing.py (+1 -1)

@@ -33,7 +33,7 @@
     is_scalar,
 )

-from pandas.core.ops import roperator
+from pandas.core import roperator


 def _fill_zeros(result, x, y):

pandas/io/json/_table_schema.py (-2)

@@ -87,8 +87,6 @@ def as_json_table_type(x: DtypeObj) -> str:
         return "datetime"
     elif is_timedelta64_dtype(x):
         return "duration"
-    elif is_categorical_dtype(x):
-        return "any"
     elif is_extension_array_dtype(x):
         return "any"
     elif is_string_dtype(x):
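
The removed branch was redundant: CategoricalDtype is an extension dtype, so it already falls through to the is_extension_array_dtype branch and still maps to "any". A quick check of that premise:

    import pandas as pd
    from pandas.api.types import is_extension_array_dtype

    assert is_extension_array_dtype(pd.CategoricalDtype(["a", "b"]))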

pandas/tests/arithmetic/test_datetime64.py (+10 -10)

@@ -34,7 +34,7 @@
     date_range,
 )
 import pandas._testing as tm
-from pandas.core.ops import roperator
+from pandas.core import roperator
 from pandas.tests.arithmetic.common import (
     assert_cannot_add,
     assert_invalid_addsub_type,
@@ -1550,9 +1550,8 @@ def test_dt64arr_add_sub_DateOffset(self, box_with_array):
         ],
     )
     @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub])
-    @pytest.mark.parametrize("box_other", [True, False])
     def test_dt64arr_add_sub_offset_array(
-        self, tz_naive_fixture, box_with_array, box_other, op, other
+        self, tz_naive_fixture, box_with_array, op, other
     ):
         # GH#18849
         # GH#10699 array of offsets
@@ -1561,19 +1560,20 @@ def test_dt64arr_add_sub_offset_array(
         dti = date_range("2017-01-01", periods=2, tz=tz)
         dtarr = tm.box_expected(dti, box_with_array)

-        other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
         expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))])
         expected = tm.box_expected(expected, box_with_array).astype(object)

-        if box_other:
-            other = tm.box_expected(other, box_with_array)
-            if box_with_array is pd.array and op is roperator.radd:
-                # We expect a PandasArray, not ndarray[object] here
-                expected = pd.array(expected, dtype=object)
-
         with tm.assert_produces_warning(PerformanceWarning):
             res = op(dtarr, other)
+        tm.assert_equal(res, expected)

+        # Same thing but boxing other
+        other = tm.box_expected(other, box_with_array)
+        if box_with_array is pd.array and op is roperator.radd:
+            # We expect a PandasArray, not ndarray[object] here
+            expected = pd.array(expected, dtype=object)
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = op(dtarr, other)
         tm.assert_equal(res, expected)

     @pytest.mark.parametrize(

pandas/tests/arrays/sparse/test_libsparse.py (+2)

@@ -212,6 +212,8 @@ def test_intersect_empty(self):
     @pytest.mark.parametrize(
         "case",
         [
+            # Argument 2 to "IntIndex" has incompatible type "ndarray[Any,
+            # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]"
             IntIndex(5, np.array([1, 2], dtype=np.int32)),  # type: ignore[arg-type]
             IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),  # type: ignore[arg-type]
             IntIndex(0, np.array([], dtype=np.int32)),  # type: ignore[arg-type]

pandas/tests/extension/test_arrow.py (+15 -5)

@@ -389,7 +389,15 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
         if all_numeric_accumulations != "cumsum" or pa_version_under9p0:
             # xfailing takes a long time to run because pytest
             # renders the exception messages even when not showing them
-            pytest.skip(f"{all_numeric_accumulations} not implemented for pyarrow < 9")
+            opt = request.config.option
+            if opt.markexpr and "not slow" in opt.markexpr:
+                pytest.skip(
+                    f"{all_numeric_accumulations} not implemented for pyarrow < 9"
+                )
+            mark = pytest.mark.xfail(
+                reason=f"{all_numeric_accumulations} not implemented for pyarrow < 9"
+            )
+            request.node.add_marker(mark)

         elif all_numeric_accumulations == "cumsum" and (
             pa.types.is_boolean(pa_type) or pa.types.is_decimal(pa_type)
@@ -1409,14 +1417,16 @@ def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
     with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
         ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")

-    # but as of GH#50689, timestamptz is supported
+    with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
+        ArrowDtype.construct_from_string("decimal(7, 2)[pyarrow]")
+
+
+def test_arrowdtype_construct_from_string_supports_dt64tz():
+    # as of GH#50689, timestamptz is supported
     dtype = ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")
     expected = ArrowDtype(pa.timestamp("s", "UTC"))
     assert dtype == expected

-    with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
-        ArrowDtype.construct_from_string("decimal(7, 2)[pyarrow]")
-

 def test_arrowdtype_construct_from_string_type_only_one_pyarrow():
     # GH#51225
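
The first hunk replaces an unconditional skip with a split: skip when running under -m "not slow" (because rendering xfail exception messages is slow), otherwise record a proper xfail. A stripped-down sketch of that pattern in a generic test; the capability check here is a placeholder, not the real pyarrow condition:

    import pytest

    def test_not_yet_implemented(request):
        feature_missing = True  # placeholder for the real capability check
        if feature_missing:
            opt = request.config.option
            if opt.markexpr and "not slow" in opt.markexpr:
                pytest.skip("not implemented; skip to avoid slow xfail rendering")
            request.node.add_marker(pytest.mark.xfail(reason="not implemented"))
        raise NotImplementedError  # absorbed by the xfail marker added above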

pandas/tests/frame/constructors/test_from_records.py (+2 -2)

@@ -72,7 +72,7 @@ def test_from_records_sequencelike(self):

         # this is actually tricky to create the recordlike arrays and
         # have the dtypes be intact
-        blocks = df._to_dict_of_blocks()
+        blocks = df._to_dict_of_blocks(copy=False)
         tuples = []
         columns = []
         dtypes = []
@@ -153,7 +153,7 @@ def test_from_records_dictlike(self):

         # columns is in a different order here than the actual items iterated
         # from the dict
-        blocks = df._to_dict_of_blocks()
+        blocks = df._to_dict_of_blocks(copy=False)
         columns = []
         for b in blocks.values():
             columns.extend(b.columns)

pandas/tests/indexes/datetimes/test_setops.py (+7)

@@ -595,3 +595,10 @@ def test_intersection_dst_transition(self, tz):
         result = idx1.intersection(idx2)
         expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
         tm.assert_index_equal(result, expected)
+
+        # GH#45863 same problem for union
+        index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
+        index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
+        result = index1.union(index2)
+        expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
+        tm.assert_index_equal(result, expected)
