Commit ce0649b

Merge remote-tracking branch 'upstream/main' into internals-column-setitem
2 parents: a2aa8aa + d5ba8c0

33 files changed: +264, -213 lines

doc/source/conf.py
+5 -2

@@ -9,13 +9,13 @@
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
-
 from datetime import datetime
 import importlib
 import inspect
 import logging
 import os
 import sys
+import warnings

 import jinja2
 from numpydoc.docscrape import NumpyDocString
@@ -640,7 +640,10 @@ def linkcode_resolve(domain, info):
     obj = submod
     for part in fullname.split("."):
         try:
-            obj = getattr(obj, part)
+            with warnings.catch_warnings():
+                # Accessing deprecated objects will generate noisy warnings
+                warnings.simplefilter("ignore", FutureWarning)
+                obj = getattr(obj, part)
         except AttributeError:
             return None

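Note: the suppression pattern introduced above is plain standard-library Python and can be tried on its own. A minimal sketch, not part of the commit; DeprecatedThing is a made-up stand-in for an attribute that warns on access.

import warnings


class DeprecatedThing:
    # Hypothetical object whose attribute emits a FutureWarning when read.
    @property
    def value(self):
        warnings.warn("value is deprecated", FutureWarning)
        return 42


obj = DeprecatedThing()

with warnings.catch_warnings():
    # Ignore FutureWarning only inside this block, mirroring the
    # linkcode_resolve change above; other warning categories still surface.
    warnings.simplefilter("ignore", FutureWarning)
    quiet = obj.value

print("suppressed access returned:", quiet)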
doc/source/reference/testing.rst
+1

@@ -26,6 +26,7 @@ Exceptions and warnings

    errors.AbstractMethodError
    errors.AccessorRegistrationWarning
+   errors.DataError
    errors.DtypeWarning
    errors.DuplicateLabelError
    errors.EmptyDataError

doc/source/whatsnew/v1.4.3.rst
+2

@@ -18,6 +18,8 @@ Fixed regressions
 - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
 - Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`)
 - Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`)
+- Fixed regression in :meth:`.DataFrameGroupBy.agg` when used with list-likes or dict-likes and ``axis=1`` that would give incorrect results; now raises ``NotImplementedError`` (:issue:`46995`)
+- Fixed regression in :meth:`DataFrame.resample` and :meth:`DataFrame.rolling` when used with list-likes or dict-likes and ``axis=1`` that would raise an unintuitive error message; now raises ``NotImplementedError`` (:issue:`46904`)

 .. ---------------------------------------------------------------------------

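Note: the first new entry can be reproduced with a toy frame. A minimal sketch, not part of the commit, assuming pandas 1.4.3 or newer; the frame and the column grouping key are illustrative.

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Aggregating a column-wise groupby with a list-like used to give wrong
# results; with this fix it raises NotImplementedError instead.
try:
    df.groupby([0, 0], axis=1).agg(["sum"])
except NotImplementedError as exc:
    print(exc)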
doc/source/whatsnew/v1.5.0.rst
+3

@@ -151,6 +151,7 @@ Other enhancements
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
 - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`)
 - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
+- :class:`DataError` now exposed in ``pandas.errors`` (:issue:`27656`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.notable_bug_fixes:
@@ -786,6 +787,8 @@ Groupby/resample/rolling
 - Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
 - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
 - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`)
+- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
+-

 Reshaping
 ^^^^^^^^^

pandas/_libs/tslib.pyx
+23 -5

@@ -59,6 +59,7 @@ from pandas._libs.tslibs.nattype cimport (
     c_nat_strings as nat_strings,
 )
 from pandas._libs.tslibs.timestamps cimport _Timestamp
+from pandas._libs.tslibs.timezones cimport tz_compare

 from pandas._libs.tslibs import (
     Resolution,
@@ -447,6 +448,7 @@ cpdef array_to_datetime(
         bint string_to_dts_failed
        datetime py_dt
         tzinfo tz_out = None
+        bint found_tz = False, found_naive = False

     # specify error conditions
     assert is_raise or is_ignore or is_coerce
@@ -465,18 +467,34 @@
                 elif PyDateTime_Check(val):
                     seen_datetime = True
                     if val.tzinfo is not None:
+                        found_tz = True
                         if utc_convert:
                             _ts = convert_datetime_to_tsobject(val, None)
                             iresult[i] = _ts.value
-                        else:
+                        elif found_naive:
                             raise ValueError('Tz-aware datetime.datetime '
                                              'cannot be converted to '
                                              'datetime64 unless utc=True')
-                    elif isinstance(val, _Timestamp):
-                        iresult[i] = val.value
+                        elif tz_out is not None and not tz_compare(tz_out, val.tzinfo):
+                            raise ValueError('Tz-aware datetime.datetime '
+                                             'cannot be converted to '
+                                             'datetime64 unless utc=True')
+                        else:
+                            found_tz = True
+                            tz_out = val.tzinfo
+                            _ts = convert_datetime_to_tsobject(val, None)
+                            iresult[i] = _ts.value
+
                     else:
-                        iresult[i] = pydatetime_to_dt64(val, &dts)
-                        check_dts_bounds(&dts)
+                        found_naive = True
+                        if found_tz:
+                            raise ValueError('Cannot mix tz-aware with '
+                                             'tz-naive values')
+                        if isinstance(val, _Timestamp):
+                            iresult[i] = val.value
+                        else:
+                            iresult[i] = pydatetime_to_dt64(val, &dts)
+                            check_dts_bounds(&dts)

                 elif PyDate_Check(val):
                     seen_datetime = True

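Note: to make the new control flow easier to follow, here is a pure-Python sketch of the found_tz / found_naive bookkeeping added to array_to_datetime. It is not the Cython implementation and not part of the commit; equality of tzinfo objects is used as a crude stand-in for tslibs.timezones.tz_compare.

from datetime import datetime, timezone


def check_mixed_tz(values, utc_convert=False):
    # Mirrors the flags added above: the first tz-aware value fixes tz_out,
    # later aware values must agree with it, and mixing aware and naive
    # values raises.
    found_tz = False
    found_naive = False
    tz_out = None
    for val in values:
        if val.tzinfo is not None:
            found_tz = True
            if utc_convert:
                continue  # values get normalized to UTC, nothing to compare
            if found_naive:
                raise ValueError("Tz-aware datetime.datetime cannot be "
                                 "converted to datetime64 unless utc=True")
            if tz_out is not None and tz_out != val.tzinfo:
                raise ValueError("Tz-aware datetime.datetime cannot be "
                                 "converted to datetime64 unless utc=True")
            tz_out = val.tzinfo
        else:
            found_naive = True
            if found_tz:
                raise ValueError("Cannot mix tz-aware with tz-naive values")
    return tz_out


aware = datetime(2022, 1, 1, tzinfo=timezone.utc)
naive = datetime(2022, 1, 1)
print(check_mixed_tz([aware, aware]))  # UTC
try:
    check_mixed_tz([aware, naive])
except ValueError as exc:
    print(exc)  # Cannot mix tz-aware with tz-naive values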
pandas/_libs/tslibs/conversion.pyi
-3

@@ -23,7 +23,4 @@ def ensure_timedelta64ns(
     arr: np.ndarray,  # np.ndarray[timedelta64[ANY]]
     copy: bool = ...,
 ) -> np.ndarray: ...  # np.ndarray[timedelta64ns]
-def datetime_to_datetime64(
-    values: npt.NDArray[np.object_],
-) -> tuple[np.ndarray, tzinfo | None]: ...  # (np.ndarray[dt64ns], _)
 def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...

pandas/_libs/tslibs/conversion.pyx
-74

@@ -264,80 +264,6 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
     return dt64_result.view(TD64NS_DTYPE)


-# ----------------------------------------------------------------------
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def datetime_to_datetime64(ndarray values):
-    # ndarray[object], but can't declare object without ndim
-    """
-    Convert ndarray of datetime-like objects to int64 array representing
-    nanosecond timestamps.
-
-    Parameters
-    ----------
-    values : ndarray[object]
-
-    Returns
-    -------
-    result : ndarray[datetime64ns]
-    inferred_tz : tzinfo or None
-    """
-    cdef:
-        Py_ssize_t i, n = values.size
-        object val
-        int64_t ival
-        ndarray iresult  # int64_t, but can't declare that without specifying ndim
-        npy_datetimestruct dts
-        _TSObject _ts
-        bint found_naive = False
-        tzinfo inferred_tz = None
-
-        cnp.broadcast mi
-
-    result = np.empty((<object>values).shape, dtype='M8[ns]')
-    iresult = result.view('i8')
-
-    mi = cnp.PyArray_MultiIterNew2(iresult, values)
-    for i in range(n):
-        # Analogous to: val = values[i]
-        val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
-
-        if checknull_with_nat(val):
-            ival = NPY_NAT
-        elif PyDateTime_Check(val):
-            if val.tzinfo is not None:
-                if found_naive:
-                    raise ValueError('Cannot mix tz-aware with '
-                                     'tz-naive values')
-                if inferred_tz is not None:
-                    if not tz_compare(val.tzinfo, inferred_tz):
-                        raise ValueError('Array must be all same time zone')
-                else:
-                    inferred_tz = val.tzinfo
-
-                _ts = convert_datetime_to_tsobject(val, None)
-                ival = _ts.value
-                check_dts_bounds(&_ts.dts)
-            else:
-                found_naive = True
-                if inferred_tz is not None:
-                    raise ValueError('Cannot mix tz-aware with '
-                                     'tz-naive values')
-                ival = pydatetime_to_dt64(val, &dts)
-                check_dts_bounds(&dts)
-        else:
-            raise TypeError(f'Unrecognized value type: {type(val)}')
-
-        # Analogous to: iresult[i] = ival
-        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
-
-        cnp.PyArray_MultiIter_NEXT(mi)
-
-    return result, inferred_tz
-
-
 # ----------------------------------------------------------------------
 # _TSObject Conversion

pandas/conftest.py
+39

@@ -105,6 +105,24 @@ def pytest_addoption(parser):
     )


+def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
+    """Ignore doctest warning.
+
+    Parameters
+    ----------
+    item : pytest.Item
+        pytest test item.
+    path : str
+        Module path to Python object, e.g. "pandas.core.frame.DataFrame.append". A
+        warning will be filtered when item.name ends with in given path. So it is
+        sufficient to specify e.g. "DataFrame.append".
+    message : str
+        Message to be filtered.
+    """
+    if item.name.endswith(path):
+        item.add_marker(pytest.mark.filterwarnings(f"ignore:{message}"))
+
+
 def pytest_collection_modifyitems(items, config):
     skip_slow = config.getoption("--skip-slow")
     only_slow = config.getoption("--only-slow")
@@ -117,13 +135,34 @@ def pytest_collection_modifyitems(items, config):
         (pytest.mark.db, "db", skip_db, "--skip-db"),
     ]

+    # Warnings from doctests that can be ignored; place reason in comment above.
+    # Each entry specifies (path, message) - see the ignore_doctest_warning function
+    ignored_doctest_warnings = [
+        # Deprecations where the docstring will emit a warning
+        ("DataFrame.append", "The frame.append method is deprecated"),
+        ("Series.append", "The series.append method is deprecated"),
+        ("dtypes.common.is_categorical", "is_categorical is deprecated"),
+        ("Categorical.replace", "Categorical.replace is deprecated"),
+        ("dtypes.common.is_extension_type", "'is_extension_type' is deprecated"),
+        ("Index.is_mixed", "Index.is_mixed is deprecated"),
+        ("MultiIndex._is_lexsorted", "MultiIndex.is_lexsorted is deprecated"),
+        # Docstring divides by zero to show behavior difference
+        ("missing.mask_zero_div_zero", "divide by zero encountered"),
+        # Docstring demonstrates the call raises a warning
+        ("_validators.validate_axis_style_args", "Use named arguments"),
+    ]
+
     for item in items:
         if config.getoption("--doctest-modules") or config.getoption(
             "--doctest-cython", default=False
         ):
            # autouse=True for the add_doctest_imports can lead to expensive teardowns
            # since doctest_namespace is a session fixture
            item.add_marker(pytest.mark.usefixtures("add_doctest_imports"))
+
+            for path, message in ignored_doctest_warnings:
+                ignore_doctest_warning(item, path, message)
+
         # mark all tests in the pandas/tests/frame directory with "arraymanager"
         if "/frame/" in item.nodeid:
             item.add_marker(pytest.mark.arraymanager)

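Note: the hook above relies only on public pytest APIs (Item.add_marker and the filterwarnings mark). A standalone sketch, not part of the commit, using a hypothetical helper, test-name suffix, and warning message.

# conftest.py of a hypothetical project
import pytest


def _ignore_warning(item: pytest.Item, name_suffix: str, message: str) -> None:
    # Attach an "ignore" filter only to tests whose name ends with the suffix,
    # the same mechanism ignore_doctest_warning uses above.
    if item.name.endswith(name_suffix):
        item.add_marker(pytest.mark.filterwarnings(f"ignore:{message}"))


def pytest_collection_modifyitems(items):
    for item in items:
        _ignore_warning(item, "test_legacy_api", "legacy_api is deprecated")

A test named test_legacy_api can then exercise the deprecated code path without its warning being escalated to a failure by a strict warnings configuration.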
pandas/core/apply.py
+7 -1

@@ -33,6 +33,7 @@
     Axis,
     NDFrameT,
 )
+from pandas.errors import DataError
 from pandas.util._decorators import cache_readonly
 from pandas.util._exceptions import find_stack_level

@@ -51,7 +52,6 @@

 from pandas.core.algorithms import safe_sort
 from pandas.core.base import (
-    DataError,
     SelectionMixin,
     SpecificationError,
 )
@@ -325,6 +325,9 @@ def agg_list_like(self) -> DataFrame | Series:
         obj = self.obj
         arg = cast(List[AggFuncTypeBase], self.f)

+        if getattr(obj, "axis", 0) == 1:
+            raise NotImplementedError("axis other than 0 is not supported")
+
         if not isinstance(obj, SelectionMixin):
             # i.e. obj is Series or DataFrame
             selected_obj = obj
@@ -456,6 +459,9 @@ def agg_dict_like(self) -> DataFrame | Series:
         obj = self.obj
         arg = cast(AggFuncTypeDict, self.f)

+        if getattr(obj, "axis", 0) == 1:
+            raise NotImplementedError("axis other than 0 is not supported")
+
         if not isinstance(obj, SelectionMixin):
             # i.e. obj is Series or DataFrame
             selected_obj = obj

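Note: the same guard is what turns the resample/rolling cases from the v1.4.3 hunk above into a clear error. A minimal sketch, not part of the commit, assuming pandas 1.4.3 or newer; the frame with datetime columns is illustrative.

import numpy as np
import pandas as pd

cols = pd.date_range("2022-01-01", periods=4, freq="D")
df = pd.DataFrame(np.arange(8).reshape(2, 4), columns=cols)

# A list-like aggregation on a column-axis resample now hits the
# NotImplementedError guard instead of an unintuitive error.
try:
    df.resample("2D", axis=1).agg(["sum"])
except NotImplementedError as exc:
    print(exc)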
pandas/core/arrays/datetimes.py
-8

@@ -2263,14 +2263,6 @@ def objects_to_datetime64ns(
             allow_mixed=allow_mixed,
         )
         result = result.reshape(data.shape, order=order)
-    except ValueError as err:
-        try:
-            values, tz_parsed = conversion.datetime_to_datetime64(data)
-            # If tzaware, these values represent unix timestamps, so we
-            # return them as i8 to distinguish from wall times
-            return values.view("i8"), tz_parsed
-        except (ValueError, TypeError):
-            raise err
     except OverflowError as err:
         # Exception is raised when a part of date is greater than 32 bit signed int
         raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err

pandas/core/arrays/sparse/array.py
+8 -8

@@ -1253,7 +1253,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
         IntIndex
         Indices: array([2, 3], dtype=int32)

-        >>> arr.astype(np.dtype('int32'))
+        >>> arr.astype(SparseDtype(np.dtype('int32')))
         [0, 0, 1, 2]
         Fill: 0
         IntIndex
@@ -1262,19 +1262,19 @@
         Using a NumPy dtype with a different kind (e.g. float) will coerce
         just ``self.sp_values``.

-        >>> arr.astype(np.dtype('float64'))
+        >>> arr.astype(SparseDtype(np.dtype('float64')))
         ... # doctest: +NORMALIZE_WHITESPACE
-        [0.0, 0.0, 1.0, 2.0]
-        Fill: 0.0
+        [nan, nan, 1.0, 2.0]
+        Fill: nan
         IntIndex
         Indices: array([2, 3], dtype=int32)

-        Use a SparseDtype if you wish to be change the fill value as well.
+        Using a SparseDtype, you can also change the fill value as well.

-        >>> arr.astype(SparseDtype("float64", fill_value=np.nan))
+        >>> arr.astype(SparseDtype("float64", fill_value=0.0))
         ... # doctest: +NORMALIZE_WHITESPACE
-        [nan, nan, 1.0, 2.0]
-        Fill: nan
+        [0.0, 0.0, 1.0, 2.0]
+        Fill: 0.0
         IntIndex
         Indices: array([2, 3], dtype=int32)
         """

pandas/core/base.py
-4

@@ -176,10 +176,6 @@ def __setattr__(self, key: str, value):
         object.__setattr__(self, key, value)


-class DataError(Exception):
-    pass
-
-
 class SpecificationError(Exception):
     pass

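Note: with the private class removed here and the public export added (see the testing.rst and whatsnew hunks above), downstream code should import the exception from pandas.errors. A small sketch, not part of the commit; the message text is illustrative.

from pandas.errors import DataError

try:
    raise DataError("no numeric types to aggregate")
except DataError as exc:
    print(type(exc).__name__, "-", exc)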