Skip to content

Commit 3edbcb0

Browse files
committed
Merge remote-tracking branch 'upstream/main' into tst/ref/test_sql
2 parents 9db6c20 + e7d2c7a commit 3edbcb0

File tree

20 files changed

+59
-146
lines changed

20 files changed

+59
-146
lines changed

doc/source/whatsnew/v2.2.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ Categorical
301301

302302
Datetimelike
303303
^^^^^^^^^^^^
304+
- Bug in :func:`concat` raising ``AttributeError`` when concatenating an all-NA DataFrame with a :class:`DatetimeTZDtype` dtype DataFrame (:issue:`52093`)
304305
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
305306
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
306307
- Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)
@@ -345,9 +346,9 @@ Interval
345346

346347
Indexing
347348
^^^^^^^^
349+
- Bug in :meth:`DataFrame.loc` when setting a :class:`Series` with an extension dtype into a column with a NumPy dtype (:issue:`55604`)
348350
- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
349351
- Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
350-
-
351352

352353
Missing
353354
^^^^^^^

pandas/_libs/parsers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -993,7 +993,7 @@ cdef class TextReader:
993993
missing_usecols = [col for col in self.usecols if col >= num_cols]
994994
if missing_usecols:
995995
raise ParserError(
996-
"Defining usecols without of bounds indices is not allowed. "
996+
"Defining usecols with out-of-bounds indices is not allowed. "
997997
f"{missing_usecols} are out of bounds.",
998998
)
999999

pandas/_libs/tslibs/parsing.pyi

-5
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,6 @@ def try_parse_dates(
2323
values: npt.NDArray[np.object_], # object[:]
2424
parser,
2525
) -> npt.NDArray[np.object_]: ...
26-
def try_parse_year_month_day(
27-
years: npt.NDArray[np.object_], # object[:]
28-
months: npt.NDArray[np.object_], # object[:]
29-
days: npt.NDArray[np.object_], # object[:]
30-
) -> npt.NDArray[np.object_]: ...
3126
def guess_datetime_format(
3227
dt_str,
3328
dayfirst: bool | None = ...,

pandas/_libs/tslibs/parsing.pyx

-19
Original file line numberDiff line numberDiff line change
@@ -766,25 +766,6 @@ def try_parse_dates(object[:] values, parser) -> np.ndarray:
766766
return result.base # .base to access underlying ndarray
767767

768768

769-
def try_parse_year_month_day(
770-
object[:] years, object[:] months, object[:] days
771-
) -> np.ndarray:
772-
cdef:
773-
Py_ssize_t i, n
774-
object[::1] result
775-
776-
n = len(years)
777-
# TODO(cython3): Use len instead of `shape[0]`
778-
if months.shape[0] != n or days.shape[0] != n:
779-
raise ValueError("Length of years/months/days must all be equal")
780-
result = np.empty(n, dtype="O")
781-
782-
for i in range(n):
783-
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))
784-
785-
return result.base # .base to access underlying ndarray
786-
787-
788769
# ----------------------------------------------------------------------
789770
# Miscellaneous
790771

pandas/core/arrays/datetimes.py

+1
Original file line numberDiff line numberDiff line change
@@ -2308,6 +2308,7 @@ def _sequence_to_dt64ns(
23082308
# assume this data are epoch timestamps
23092309
if data.dtype != INT64_DTYPE:
23102310
data = data.astype(np.int64, copy=False)
2311+
copy = False
23112312
result = data.view(out_dtype)
23122313

23132314
if copy:

pandas/core/dtypes/cast.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1816,7 +1816,8 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
18161816
if not isinstance(tipo, np.dtype):
18171817
# i.e. nullable IntegerDtype; we can put this into an ndarray
18181818
# losslessly iff it has no NAs
1819-
if element._hasna:
1819+
arr = element._values if isinstance(element, ABCSeries) else element
1820+
if arr._hasna:
18201821
raise LossySetitemError
18211822
return element
18221823

pandas/core/internals/managers.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
BlockPlacement,
2727
BlockValuesRefs,
2828
)
29+
from pandas._libs.tslibs import Timestamp
2930
from pandas.errors import PerformanceWarning
3031
from pandas.util._decorators import cache_readonly
3132
from pandas.util._exceptions import find_stack_level
@@ -2304,7 +2305,8 @@ def _preprocess_slice_or_indexer(
23042305
def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike:
23052306
if isinstance(dtype, DatetimeTZDtype):
23062307
# NB: exclude e.g. pyarrow[dt64tz] dtypes
2307-
i8values = np.full(shape, fill_value._value)
2308+
ts = Timestamp(fill_value).as_unit(dtype.unit)
2309+
i8values = np.full(shape, ts._value)
23082310
return DatetimeArray(i8values, dtype=dtype)
23092311

23102312
elif is_1d_only_ea_dtype(dtype):

pandas/core/tools/datetimes.py

-57
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828
get_unit_from_dtype,
2929
iNaT,
3030
is_supported_unit,
31-
nat_strings,
32-
parsing,
3331
timezones as libtimezones,
3432
)
3533
from pandas._libs.tslibs.conversion import precision_from_unit
@@ -42,7 +40,6 @@
4240
AnyArrayLike,
4341
ArrayLike,
4442
DateTimeErrorChoices,
45-
npt,
4643
)
4744
from pandas.util._exceptions import find_stack_level
4845

@@ -62,14 +59,12 @@
6259
ABCDataFrame,
6360
ABCSeries,
6461
)
65-
from pandas.core.dtypes.missing import notna
6662

6763
from pandas.arrays import (
6864
DatetimeArray,
6965
IntegerArray,
7066
NumpyExtensionArray,
7167
)
72-
from pandas.core import algorithms
7368
from pandas.core.algorithms import unique
7469
from pandas.core.arrays import ArrowExtensionArray
7570
from pandas.core.arrays.base import ExtensionArray
@@ -1273,58 +1268,6 @@ def coerce(values):
12731268
return values
12741269

12751270

1276-
def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None:
1277-
"""
1278-
try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
1279-
arg is a passed in as an object dtype, but could really be ints/strings
1280-
with nan-like/or floats (e.g. with nan)
1281-
1282-
Parameters
1283-
----------
1284-
arg : np.ndarray[object]
1285-
errors : {'raise','ignore','coerce'}
1286-
"""
1287-
1288-
def calc(carg):
1289-
# calculate the actual result
1290-
carg = carg.astype(object, copy=False)
1291-
parsed = parsing.try_parse_year_month_day(
1292-
carg / 10000, carg / 100 % 100, carg % 100
1293-
)
1294-
return tslib.array_to_datetime(parsed, errors=errors)[0]
1295-
1296-
def calc_with_mask(carg, mask):
1297-
result = np.empty(carg.shape, dtype="M8[ns]")
1298-
iresult = result.view("i8")
1299-
iresult[~mask] = iNaT
1300-
1301-
masked_result = calc(carg[mask].astype(np.float64).astype(np.int64))
1302-
result[mask] = masked_result.astype("M8[ns]")
1303-
return result
1304-
1305-
# try intlike / strings that are ints
1306-
try:
1307-
return calc(arg.astype(np.int64))
1308-
except (ValueError, OverflowError, TypeError):
1309-
pass
1310-
1311-
# a float with actual np.nan
1312-
try:
1313-
carg = arg.astype(np.float64)
1314-
return calc_with_mask(carg, notna(carg))
1315-
except (ValueError, OverflowError, TypeError):
1316-
pass
1317-
1318-
# string with NaN-like
1319-
try:
1320-
mask = ~algorithms.isin(arg, list(nat_strings))
1321-
return calc_with_mask(arg, mask)
1322-
except (ValueError, OverflowError, TypeError):
1323-
pass
1324-
1325-
return None
1326-
1327-
13281271
__all__ = [
13291272
"DateParseError",
13301273
"should_cache",

pandas/io/parsers/python_parser.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -615,8 +615,8 @@ def _handle_usecols(
615615
]
616616
if missing_usecols:
617617
raise ParserError(
618-
"Defining usecols without of bounds indices is not allowed. "
619-
f"{missing_usecols} are out of bounds.",
618+
"Defining usecols with out-of-bounds indices is not allowed. "
619+
f"{missing_usecols} are out-of-bounds.",
620620
)
621621
col_indices = self.usecols
622622

pandas/tests/arrays/datetimes/test_constructors.py

+2-16
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import pandas as pd
99
import pandas._testing as tm
1010
from pandas.core.arrays import DatetimeArray
11-
from pandas.core.arrays.datetimes import _sequence_to_dt64ns
1211

1312

1413
class TestDatetimeArrayConstructor:
@@ -44,7 +43,6 @@ def test_freq_validation(self):
4443
"meth",
4544
[
4645
DatetimeArray._from_sequence,
47-
_sequence_to_dt64ns,
4846
pd.to_datetime,
4947
pd.DatetimeIndex,
5048
],
@@ -104,9 +102,6 @@ def test_bool_dtype_raises(self):
104102
with pytest.raises(TypeError, match=msg):
105103
DatetimeArray._from_sequence(arr)
106104

107-
with pytest.raises(TypeError, match=msg):
108-
_sequence_to_dt64ns(arr)
109-
110105
with pytest.raises(TypeError, match=msg):
111106
pd.DatetimeIndex(arr)
112107

@@ -143,14 +138,12 @@ def test_tz_dtype_mismatch_raises(self):
143138
["2000"], dtype=DatetimeTZDtype(tz="US/Central")
144139
)
145140
with pytest.raises(TypeError, match="data is already tz-aware"):
146-
DatetimeArray._from_sequence_not_strict(
147-
arr, dtype=DatetimeTZDtype(tz="UTC")
148-
)
141+
DatetimeArray._from_sequence(arr, dtype=DatetimeTZDtype(tz="UTC"))
149142

150143
def test_tz_dtype_matches(self):
151144
dtype = DatetimeTZDtype(tz="US/Central")
152145
arr = DatetimeArray._from_sequence(["2000"], dtype=dtype)
153-
result = DatetimeArray._from_sequence_not_strict(arr, dtype=dtype)
146+
result = DatetimeArray._from_sequence(arr, dtype=dtype)
154147
tm.assert_equal(arr, result)
155148

156149
@pytest.mark.parametrize("order", ["F", "C"])
@@ -160,13 +153,6 @@ def test_2d(self, order):
160153
if order == "F":
161154
arr = arr.T
162155

163-
res = _sequence_to_dt64ns(arr)
164-
expected = _sequence_to_dt64ns(arr.ravel())
165-
166-
tm.assert_numpy_array_equal(res[0].ravel(), expected[0])
167-
assert res[1] == expected[1]
168-
assert res[2] == expected[2]
169-
170156
res = DatetimeArray._from_sequence(arr)
171157
expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape)
172158
tm.assert_datetime_array_equal(res, expected)

pandas/tests/arrays/datetimes/test_cumulative.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,35 @@
77
class TestAccumulator:
88
def test_accumulators_freq(self):
99
# GH#50297
10-
arr = DatetimeArray._from_sequence_not_strict(
10+
arr = DatetimeArray._from_sequence(
1111
[
1212
"2000-01-01",
1313
"2000-01-02",
1414
"2000-01-03",
15-
],
16-
freq="D",
17-
)
15+
]
16+
)._with_freq("infer")
1817
result = arr._accumulate("cummin")
19-
expected = DatetimeArray._from_sequence_not_strict(
20-
["2000-01-01"] * 3, freq=None
21-
)
18+
expected = DatetimeArray._from_sequence(["2000-01-01"] * 3)
2219
tm.assert_datetime_array_equal(result, expected)
2320

2421
result = arr._accumulate("cummax")
25-
expected = DatetimeArray._from_sequence_not_strict(
22+
expected = DatetimeArray._from_sequence(
2623
[
2724
"2000-01-01",
2825
"2000-01-02",
2926
"2000-01-03",
3027
],
31-
freq=None,
3228
)
3329
tm.assert_datetime_array_equal(result, expected)
3430

3531
@pytest.mark.parametrize("func", ["cumsum", "cumprod"])
3632
def test_accumulators_disallowed(self, func):
3733
# GH#50297
38-
arr = DatetimeArray._from_sequence_not_strict(
34+
arr = DatetimeArray._from_sequence(
3935
[
4036
"2000-01-01",
4137
"2000-01-02",
4238
],
43-
freq="D",
44-
)
39+
)._with_freq("infer")
4540
with pytest.raises(TypeError, match=f"Accumulation {func}"):
4641
arr._accumulate(func)

pandas/tests/arrays/test_datetimelike.py

-7
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727
PeriodArray,
2828
TimedeltaArray,
2929
)
30-
from pandas.core.arrays.datetimes import _sequence_to_dt64ns
31-
from pandas.core.arrays.timedeltas import sequence_to_td64ns
3230

3331

3432
# TODO: more freq variants
@@ -1314,11 +1312,6 @@ def test_from_pandas_array(dtype):
13141312
expected = cls._from_sequence(data)
13151313
tm.assert_extension_array_equal(result, expected)
13161314

1317-
func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype]
1318-
result = func(arr)[0]
1319-
expected = func(data)[0]
1320-
tm.assert_equal(result, expected)
1321-
13221315
func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
13231316
result = func(arr).array
13241317
expected = func(data).array

pandas/tests/arrays/timedeltas/test_cumulative.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
class TestAccumulator:
88
def test_accumulators_disallowed(self):
99
# GH#50297
10-
arr = TimedeltaArray._from_sequence_not_strict(["1D", "2D"])
10+
arr = TimedeltaArray._from_sequence(["1D", "2D"])
1111
with pytest.raises(TypeError, match="cumprod not supported"):
1212
arr._accumulate("cumprod")
1313

1414
def test_cumsum(self):
1515
# GH#50297
16-
arr = TimedeltaArray._from_sequence_not_strict(["1D", "2D"])
16+
arr = TimedeltaArray._from_sequence(["1D", "2D"])
1717
result = arr._accumulate("cumsum")
18-
expected = TimedeltaArray._from_sequence_not_strict(["1D", "3D"])
18+
expected = TimedeltaArray._from_sequence(["1D", "3D"])
1919
tm.assert_timedelta_array_equal(result, expected)

pandas/tests/frame/indexing/test_setitem.py

+11
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,17 @@ def test_setitem_frame_midx_columns(self):
761761
df[col_name] = df[[col_name]]
762762
tm.assert_frame_equal(df, expected)
763763

764+
def test_loc_setitem_ea_dtype(self):
765+
# GH#55604
766+
df = DataFrame({"a": np.array([10], dtype="i8")})
767+
df.loc[:, "a"] = Series([11], dtype="Int64")
768+
expected = DataFrame({"a": np.array([11], dtype="i8")})
769+
tm.assert_frame_equal(df, expected)
770+
771+
df = DataFrame({"a": np.array([10], dtype="i8")})
772+
df.iloc[:, 0] = Series([11], dtype="Int64")
773+
tm.assert_frame_equal(df, expected)
774+
764775

765776
class TestSetitemTZAwareValues:
766777
@pytest.fixture

pandas/tests/indexes/datetimes/test_constructors.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -74,20 +74,17 @@ def test_explicit_tz_none(self):
7474
with pytest.raises(ValueError, match=msg):
7575
DatetimeIndex([], dtype="M8[ns, UTC]", tz=None)
7676

77-
@pytest.mark.parametrize(
78-
"dt_cls", [DatetimeIndex, DatetimeArray._from_sequence_not_strict]
79-
)
80-
def test_freq_validation_with_nat(self, dt_cls):
77+
def test_freq_validation_with_nat(self):
8178
# GH#11587 make sure we get a useful error message when generate_range
8279
# raises
8380
msg = (
8481
"Inferred frequency None from passed values does not conform "
8582
"to passed frequency D"
8683
)
8784
with pytest.raises(ValueError, match=msg):
88-
dt_cls([pd.NaT, Timestamp("2011-01-01")], freq="D")
85+
DatetimeIndex([pd.NaT, Timestamp("2011-01-01")], freq="D")
8986
with pytest.raises(ValueError, match=msg):
90-
dt_cls([pd.NaT, Timestamp("2011-01-01")._value], freq="D")
87+
DatetimeIndex([pd.NaT, Timestamp("2011-01-01")._value], freq="D")
9188

9289
# TODO: better place for tests shared by DTI/TDI?
9390
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)