
Commit 981ba66

Merge branch 'main' into remove-json-locale

2 parents 7682ef6 + 450f051, commit 981ba66

28 files changed: +378 -234 lines

.pre-commit-config.yaml (-1)

@@ -287,5 +287,4 @@ repos:
         language: python
         additional_dependencies:
         - autotyping==22.9.0
-        - black==22.6.0
         - libcst==0.4.7

asv_bench/benchmarks/package.py (+1 -1)

@@ -11,7 +11,7 @@ def time_import(self):
         # measurement of the import time we actually care about,
         # without the subprocess or interpreter overhead
         cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
-        p = subprocess.run(cmd, stderr=subprocess.PIPE)
+        p = subprocess.run(cmd, stderr=subprocess.PIPE, check=True)

         line = p.stderr.splitlines()[-1]
         field = line.split(b"|")[-2].strip()
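For context on the ``check=True`` change: ``subprocess.run`` only raises when asked to, so without it a failing child interpreter is silently ignored and the benchmark crashes later on an empty stderr. A minimal standalone sketch (not part of the benchmark suite) of the behavior this relies on:

```python
import subprocess
import sys

# check=True turns a non-zero exit status of the child interpreter into a
# subprocess.CalledProcessError instead of a silently failed CompletedProcess.
cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
p = subprocess.run(cmd, stderr=subprocess.PIPE, check=True)

# -X importtime writes its report to stderr; the last line holds the
# cumulative time for the top-level "import pandas as pd".
print(p.stderr.splitlines()[-1])
```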

doc/source/whatsnew/v1.5.2.rst (+4 -4)

@@ -1,6 +1,6 @@
 .. _whatsnew_152:

-What's new in 1.5.2 (November ??, 2022)
+What's new in 1.5.2 (November 21, 2022)
 ---------------------------------------

 These are the changes in pandas 1.5.2. See :ref:`release` for a full changelog

@@ -15,27 +15,27 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`MultiIndex.join` for extension array dtypes (:issue:`49277`)
 - Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`)
+- Fixed regression in arithmetic operations for :class:`DataFrame` with :class:`MultiIndex` columns with different dtypes (:issue:`49769`)
 - Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance
   from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`)
 - Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`)
 - Fixed performance regression in groupby operations (:issue:`49676`)
--
+- Fixed regression in :class:`Timedelta` constructor returning object of wrong type when subclassing ``Timedelta`` (:issue:`49579`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_152.bug_fixes:

 Bug fixes
 ~~~~~~~~~
 - Bug in the Copy-on-Write implementation losing track of views in certain chained indexing cases (:issue:`48996`)
--
+- Fixed memory leak in :meth:`.Styler.to_excel` (:issue:`49751`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_152.other:

 Other
 ~~~~~
 - Reverted ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
--

 .. ---------------------------------------------------------------------------
 .. _whatsnew_152.contributors:
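A hedged illustration of the :issue:`49579` entry above (the implementation lives in the pandas/_libs/tslibs/timedeltas.pyx hunks further down, where the constructor now threads ``cls`` through ``_timedelta_from_value_and_reso``). The subclass name here is made up for the example:

```python
import pandas as pd


class MyTimedelta(pd.Timedelta):
    """Illustrative subclass; any user-defined Timedelta subclass applies."""


td = MyTimedelta("1 day")
# With the regression, the constructor handed back a plain Timedelta;
# after the fix the subclass type is expected to be preserved.
print(type(td))                       # expected: <class '__main__.MyTimedelta'>
print(isinstance(td, pd.Timedelta))   # True in either case
```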

doc/source/whatsnew/v2.0.0.rst (+4)

@@ -61,8 +61,10 @@ Other enhancements
 - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
 - Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
 - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`)
+- :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`)
 - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
 - Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
+-

 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.notable_bug_fixes:
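A hedged usage sketch for the new ``date_range`` ``unit`` keyword listed above; the resulting dtype is what the change aims for, not something verified here:

```python
import pandas as pd

# Ask for second resolution instead of the default nanoseconds.
idx = pd.date_range("2022-01-01", periods=3, freq="D", unit="s")
print(idx.dtype)   # expected: datetime64[s]
print(idx)
```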
@@ -546,6 +548,8 @@ Removal of prior version deprecations/changes
 - Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of casting both to object-dtype (:issue:`44243`)
 - Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
 - Changed behavior of :class:`Series` and :class:`DataFrame` constructors with integer dtype and floating-point data containing ``NaN``, this now raises ``IntCastingNaNError`` (:issue:`40110`)
+- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with an integer ``dtype`` and values that are too large to losslessly cast to this dtype, this now raises ``ValueError`` (:issue:`41734`)
+- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with an integer ``dtype`` and values having either ``datetime64`` or ``timedelta64`` dtypes, this now raises ``TypeError``, use ``values.view("int64")`` instead (:issue:`41770`)
 - Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
 - Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`)
 - Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`)
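A hedged sketch of the first new entry above (:issue:`41734`); the explicit ``.astype`` escape hatch is quoted from the new error message in pandas/core/dtypes/cast.py further down:

```python
import pandas as pd

values = [1, 200, 923442]   # 923442 does not fit in int8

try:
    pd.Series(values, dtype="int8")   # now expected to raise ValueError
except ValueError as err:
    print(err)

# Opting in to the lossy cast explicitly, as the error message suggests:
s = pd.Series(values).astype("int8")
print(s.tolist())   # out-of-range values may wrap per numpy casting rules
```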

pandas/_libs/tslibs/dtypes.pxd (+1 -1)

@@ -4,7 +4,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


 cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
-cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
+cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
 cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
 cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
 cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1

pandas/_libs/tslibs/dtypes.pyi (+1)

@@ -10,6 +10,7 @@ def periods_per_second(reso: int) -> int: ...
 def is_supported_unit(reso: int) -> bool: ...
 def npy_unit_to_abbrev(reso: int) -> str: ...
 def get_supported_reso(reso: int) -> int: ...
+def abbrev_to_npy_unit(abbrev: str) -> int: ...

 class PeriodDtypeBase:
     _dtype_code: int  # PeriodDtypeCode

pandas/_libs/tslibs/dtypes.pyx (+1 -1)

@@ -336,7 +336,7 @@ cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
         raise NotImplementedError(unit)


-cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev):
+cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev):
     if abbrev == "Y":
         return NPY_DATETIMEUNIT.NPY_FR_Y
     elif abbrev == "M":

pandas/_libs/tslibs/timedeltas.pxd (+1)

@@ -22,6 +22,7 @@ cdef class _Timedelta(timedelta):

     cpdef timedelta to_pytimedelta(_Timedelta self)
     cdef bint _has_ns(self)
+    cdef bint _is_in_pytimedelta_bounds(self)
     cdef _ensure_components(_Timedelta self)
     cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
     cdef _Timedelta _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=*)

pandas/_libs/tslibs/timedeltas.pyx (+39 -12)

@@ -189,7 +189,7 @@ def ints_to_pytimedelta(ndarray m8values, box=False):
             res_val = <object>NaT
         else:
             if box:
-                res_val = _timedelta_from_value_and_reso(value, reso=reso)
+                res_val = _timedelta_from_value_and_reso(Timedelta, value, reso=reso)
             elif reso == NPY_DATETIMEUNIT.NPY_FR_ns:
                 res_val = timedelta(microseconds=int(value) / 1000)
             elif reso == NPY_DATETIMEUNIT.NPY_FR_us:

@@ -741,7 +741,7 @@ cdef bint _validate_ops_compat(other):
 def _op_unary_method(func, name):
     def f(self):
         new_value = func(self.value)
-        return _timedelta_from_value_and_reso(new_value, self._creso)
+        return _timedelta_from_value_and_reso(Timedelta, new_value, self._creso)
     f.__name__ = name
     return f

@@ -804,7 +804,7 @@ def _binary_op_method_timedeltalike(op, name):
             # TODO: more generally could do an overflowcheck in op?
             return NaT

-        return _timedelta_from_value_and_reso(res, reso=self._creso)
+        return _timedelta_from_value_and_reso(Timedelta, res, reso=self._creso)

     f.__name__ = name
     return f

@@ -935,10 +935,10 @@ cdef _to_py_int_float(v):


 def _timedelta_unpickle(value, reso):
-    return _timedelta_from_value_and_reso(value, reso)
+    return _timedelta_from_value_and_reso(Timedelta, value, reso)


-cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
+cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
     # Could make this a classmethod if/when cython supports cdef classmethods
     cdef:
         _Timedelta td_base

@@ -949,13 +949,13 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
     # We pass 0 instead, and override seconds, microseconds, days.
     # In principle we could pass 0 for ns and us too.
     if reso == NPY_FR_ns:
-        td_base = _Timedelta.__new__(Timedelta, microseconds=int(value) // 1000)
+        td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
     elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
-        td_base = _Timedelta.__new__(Timedelta, microseconds=int(value))
+        td_base = _Timedelta.__new__(cls, microseconds=int(value))
     elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
-        td_base = _Timedelta.__new__(Timedelta, milliseconds=0)
+        td_base = _Timedelta.__new__(cls, milliseconds=0)
     elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
-        td_base = _Timedelta.__new__(Timedelta, seconds=0)
+        td_base = _Timedelta.__new__(cls, seconds=0)
     # Other resolutions are disabled but could potentially be implemented here:
     # elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
     #     td_base = _Timedelta.__new__(Timedelta, minutes=int(value))

@@ -1093,8 +1093,27 @@ cdef class _Timedelta(timedelta):
             # non-invariant behavior.
             # see GH#44504
             return hash(self.value)
-        else:
+        elif self._is_in_pytimedelta_bounds() and (
+            self._creso == NPY_FR_ns or self._creso == NPY_DATETIMEUNIT.NPY_FR_us
+        ):
+            # If we can defer to timedelta.__hash__, do so, as that
+            # ensures the hash is invariant to our _reso.
+            # We can only defer for ns and us, as for these two resos we
+            # call _Timedelta.__new__ with the correct input in
+            # _timedelta_from_value_and_reso; so timedelta.__hash__
+            # will be correct
             return timedelta.__hash__(self)
+        else:
+            # We want to ensure that two equivalent Timedelta objects
+            # have the same hash. So we try downcasting to the next-lowest
+            # resolution.
+            try:
+                obj = (<_Timedelta>self)._as_creso(<NPY_DATETIMEUNIT>(self._creso + 1))
+            except OverflowError:
+                # Doesn't fit, so we're off the hook
+                return hash(self.value)
+            else:
+                return hash(obj)

     def __richcmp__(_Timedelta self, object other, int op):
         cdef:

@@ -1152,6 +1171,13 @@ cdef class _Timedelta(timedelta):
         else:
             raise NotImplementedError(self._creso)

+    cdef bint _is_in_pytimedelta_bounds(self):
+        """
+        Check if we are within the bounds of datetime.timedelta.
+        """
+        self._ensure_components()
+        return -999999999 <= self._d and self._d <= 999999999
+
     cdef _ensure_components(_Timedelta self):
         """
         compute the components

@@ -1502,7 +1528,7 @@ cdef class _Timedelta(timedelta):
     @classmethod
     def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
         # exposing as classmethod for testing
-        return _timedelta_from_value_and_reso(value, reso)
+        return _timedelta_from_value_and_reso(cls, value, reso)

     def as_unit(self, str unit, bint round_ok=True):
         """

@@ -1737,7 +1763,7 @@ class Timedelta(_Timedelta):
         if value == NPY_NAT:
             return NaT

-        return _timedelta_from_value_and_reso(value, NPY_FR_ns)
+        return _timedelta_from_value_and_reso(cls, value, NPY_FR_ns)

     def __setstate__(self, state):
         if len(state) == 1:

@@ -1829,6 +1855,7 @@ class Timedelta(_Timedelta):
             return NaT

         return _timedelta_from_value_and_reso(
+            Timedelta,
             <int64_t>(other * self.value),
             reso=self._creso,
         )
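A hedged sketch of the invariant the reworked ``__hash__`` above is after: equal Timedelta objects should hash equally even when their internal resolutions differ, either by deferring to ``datetime.timedelta.__hash__`` or by downcasting one resolution at a time. ``as_unit`` is the public conversion used here, as defined in this same file:

```python
import pandas as pd

td_ns = pd.Timedelta("1 day")      # nanosecond resolution by default
td_s = td_ns.as_unit("s")          # same span at second resolution

# Equality already holds across resolutions; the __hash__ change is meant to
# make the hashes agree as well, so both land in the same set/dict slot.
print(td_ns == td_s)               # True
print(hash(td_ns) == hash(td_s))   # expected: True with this change
print(len({td_ns, td_s}))          # expected: 1
```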

pandas/core/arrays/_ranges.py (+17 -2)

@@ -22,6 +22,7 @@ def generate_regular_range(
     end: Timestamp | Timedelta | None,
     periods: int | None,
     freq: BaseOffset,
+    unit: str = "ns",
 ) -> npt.NDArray[np.intp]:
     """
     Generate a range of dates or timestamps with the spans between dates

@@ -37,14 +38,28 @@ def generate_regular_range(
         Number of periods in produced date range.
     freq : Tick
         Describes space between dates in produced date range.
+    unit : str, default "ns"
+        The resolution the output is meant to represent.

     Returns
     -------
-    ndarray[np.int64] Representing nanoseconds.
+    ndarray[np.int64]
+        Representing the given resolution.
     """
     istart = start.value if start is not None else None
     iend = end.value if end is not None else None
-    stride = freq.nanos
+    freq.nanos  # raises if non-fixed frequency
+    td = Timedelta(freq)
+    try:
+        td = td.as_unit(  # pyright: ignore[reportGeneralTypeIssues]
+            unit, round_ok=False
+        )
+    except ValueError as err:
+        raise ValueError(
+            f"freq={freq} is incompatible with unit={unit}. "
+            "Use a lower freq or a higher unit instead."
+        ) from err
+    stride = int(td.value)

     if periods is None and istart is not None and iend is not None:
         b = istart
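A hedged sketch of how the new error above can surface through ``date_range`` (assuming, per the datetimes.py changes below, that the ``unit`` keyword reaches ``generate_regular_range``): a fixed frequency finer than the requested unit cannot be represented without rounding, so ``Timedelta.as_unit(..., round_ok=False)`` fails and is re-raised with the message built above:

```python
import pandas as pd

# One millisecond cannot be expressed losslessly at second resolution, so this
# is expected to raise a ValueError along the lines of
# "freq=... is incompatible with unit=s. Use a lower freq or a higher unit instead."
try:
    pd.date_range("2022-01-01", periods=3, freq="ms", unit="s")
except ValueError as err:
    print(err)
```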

pandas/core/arrays/datetimes.py (+24 -4)

@@ -42,6 +42,7 @@
     tz_convert_from_utc,
     tzconversion,
 )
+from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
 from pandas._typing import (
     DateTimeErrorChoices,
     IntervalClosedType,

@@ -380,6 +381,8 @@ def _generate_range(  # type: ignore[override]
         ambiguous: TimeAmbiguous = "raise",
         nonexistent: TimeNonexistent = "raise",
         inclusive: IntervalClosedType = "both",
+        *,
+        unit: str | None = None,
     ) -> DatetimeArray:

         periods = dtl.validate_periods(periods)

@@ -402,6 +405,17 @@ def _generate_range(  # type: ignore[override]
         if start is NaT or end is NaT:
             raise ValueError("Neither `start` nor `end` can be NaT")

+        if unit is not None:
+            if unit not in ["s", "ms", "us", "ns"]:
+                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
+        else:
+            unit = "ns"
+
+        if start is not None and unit is not None:
+            start = start.as_unit(unit, round_ok=False)
+        if end is not None and unit is not None:
+            end = end.as_unit(unit, round_ok=False)
+
         left_inclusive, right_inclusive = validate_inclusive(inclusive)
         start, end = _maybe_normalize_endpoints(start, end, normalize)
         tz = _infer_tz_from_endpoints(start, end, tz)

@@ -416,6 +430,7 @@ def _generate_range(  # type: ignore[override]
             end = _maybe_localize_point(
                 end, end_tz, end, freq, tz, ambiguous, nonexistent
             )
+
         if freq is not None:
             # We break Day arithmetic (fixed 24 hour) here and opt for
             # Day to mean calendar day (23/24/25 hour). Therefore, strip

@@ -427,7 +442,7 @@ def _generate_range(  # type: ignore[override]
                 end = end.tz_localize(None)

             if isinstance(freq, Tick):
-                i8values = generate_regular_range(start, end, periods, freq)
+                i8values = generate_regular_range(start, end, periods, freq, unit=unit)
             else:
                 xdr = _generate_range(
                     start=start, end=end, periods=periods, offset=freq

@@ -441,8 +456,13 @@ def _generate_range(  # type: ignore[override]
                 if not timezones.is_utc(tz):
                     # short-circuit tz_localize_to_utc which would make
                     # an unnecessary copy with UTC but be a no-op.
+                    creso = abbrev_to_npy_unit(unit)
                     i8values = tzconversion.tz_localize_to_utc(
-                        i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
+                        i8values,
+                        tz,
+                        ambiguous=ambiguous,
+                        nonexistent=nonexistent,
+                        creso=creso,
                     )

             # i8values is localized datetime64 array -> have to convert

@@ -477,8 +497,8 @@ def _generate_range(  # type: ignore[override]
         if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
             i8values = i8values[:-1]

-        dt64_values = i8values.view("datetime64[ns]")
-        dtype = tz_to_dtype(tz)
+        dt64_values = i8values.view(f"datetime64[{unit}]")
+        dtype = tz_to_dtype(tz, unit=unit)
         return cls._simple_new(dt64_values, freq=freq, dtype=dtype)

     # -----------------------------------------------------------------
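A hedged sketch of the ``_generate_range`` changes above as seen through ``date_range``: an unrecognized ``unit`` is rejected up front, and a valid one is expected to flow into the resulting dtype, including on the tz-aware path that now passes ``creso`` to ``tz_localize_to_utc``:

```python
import pandas as pd

# Invalid unit: caught by the new validation before any range is generated.
try:
    pd.date_range("2022-01-01", periods=2, unit="minutes")
except ValueError as err:
    print(err)   # "'unit' must be one of 's', 'ms', 'us', 'ns'"

# Valid unit on the tz-aware path: dtype is expected to carry unit and tz.
idx = pd.date_range("2022-01-01", periods=2, freq="D", tz="Europe/Berlin", unit="s")
print(idx.dtype)   # expected: datetime64[s, Europe/Berlin]
```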

pandas/core/dtypes/cast.py (+6 -15)

@@ -41,7 +41,6 @@
     IntCastingNaNError,
     LossySetitemError,
 )
-from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_bool_kwarg

 from pandas.core.dtypes.common import (
@@ -1680,25 +1679,17 @@ def maybe_cast_to_integer_array(

     if casted.dtype < arr.dtype:
         # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
-        warnings.warn(
-            f"Values are too large to be losslessly cast to {dtype}. "
-            "In a future version this will raise OverflowError. To retain the "
-            f"old behavior, use pd.Series(values).astype({dtype})",
-            FutureWarning,
-            stacklevel=find_stack_level(),
+        raise ValueError(
+            f"Values are too large to be losslessly converted to {dtype}. "
+            f"To cast anyway, use pd.Series(values).astype({dtype})"
         )
-        return casted

     if arr.dtype.kind in ["m", "M"]:
         # test_constructor_maskedarray_nonfloat
-        warnings.warn(
-            f"Constructing Series or DataFrame from {arr.dtype} values and "
-            f"dtype={dtype} is deprecated and will raise in a future version. "
-            "Use values.view(dtype) instead.",
-            FutureWarning,
-            stacklevel=find_stack_level(),
+        raise TypeError(
+            f"Constructing a Series or DataFrame from {arr.dtype} values and "
+            f"dtype={dtype} is not supported. Use values.view({dtype}) instead."
         )
-        return casted

     # No known cases that get here, but raising explicitly to cover our bases.
     raise ValueError(f"values cannot be losslessly cast to {dtype}")
