Skip to content

Commit 8e435ab

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_numeric_only_gb
# Conflicts: # doc/source/whatsnew/v1.5.0.rst
2 parents 6921c94 + b6f21f3 commit 8e435ab

40 files changed

+758
-303
lines changed

doc/source/whatsnew/v1.5.0.rst

+7-2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ Other enhancements
9595
- :meth:`pd.concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
9696
- :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
9797
- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.GroupBy.quantile` (:issue:`46560`)
98+
- A :class:`errors.PerformanceWarning` is now issued when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
9899

99100
.. ---------------------------------------------------------------------------
100101
.. _whatsnew_150.notable_bug_fixes:
@@ -481,7 +482,7 @@ Timedelta
481482

482483
Time Zones
483484
^^^^^^^^^^
484-
-
485+
- Bug in :class:`Timestamp` constructor raising when passed a ``ZoneInfo`` tzinfo object (:issue:`46425`)
485486
-
486487

487488
Numeric
@@ -569,6 +570,8 @@ I/O
569570
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
570571
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
571572
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
573+
- :meth:`DataFrame.to_html` now excludes the ``border`` attribute from ``<table>`` elements when the ``border`` keyword is set to ``False``.
574+
-
572575

573576
Period
574577
^^^^^^
@@ -599,7 +602,9 @@ Groupby/resample/rolling
599602
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
600603
- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
601604
- Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
602-
-
605+
- Bug in :meth:`SeriesGroupBy.apply` where the result was incorrectly named when there was a unique group (:issue:`46369`)
606+
- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
607+
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included; it is now shown in the repr (:issue:`46754`)
603608

604609
Reshaping
605610
^^^^^^^^^

pandas/_libs/algos_common_helper.pxi.in

+2-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ def ensure_{{name}}(object arr, copy=True):
6565
if (<ndarray>arr).descr.type_num == NPY_{{c_type}}:
6666
return arr
6767
else:
68-
return arr.astype(np.{{dtype}}, copy=copy)
68+
# equiv: arr.astype(np.{{dtype}}, copy=copy)
69+
return cnp.PyArray_Cast(<ndarray>arr, cnp.NPY_{{c_type}})
6970
else:
7071
return np.array(arr, dtype=np.{{dtype}})
7172

pandas/_libs/tslib.pyx

+5-1
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ cpdef array_to_datetime(
424424
"""
425425
cdef:
426426
Py_ssize_t i, n = len(values)
427-
object val, py_dt, tz, tz_out = None
427+
object val, tz
428428
ndarray[int64_t] iresult
429429
ndarray[object] oresult
430430
npy_datetimestruct dts
@@ -443,6 +443,8 @@ cpdef array_to_datetime(
443443
float offset_seconds, tz_offset
444444
set out_tzoffset_vals = set()
445445
bint string_to_dts_failed
446+
datetime py_dt
447+
tzinfo tz_out = None
446448

447449
# specify error conditions
448450
assert is_raise or is_ignore or is_coerce
@@ -647,6 +649,8 @@ cpdef array_to_datetime(
647649
return result, tz_out
648650

649651

652+
@cython.wraparound(False)
653+
@cython.boundscheck(False)
650654
cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values):
651655
"""
652656
Fallback for array_to_datetime if an OutOfBoundsDatetime is raised

pandas/_libs/tslibs/conversion.pyx

+33-29
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ from pandas._libs.tslibs.timezones cimport (
5656
is_fixed_offset,
5757
is_tzlocal,
5858
is_utc,
59+
is_zoneinfo,
5960
maybe_get_tz,
6061
tz_compare,
6162
utc_pytz as UTC,
@@ -532,7 +533,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
532533
# see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
533534
if is_utc(tz):
534535
pass
535-
elif is_tzlocal(tz):
536+
elif is_tzlocal(tz) or is_zoneinfo(tz):
536537
localize_tzinfo_api(obj.value, tz, &obj.fold)
537538
else:
538539
trans, deltas, typ = get_dst_info(tz)
@@ -581,55 +582,62 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
581582
"""
582583
cdef:
583584
npy_datetimestruct dts
584-
int out_local = 0, out_tzoffset = 0
585-
bint do_parse_datetime_string = False
585+
int out_local = 0, out_tzoffset = 0, string_to_dts_failed
586+
datetime dt
587+
int64_t ival
586588

587589
if len(ts) == 0 or ts in nat_strings:
588590
ts = NaT
591+
obj = _TSObject()
592+
obj.value = NPY_NAT
593+
obj.tzinfo = tz
594+
return obj
589595
elif ts == 'now':
590596
# Issue 9000, we short-circuit rather than going
591597
# into np_datetime_strings which returns utc
592-
ts = datetime.now(tz)
598+
dt = datetime.now(tz)
593599
elif ts == 'today':
594600
# Issue 9000, we short-circuit rather than going
595601
# into np_datetime_strings which returns a normalized datetime
596-
ts = datetime.now(tz)
602+
dt = datetime.now(tz)
597603
# equiv: datetime.today().replace(tzinfo=tz)
598604
else:
599605
string_to_dts_failed = _string_to_dts(
600606
ts, &dts, &out_local,
601607
&out_tzoffset, False
602608
)
603-
try:
604-
if not string_to_dts_failed:
609+
if not string_to_dts_failed:
610+
try:
605611
check_dts_bounds(&dts)
606612
if out_local == 1:
607613
return _create_tsobject_tz_using_offset(dts,
608614
out_tzoffset, tz)
609615
else:
610-
ts = dtstruct_to_dt64(&dts)
616+
ival = dtstruct_to_dt64(&dts)
611617
if tz is not None:
612618
# shift for _localize_tso
613-
ts = tz_localize_to_utc_single(ts, tz,
614-
ambiguous="raise")
619+
ival = tz_localize_to_utc_single(ival, tz,
620+
ambiguous="raise")
615621

616-
except OutOfBoundsDatetime:
617-
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
618-
# parser will return incorrect result because it will ignore
619-
# nanoseconds
620-
raise
622+
return convert_to_tsobject(ival, tz, None, False, False)
621623

622-
except ValueError:
623-
do_parse_datetime_string = True
624+
except OutOfBoundsDatetime:
625+
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
626+
# parser will return incorrect result because it will ignore
627+
# nanoseconds
628+
raise
624629

625-
if string_to_dts_failed or do_parse_datetime_string:
626-
try:
627-
ts = parse_datetime_string(ts, dayfirst=dayfirst,
628-
yearfirst=yearfirst)
629-
except (ValueError, OverflowError):
630-
raise ValueError("could not convert string to Timestamp")
630+
except ValueError:
631+
# Fall through to parse_datetime_string
632+
pass
633+
634+
try:
635+
dt = parse_datetime_string(ts, dayfirst=dayfirst,
636+
yearfirst=yearfirst)
637+
except (ValueError, OverflowError):
638+
raise ValueError("could not convert string to Timestamp")
631639

632-
return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
640+
return convert_datetime_to_tsobject(dt, tz)
633641

634642

635643
cdef inline check_overflows(_TSObject obj):
@@ -688,12 +696,8 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
688696
Sets obj.tzinfo inplace, alters obj.dts inplace.
689697
"""
690698
cdef:
691-
ndarray[int64_t] trans
692-
int64_t[::1] deltas
693699
int64_t local_val
694-
int64_t* tdata
695-
Py_ssize_t pos, ntrans, outpos = -1
696-
str typ
700+
Py_ssize_t outpos = -1
697701

698702
assert obj.tzinfo is None
699703

pandas/_libs/tslibs/timedeltas.pxd

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from cpython.datetime cimport timedelta
22
from numpy cimport int64_t
33

4+
from .np_datetime cimport NPY_DATETIMEUNIT
5+
46

57
# Exposed for tslib, not intended for outside use.
68
cpdef int64_t delta_to_nanoseconds(delta) except? -1
@@ -13,7 +15,9 @@ cdef class _Timedelta(timedelta):
1315
int64_t value # nanoseconds
1416
bint _is_populated # are my components populated
1517
int64_t _d, _h, _m, _s, _ms, _us, _ns
18+
NPY_DATETIMEUNIT _reso
1619

1720
cpdef timedelta to_pytimedelta(_Timedelta self)
1821
cdef bint _has_ns(self)
1922
cdef _ensure_components(_Timedelta self)
23+
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)

0 commit comments

Comments
 (0)