Skip to content

Commit 6b38fbf

Browse files
jbrockmendel authored and TLouf committed
PERF: DataFrame.transpose with dt64tz (pandas-dev#40149)
1 parent a73b25a commit 6b38fbf

File tree

4 files changed

+65
-7
lines changed

4 files changed

+65
-7
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,7 @@ Performance improvements
673673
- Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
674674
- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
675675
- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
676+
- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
676677
- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`)
677678

678679
.. ---------------------------------------------------------------------------

pandas/core/array_algos/take.py

+8 -3
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@
1616
from pandas._typing import ArrayLike
1717

1818
from pandas.core.dtypes.cast import maybe_promote
19-
from pandas.core.dtypes.common import ensure_platform_int
19+
from pandas.core.dtypes.common import (
20+
ensure_platform_int,
21+
is_1d_only_ea_obj,
22+
)
2023
from pandas.core.dtypes.missing import na_value_for_dtype
2124

2225
from pandas.core.construction import ensure_wrapped_if_datetimelike
@@ -91,12 +94,14 @@ def take_nd(
9194

9295
if not isinstance(arr, np.ndarray):
9396
# i.e. ExtensionArray,
94-
if arr.ndim == 2:
95-
# e.g. DatetimeArray, TimedeltArray
97+
# includes for EA to catch DatetimeArray, TimedeltaArray
98+
if not is_1d_only_ea_obj(arr):
99+
# i.e. DatetimeArray, TimedeltaArray
96100
arr = cast("NDArrayBackedExtensionArray", arr)
97101
return arr.take(
98102
indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
99103
)
104+
100105
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
101106

102107
arr = np.asarray(arr)

pandas/core/frame.py

+43 -4
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,7 @@
101101
ensure_platform_int,
102102
infer_dtype_from_object,
103103
is_1d_only_ea_dtype,
104+
is_1d_only_ea_obj,
104105
is_bool_dtype,
105106
is_dataclass,
106107
is_datetime64_any_dtype,
@@ -139,7 +140,11 @@
139140
)
140141
from pandas.core.array_algos.take import take_2d_multi
141142
from pandas.core.arraylike import OpsMixin
142-
from pandas.core.arrays import ExtensionArray
143+
from pandas.core.arrays import (
144+
DatetimeArray,
145+
ExtensionArray,
146+
TimedeltaArray,
147+
)
143148
from pandas.core.arrays.sparse import SparseFrameAccessor
144149
from pandas.core.construction import (
145150
extract_array,
@@ -852,6 +857,28 @@ def _can_fast_transpose(self) -> bool:
852857
# TODO(EA2D) special case would be unnecessary with 2D EAs
853858
return not is_1d_only_ea_dtype(dtype)
854859

860+
@property
861+
def _values_compat(self) -> np.ndarray | DatetimeArray | TimedeltaArray:
862+
"""
863+
Analogue to ._values that may return a 2D ExtensionArray.
864+
"""
865+
mgr = self._mgr
866+
if isinstance(mgr, ArrayManager):
867+
return self._values
868+
869+
blocks = mgr.blocks
870+
if len(blocks) != 1:
871+
return self._values
872+
873+
arr = blocks[0].values
874+
if arr.ndim == 1:
875+
# non-2D ExtensionArray
876+
return self._values
877+
878+
# more generally, whatever we allow in NDArrayBackedExtensionBlock
879+
arr = cast("DatetimeArray | TimedeltaArray", arr)
880+
return arr.T
881+
855882
# ----------------------------------------------------------------------
856883
# Rendering Methods
857884

@@ -3292,7 +3319,18 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
32923319
# construct the args
32933320

32943321
dtypes = list(self.dtypes)
3295-
if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]):
3322+
3323+
if self._can_fast_transpose:
3324+
# Note: tests pass without this, but this improves perf quite a bit.
3325+
new_vals = self._values_compat.T
3326+
if copy:
3327+
new_vals = new_vals.copy()
3328+
3329+
result = self._constructor(new_vals, index=self.columns, columns=self.index)
3330+
3331+
elif (
3332+
self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0])
3333+
):
32963334
# We have EAs with the same dtype. We can preserve that dtype in transpose.
32973335
dtype = dtypes[0]
32983336
arr_type = dtype.construct_array_type()
@@ -9760,8 +9798,9 @@ def func(values: np.ndarray):
97609798

97619799
def blk_func(values, axis=1):
97629800
if isinstance(values, ExtensionArray):
9763-
if values.ndim == 2:
9764-
# i.e. DatetimeArray, TimedeltaArray
9801+
if not is_1d_only_ea_obj(values) and not isinstance(
9802+
self._mgr, ArrayManager
9803+
):
97659804
return values._reduce(name, axis=1, skipna=skipna, **kwds)
97669805
return values._reduce(name, skipna=skipna, **kwds)
97679806
else:

pandas/tests/frame/methods/test_transpose.py

+13
Original file line number | Diff line number | Diff line change
@@ -90,3 +90,16 @@ def test_transpose_get_view(self, float_frame):
9090
dft.values[:, 5:10] = 5
9191

9292
assert (float_frame.values[5:10] == 5).all()
93+
94+
@td.skip_array_manager_invalid_test
95+
def test_transpose_get_view_dt64tzget_view(self):
96+
dti = date_range("2016-01-01", periods=6, tz="US/Pacific")
97+
arr = dti._data.reshape(3, 2)
98+
df = DataFrame(arr)
99+
assert df._mgr.nblocks == 1
100+
101+
result = df.T
102+
assert result._mgr.nblocks == 1
103+
104+
rtrip = result._mgr.blocks[0].values
105+
assert np.shares_memory(arr._data, rtrip._data)

0 commit comments

Comments
 (0)