Skip to content

BUG: Fix+test dataframe transpose with datetimeTZ #23730

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
51 changes: 47 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
is_integer, is_bool,
is_bool_dtype,
is_numeric_dtype,
is_datetime64_dtype,
is_datetime64_any_dtype,
is_timedelta64_dtype,
is_datetime64tz_dtype,
Expand All @@ -34,7 +35,8 @@
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
from pandas.core.dtypes.generic import (
ABCSeries, ABCPanel, ABCDataFrame, ABCDatetimeIndex)

from pandas.core.base import PandasObject, SelectionMixin
from pandas.core.index import (Index, MultiIndex, ensure_index,
Expand Down Expand Up @@ -683,11 +685,52 @@ def transpose(self, *args, **kwargs):

new_axes = self._construct_axes_dict_from(self, [self._get_axis(x)
for x in axes_names])
new_values = self.values.transpose(axes_numbers)
if kwargs.pop('copy', None) or (len(args) and args[-1]):
new_values = new_values.copy()

copy = kwargs.pop('copy', None) or (len(args) and args[-1])
nv.validate_transpose_for_generic(self, kwargs)

values = self.values
if (isinstance(values, ABCDatetimeIndex) and
values.tz is not None and self.ndim > 1):
# transpose is a no-op, and passing axes would raise ValueError
# as the DatetimeIndex.transpose method does not accept that kwarg
tz = values.tz
utc_values = values.asi8.reshape(self.shape)
utc_values = utc_values.transpose(axes_numbers)
if copy:
utc_values = utc_values.copy()
result = self._constructor(utc_values, **new_axes)
if self.ndim > 2:
# We're assuming DataFrame from here on
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure why you should do this here at all. Move this to internals and just create new blocks, rather than hacking it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point is that it is going to be a hack regardless unless we allow 2D EAs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not going to happen any time soon, if ever.

As I said, this is way too hacky and needs to be pushed to internals.

raise NotImplementedError

for col in result.columns:
result[col] = pd.DatetimeIndex(result[col], tz=tz)
return result.__finalize__(self)

else:
new_values = values.transpose(axes_numbers)
if copy:
new_values = new_values.copy()

if is_datetime64_dtype(new_values):
# case where we have multiple columns with identical
# datetime64tz dtypes; the dtype will be lost in the call
# to `self.values`, so we need to restore it.
dtypes = self.dtypes
if (any(is_datetime64tz_dtype(d) for d in dtypes) and
all(d == dtypes[0] for d in dtypes)):
# these values represent UTC timestamps
new_values = new_values.view('i8')
result = self._constructor(new_values, **new_axes)
if self.ndim != 2:
# assuming DataFrame from here on out
raise NotImplementedError
tz = self.dtypes[0].tz
for col in result.columns:
result[col] = pd.DatetimeIndex(result[col], tz=tz)
return result.__finalize__(self)

return self._constructor(new_values, **new_axes).__finalize__(self)

def swapaxes(self, axis1, axis2, copy=True):
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from pandas.core.dtypes.common import (
_NS_DTYPE,
is_datetime64tz_dtype,
is_datetimelike_v_numeric,
is_numeric_v_string_like, is_extension_type,
is_extension_array_dtype,
Expand Down Expand Up @@ -781,6 +782,9 @@ def _interleave(self):
dtype = dtype.subtype
elif is_extension_array_dtype(dtype):
dtype = 'object'
elif is_datetime64tz_dtype(dtype):
# TODO: we shouldn't be temporarily-dropping dtype information
dtype = 'M8[ns]'

result = np.empty(self.shape, dtype=dtype)

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,19 @@ def box_df_fail(request):
return request.param


@pytest.fixture(
    params=[
        (pd.Index, False),
        (pd.Series, False),
        (pd.DataFrame, False),
        (pd.DataFrame, True),
    ],
    ids=lambda param: '-'.join((param[0].__name__, str(param[1]))),
)
def box_with_transpose(request):
    """
    Variant of the `box` fixture that also covers transposed DataFrames.

    Each parameter is a ``(box, transpose)`` 2-tuple: the container class
    and a bool flag.  DataFrame is listed with both flag values; Index and
    Series only with ``False``.
    """
    # GH#23620
    box_and_flag = request.param
    return box_and_flag


@pytest.fixture(params=[(pd.Index, False),
(pd.Series, False),
(pd.DataFrame, False),
Expand Down Expand Up @@ -189,3 +202,17 @@ def box_with_datetime(request):
Like `box`, but specific to datetime64 for also testing DatetimeArray
"""
return request.param


@pytest.fixture(
    params=[
        (pd.Index, False),
        (pd.Series, False),
        (pd.DataFrame, False),
        (pd.DataFrame, True),
        (DatetimeArray, False),
    ],
    ids=lambda param: '-'.join((param[0].__name__, str(param[1]))),
)
def box_T_with_datetime(request):
    """
    datetime64-specific variant of `box` that also covers DatetimeArray
    and both transpose cases for DataFrame.

    Each parameter is a ``(box, transpose)`` 2-tuple: the container class
    and a bool flag.
    """
    box_and_flag = request.param
    return box_and_flag
29 changes: 14 additions & 15 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,20 +1187,19 @@ def test_dti_add_intarray_no_freq(self, box):
# Binary operations DatetimeIndex and timedelta-like

def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours,
box_with_datetime):
box_T_with_datetime):
# GH#22005, GH#22163 check DataFrame doesn't raise TypeError
box = box_with_datetime
box, transpose = box_T_with_datetime

tz = tz_naive_fixture
rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)

# FIXME: calling with transpose=True raises ValueError
rng = tm.box_expected(rng, box, transpose=False)
rng = tm.box_expected(rng, box, transpose=transpose)

result = rng + two_hours
expected = pd.date_range('2000-01-01 02:00',
'2000-02-01 02:00', tz=tz)
expected = tm.box_expected(expected, box, transpose=False)
expected = tm.box_expected(expected, box, transpose=transpose)
tm.assert_equal(result, expected)

def test_dti_iadd_timedeltalike(self, tz_naive_fixture, two_hours):
Expand All @@ -1227,18 +1226,18 @@ def test_dti_isub_timedeltalike(self, tz_naive_fixture, two_hours):
rng -= two_hours
tm.assert_index_equal(rng, expected)

def test_dt64arr_add_sub_td64_nat(self, box, tz_naive_fixture):
def test_dt64arr_add_sub_td64_nat(self, box_with_transpose,
tz_naive_fixture):
# GH#23320 special handling for timedelta64("NaT")
box, transpose = box_with_transpose
tz = tz_naive_fixture

dti = pd.date_range("1994-04-01", periods=9, tz=tz, freq="QS")
other = np.timedelta64("NaT")
expected = pd.DatetimeIndex(["NaT"] * 9, tz=tz)

# FIXME: fails with transpose=True due to tz-aware DataFrame
# transpose bug
obj = tm.box_expected(dti, box, transpose=False)
expected = tm.box_expected(expected, box, transpose=False)
obj = tm.box_expected(dti, box, transpose=transpose)
expected = tm.box_expected(expected, box, transpose=transpose)

result = obj + other
tm.assert_equal(result, expected)
Expand Down Expand Up @@ -1828,9 +1827,10 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
res3 = dti - other
tm.assert_series_equal(res3, expected_sub)

def test_dti_add_offset_tzaware(self, tz_aware_fixture, box_with_datetime):
def test_dti_add_offset_tzaware(self, tz_aware_fixture,
box_T_with_datetime):
# GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
box = box_with_datetime
box, transpose = box_T_with_datetime

timezone = tz_aware_fixture
if timezone == 'US/Pacific':
Expand All @@ -1843,9 +1843,8 @@ def test_dti_add_offset_tzaware(self, tz_aware_fixture, box_with_datetime):
expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00',
'2010-11-01 07:00'], freq='H', tz=timezone)

# FIXME: these raise ValueError with transpose=True
dates = tm.box_expected(dates, box, transpose=False)
expected = tm.box_expected(expected, box, transpose=False)
dates = tm.box_expected(dates, box, transpose=transpose)
expected = tm.box_expected(expected, box, transpose=transpose)

# TODO: parametrize over the scalar being added? radd? sub?
offset = dates + pd.offsets.Hour(5)
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,19 +413,19 @@ def test_td64arr_sub_timestamp_raises(self, box):
with pytest.raises(TypeError, match=msg):
idx - Timestamp('2011-01-01')

def test_td64arr_add_timestamp(self, box, tz_naive_fixture):
def test_td64arr_add_timestamp(self, box_with_transpose, tz_naive_fixture):
# GH#23215
# TODO: parametrize over scalar datetime types?
box, transpose = box_with_transpose

tz = tz_naive_fixture
other = Timestamp('2011-01-01', tz=tz)

idx = TimedeltaIndex(['1 day', '2 day'])
expected = DatetimeIndex(['2011-01-02', '2011-01-03'], tz=tz)

# FIXME: fails with transpose=True because of tz-aware DataFrame
# transpose bug
idx = tm.box_expected(idx, box, transpose=False)
expected = tm.box_expected(expected, box, transpose=False)
idx = tm.box_expected(idx, box, transpose=transpose)
expected = tm.box_expected(expected, box, transpose=transpose)

result = idx + other
tm.assert_equal(result, expected)
Expand Down
52 changes: 52 additions & 0 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,3 +939,55 @@ def test_unstack_fill_frame_object():
index=list('xyz')
)
assert_frame_equal(result, expected)


def test_transpose_dt64tz():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also test transposing a mixed dtype dataframe if we aren't already (e.g. datetimetz and int)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do. Only real difference is that it doesn't round-trip (though I think that behavior is correct)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that makes sense. Just good to ensure that there's no aggressive coercion of other data dtypes in the DataFrame once transposed i.e. the columns in df.T end up as object.

# GH#23730 transposing a DataFrame with a single datetime64tz column should
# not raise ValueError

dti = pd.date_range('1977-04-15', periods=3, freq='MS', tz='US/Hawaii')

# For reasons unknown this error shows up differently depending on how the
# DataFrame was constructed, so we do this several different ways.

df1 = dti.to_series(keep_tz=True).to_frame()
df2 = pd.DataFrame(dti, index=dti)
df3 = pd.Series(dti, index=dti).to_frame()

tm.assert_frame_equal(df1, df2)
tm.assert_frame_equal(df2, df3)

for frame in [df1, df2, df3]:
frame.T
tm.assert_frame_equal(frame.T.T, frame)

# Now going the other direction, we have to manually construct the
# transposed dataframe
df = pd.DataFrame(np.arange(9).reshape(3, 3))
df[0] = dti[0]
df[1] = dti[1]
df[2] = dti[2]

df.T
tm.assert_frame_equal(df.T.T, df)


def test_transpose_dt64tz_mixed_tz():
    """
    Transpose a frame whose two datetimetz columns have different timezones.

    GH#23730: the double transpose is compared against the object-dtype
    cast of the original frame.
    """
    range_kwargs = dict(periods=3, freq='MS')
    hawaii = pd.date_range('1977-04-15', tz='US/Hawaii', **range_kwargs)
    utc = pd.date_range('1977-04-15', tz='UTC', **range_kwargs)

    frame = pd.DataFrame({"A": hawaii, "B": utc}, columns=["A", "B"])

    # the single transpose is only exercised to check that it does not raise
    frame.T

    round_tripped = frame.T.T
    expected = frame.astype(object)
    tm.assert_frame_equal(round_tripped, expected)


def test_transpose_dt64tz_mixed():
    """
    Transpose a frame holding one datetimetz column and one numeric column.

    GH#23730: this combination did not raise before; the test is kept for
    coverage.
    """
    tz_column = pd.date_range('1977-04-15', periods=3, freq='MS',
                              tz='US/Hawaii')
    data = {"A": tz_column, "B": [3, 4, 5]}
    frame = pd.DataFrame(data, columns=["A", "B"])

    # the single transpose is only exercised to check that it does not raise
    frame.T

    twice_transposed = frame.T.T
    as_object = frame.astype(object)
    tm.assert_frame_equal(twice_transposed, as_object)