Skip to content

Commit a6eb92b

Browse files
BUG: DataFrame(dict_of_series) raising depending on order of dict (#45018)
Co-authored-by: Jeff Reback <[email protected]>
1 parent 4429d0f commit a6eb92b

File tree

5 files changed

+97
-8
lines changed

5 files changed

+97
-8
lines changed

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ Other Deprecations
610610
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
611611
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
612612
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
613+
- Deprecated :meth:`DatetimeIndex.union_many`, use :meth:`DatetimeIndex.union` instead (:issue:`44091`)
613614
- Deprecated :meth:`.Groupby.pad` in favor of :meth:`.Groupby.ffill` (:issue:`33396`)
614615
- Deprecated :meth:`.Groupby.backfill` in favor of :meth:`.Groupby.bfill` (:issue:`33396`)
615616
- Deprecated :meth:`.Resample.pad` in favor of :meth:`.Resample.ffill` (:issue:`33396`)
@@ -709,6 +710,7 @@ Datetimelike
709710
- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
710711
- Bug in :class:`DateOffset` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`)
711712
- Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`)
713+
- Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`)
712714
-
713715

714716
Timedelta

pandas/core/indexes/api.py

+36-7
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
)
99
from pandas.errors import InvalidIndexError
1010

11+
from pandas.core.dtypes.common import is_dtype_equal
12+
1113
from pandas.core.indexes.base import (
1214
Index,
1315
_new_Index,
@@ -213,14 +215,41 @@ def conv(i):
213215

214216
if kind == "special":
215217
result = indexes[0]
218+
first = result
219+
220+
dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
221+
dti_tzs = [x for x in dtis if x.tz is not None]
222+
if len(dti_tzs) not in [0, len(dtis)]:
223+
# TODO: this behavior is not tested (so may not be desired),
224+
# but is kept in order to keep behavior the same when
225+
# deprecating union_many
226+
# test_frame_from_dict_with_mixed_tzaware_indexes
227+
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
228+
229+
if len(dtis) == len(indexes):
230+
sort = True
231+
if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes):
232+
# i.e. timezones mismatch
233+
# TODO(2.0): once deprecation is enforced, this union will
234+
# cast to UTC automatically.
235+
indexes = [x.tz_convert("UTC") for x in indexes]
236+
237+
result = indexes[0]
238+
239+
elif len(dtis) > 1:
240+
# If we have mixed timezones, our casting behavior may depend on
241+
# the order of indexes, which we don't want.
242+
sort = False
243+
244+
# TODO: what about Categorical[dt64]?
245+
# test_frame_from_dict_with_mixed_tzaware_indexes
246+
indexes = [x.astype(object, copy=False) for x in indexes]
247+
result = indexes[0]
248+
249+
for other in indexes[1:]:
250+
result = result.union(other, sort=None if sort else False)
251+
return result
216252

217-
if hasattr(result, "union_many"):
218-
# DatetimeIndex
219-
return result.union_many(indexes[1:])
220-
else:
221-
for other in indexes[1:]:
222-
result = result.union(other, sort=None if sort else False)
223-
return result
224253
elif kind == "array":
225254
index = indexes[0]
226255
if not all(index.equals(other) for other in indexes[1:]):

pandas/core/indexes/datetimes.py

+7
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,13 @@ def union_many(self, others):
387387
"""
388388
A bit of a hack to accelerate unioning a collection of indexes.
389389
"""
390+
warnings.warn(
391+
"DatetimeIndex.union_many is deprecated and will be removed in "
392+
"a future version. Use obj.union instead.",
393+
FutureWarning,
394+
stacklevel=find_stack_level(),
395+
)
396+
390397
this = self
391398

392399
for other in others:

pandas/tests/frame/test_constructors.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -2666,6 +2666,50 @@ def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):
26662666
exp = pd.period_range("1/1/1980", "1/1/2012", freq="M")
26672667
tm.assert_index_equal(df.index, exp)
26682668

2669+
def test_frame_from_dict_with_mixed_tzaware_indexes(self):
    """
    Check DataFrame construction from a dict of Series whose indexes mix
    tz-aware DatetimeIndexes (different timezones) with a non-datetime index.

    The resulting frame index must be the object-dtype concatenation of the
    input indexes in dict order, whatever that order is. Adding a tz-naive
    DatetimeIndex to the mix must raise TypeError.
    """
    # GH#44091
    naive = date_range("2016-01-01", periods=3)

    ser_naive = Series(range(3), index=naive)
    ser_utc = Series(range(3), index=naive.tz_localize("UTC"))
    ser_central = Series(range(3), index=naive.tz_localize("US/Central"))
    ser_default = Series(range(3))

    # no tz-naive, but we do have mixed tzs and a non-DTI
    orderings = [
        {"A": ser_utc, "B": ser_central, "C": ser_default},
        {"A": ser_utc, "C": ser_default, "B": ser_central},
        {"B": ser_central, "A": ser_utc, "C": ser_default},
        {"C": ser_default, "B": ser_central, "A": ser_utc},
    ]
    for data in orderings:
        frame = DataFrame(data)
        # expected labels: each Series' index entries, in dict order
        labels = [label for ser in data.values() for label in ser.index]
        expected = Index(labels, dtype=object)
        tm.assert_index_equal(frame.index, expected)

    # TODO: not clear if raising here is desired (no extant tests),
    #  but this is de facto behavior 2021-12-22
    msg = "Cannot join tz-naive with tz-aware DatetimeIndex"
    raising = [
        {"A": ser_utc, "B": ser_central, "C": ser_default, "D": ser_naive},
        {"A": ser_utc, "B": ser_central, "D": ser_naive},
        {"D": ser_naive, "A": ser_utc, "B": ser_central},
    ]
    for data in raising:
        with pytest.raises(TypeError, match=msg):
            DataFrame(data)
2712+
26692713

26702714
class TestDataFrameConstructorWithDtypeCoercion:
26712715
def test_floating_values_integer_dtype(self):
@@ -2911,7 +2955,7 @@ def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
29112955
DataFrame(arr2, columns=["foo", "bar"])
29122956

29132957

2914-
def get1(obj):
2958+
def get1(obj): # TODO: make a helper in tm?
29152959
if isinstance(obj, Series):
29162960
return obj.iloc[0]
29172961
else:

pandas/tests/indexes/datetimes/test_setops.py

+7
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@
2626
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
2727

2828

29+
def test_union_many_deprecated():
    """Calling DatetimeIndex.union_many must emit a FutureWarning."""
    index = date_range("2016-01-01", periods=3)
    others = [index, index]

    with tm.assert_produces_warning(FutureWarning):
        index.union_many(others)
34+
35+
2936
class TestDatetimeIndexSetOps:
3037
tz = [
3138
None,

0 commit comments

Comments
 (0)