Skip to content

Commit 41004d9

Browse files
jamestran201 authored and jreback committed
BUG: prevent coercion to datetime64[ns] when a Series is initialized with both tz-naive and tz-aware values (#18361)
1 parent 4e98a7b commit 41004d9

File tree

4 files changed

+97
-4
lines changed

4 files changed

+97
-4
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -195,5 +195,5 @@ Other
195195
^^^^^
196196

197197
- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
198-
-
198+
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values would result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
199199
-

pandas/_libs/src/inference.pyx

+17-3
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ cpdef object infer_datetimelike_array(object arr):
464464
- timedelta: we have *only* timedeltas and maybe strings, nulls
465465
- nat: we do not have *any* date, datetimes or timedeltas, but do have
466466
at least a NaT
467-
- mixed: other objects (strings or actual objects)
467+
- mixed: other objects (strings, a mix of tz-aware and tz-naive, or
468+
actual objects)
468469
469470
Parameters
470471
----------
@@ -479,6 +480,7 @@ cpdef object infer_datetimelike_array(object arr):
479480
cdef:
480481
Py_ssize_t i, n = len(arr)
481482
bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0
483+
bint seen_tz_aware = 0, seen_tz_naive = 0
482484
bint seen_nat = 0
483485
list objs = []
484486
object v
@@ -496,8 +498,20 @@ cpdef object infer_datetimelike_array(object arr):
496498
pass
497499
elif v is NaT:
498500
seen_nat = 1
499-
elif is_datetime(v) or util.is_datetime64_object(v):
500-
# datetime, or np.datetime64
501+
elif is_datetime(v):
502+
# datetime
503+
seen_datetime = 1
504+
505+
# disambiguate between tz-naive and tz-aware
506+
if v.tzinfo is None:
507+
seen_tz_naive = 1
508+
else:
509+
seen_tz_aware = 1
510+
511+
if seen_tz_naive and seen_tz_aware:
512+
return 'mixed'
513+
elif util.is_datetime64_object(v):
514+
# np.datetime64
501515
seen_datetime = 1
502516
elif is_date(v):
503517
seen_date = 1

pandas/tests/dtypes/test_inference.py

+70
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,10 @@ def test_mixed_dtypes_remain_object_array(self):
419419

420420
class TestTypeInference(object):
421421

422+
# Dummy class used for testing with Python objects
423+
class Dummy():
424+
pass
425+
422426
def test_length_zero(self):
423427
result = lib.infer_dtype(np.array([], dtype='i4'))
424428
assert result == 'integer'
@@ -655,6 +659,72 @@ def test_infer_dtype_period(self):
655659
dtype=object)
656660
assert lib.infer_dtype(arr) == 'mixed'
657661

662+
@pytest.mark.parametrize(
663+
"data",
664+
[
665+
[datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)],
666+
[Timestamp("20170612"), Timestamp("20170311")],
667+
[Timestamp("20170612", tz='US/Eastern'),
668+
Timestamp("20170311", tz='US/Eastern')],
669+
[date(2017, 6, 12),
670+
Timestamp("20170311", tz='US/Eastern')],
671+
[np.datetime64("2017-06-12"), np.datetime64("2017-03-11")],
672+
[np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)]
673+
]
674+
)
675+
def test_infer_datetimelike_array_datetime(self, data):
676+
assert lib.infer_datetimelike_array(data) == "datetime"
677+
678+
@pytest.mark.parametrize(
679+
"data",
680+
[
681+
[timedelta(2017, 6, 12), timedelta(2017, 3, 11)],
682+
[timedelta(2017, 6, 12), date(2017, 3, 11)],
683+
[np.timedelta64(2017, "D"), np.timedelta64(6, "s")],
684+
[np.timedelta64(2017, "D"), timedelta(2017, 3, 11)]
685+
]
686+
)
687+
def test_infer_datetimelike_array_timedelta(self, data):
688+
assert lib.infer_datetimelike_array(data) == "timedelta"
689+
690+
def test_infer_datetimelike_array_date(self):
691+
arr = [date(2017, 6, 12), date(2017, 3, 11)]
692+
assert lib.infer_datetimelike_array(arr) == "date"
693+
694+
@pytest.mark.parametrize(
695+
"data",
696+
[
697+
["2017-06-12", "2017-03-11"],
698+
[20170612, 20170311],
699+
[20170612.5, 20170311.8],
700+
[Dummy(), Dummy()],
701+
[Timestamp("20170612"), Timestamp("20170311", tz='US/Eastern')],
702+
[Timestamp("20170612"), 20170311],
703+
[timedelta(2017, 6, 12), Timestamp("20170311", tz='US/Eastern')]
704+
]
705+
)
706+
def test_infer_datetimelike_array_mixed(self, data):
707+
assert lib.infer_datetimelike_array(data) == "mixed"
708+
709+
@pytest.mark.parametrize(
710+
"first, expected",
711+
[
712+
[[None], "mixed"],
713+
[[np.nan], "mixed"],
714+
[[pd.NaT], "nat"],
715+
[[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"],
716+
[[np.datetime64("2017-06-12"), pd.NaT], "datetime"],
717+
[[date(2017, 6, 12), pd.NaT], "date"],
718+
[[timedelta(2017, 6, 12), pd.NaT], "timedelta"],
719+
[[np.timedelta64(2017, "D"), pd.NaT], "timedelta"]
720+
]
721+
)
722+
@pytest.mark.parametrize("second", [None, np.nan])
723+
def test_infer_datetimelike_array_nan_nat_like(self, first, second,
724+
expected):
725+
first.append(second)
726+
assert lib.infer_datetimelike_array(first) == expected
727+
658728
def test_infer_dtype_all_nan_nat_like(self):
659729
arr = np.array([np.nan, np.nan])
660730
assert lib.infer_dtype(arr) == 'floating'

pandas/tests/series/test_constructors.py

+9
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,15 @@ def f():
777777
s = Series([pd.NaT, np.nan, '1 Day'])
778778
assert s.dtype == 'timedelta64[ns]'
779779

780+
# GH 16406
781+
def test_constructor_mixed_tz(self):
782+
s = Series([Timestamp('20130101'),
783+
Timestamp('20130101', tz='US/Eastern')])
784+
expected = Series([Timestamp('20130101'),
785+
Timestamp('20130101', tz='US/Eastern')],
786+
dtype='object')
787+
assert_series_equal(s, expected)
788+
780789
def test_NaT_scalar(self):
781790
series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')
782791

0 commit comments

Comments
 (0)