Skip to content

Commit 0567799

Browse files
committed
BUG: Bug in DataFrame construction with nulls and datetimes in a list-like
closes #15869
1 parent e50d397 commit 0567799

File tree

5 files changed

+50
-23
lines changed

5 files changed

+50
-23
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,7 @@ Conversion
997997
- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`)
998998
- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`)
999999
- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)
1000+
- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like, where the result was not inferred as ``datetime64[ns]`` (:issue:`15869`)
10001001

10011002
Indexing
10021003
^^^^^^^^

pandas/_libs/src/inference.pyx

+25-11
Original file line numberDiff line numberDiff line change
@@ -439,31 +439,45 @@ def infer_dtype(object value):
439439
return 'mixed'
440440

441441

442-
cpdef bint is_possible_datetimelike_array(object arr):
443-
# determine if we have a possible datetimelike (or null-like) array
442+
cpdef object infer_datetimelike_array(object arr):
443+
""" infer if we have a datetime or timedelta array """
444444
cdef:
445445
Py_ssize_t i, n = len(arr)
446446
bint seen_timedelta = 0, seen_datetime = 0
447+
bint seen_nat = 0, seen_string = 0, seen_null = 0
447448
object v
448449

449450
for i in range(n):
450451
v = arr[i]
451452
if util.is_string_object(v):
452-
continue
453+
seen_string = 1
453454
elif util._checknull(v):
454-
continue
455-
elif is_datetime(v):
456-
seen_datetime=1
457-
elif is_timedelta(v):
458-
seen_timedelta=1
455+
# nan or None
456+
seen_null = 1
457+
elif v is NaT:
458+
seen_nat = 1
459+
elif is_datetime(v) or util.is_datetime64_object(v):
460+
seen_datetime = 1
461+
elif is_timedelta(v) or util.is_timedelta64_object(v):
462+
seen_timedelta = 1
459463
else:
460-
return False
461-
return seen_datetime or seen_timedelta
464+
return 'mixed'
465+
466+
if seen_datetime and not seen_timedelta:
467+
return 'datetime'
468+
elif seen_timedelta:
469+
return 'timedelta'
470+
471+
if seen_nat:
472+
return 'datetime_or_timedelta'
473+
474+
return 'mixed'
475+
462476

463477

464478
cdef inline bint is_null_datetimelike(v):
465479
# determine if we have a null for a timedelta/datetime (or integer
466-
# versions)x
480+
# versions)
467481
if util._checknull(v):
468482
return True
469483
elif v is NaT:

pandas/tests/frame/test_constructors.py

+9
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,15 @@ def test_constructor_with_datetimes(self):
13661366
.reset_index(drop=True), 'b': i_no_tz})
13671367
tm.assert_frame_equal(df, expected)
13681368

1369+
def test_constructor_datetimes_with_nulls(self):
1370+
# gh-15869
1371+
for arr in [np.array([None, None, None, None,
1372+
datetime.now(), None]),
1373+
np.array([None, None, datetime.now(), None])]:
1374+
result = DataFrame(arr).get_dtype_counts()
1375+
expected = Series({'datetime64[ns]': 1})
1376+
tm.assert_series_equal(result, expected)
1377+
13691378
def test_constructor_for_list_with_dtypes(self):
13701379
# TODO(wesm): unused
13711380
intname = np.dtype(np.int_).name # noqa

pandas/tests/series/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,14 @@ def test_constructor_datelike_coercion(self):
327327
result = df.loc['216']
328328
self.assertTrue(result.dtype == object)
329329

330+
def test_constructor_datetimes_with_nulls(self):
331+
# gh-15869
332+
for arr in [np.array([None, None, None, None,
333+
datetime.now(), None]),
334+
np.array([None, None, datetime.now(), None])]:
335+
result = Series(arr)
336+
assert result.dtype == 'M8[ns]'
337+
330338
def test_constructor_dtype_datetime64(self):
331339

332340
s = Series(iNaT, dtype='M8[ns]', index=lrange(5))

pandas/types/cast.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -806,25 +806,20 @@ def _try_timedelta(v):
806806
except:
807807
return v
808808

809-
# do a quick inference for perf
810-
sample = v[:min(3, len(v))]
811-
inferred_type = lib.infer_dtype(sample)
812-
809+
inferred_type = lib.infer_datetimelike_array(_ensure_object(v))
813810
if (inferred_type in ['datetime', 'datetime64'] or
814811
(convert_dates and inferred_type in ['date'])):
815812
value = _try_datetime(v)
816813
elif inferred_type in ['timedelta', 'timedelta64']:
817814
value = _try_timedelta(v)
815+
elif inferred_type in ['datetime_or_timedelta']:
818816

819-
# It's possible to have nulls intermixed within the datetime or
820-
# timedelta. These will in general have an inferred_type of 'mixed',
821-
# so have to try both datetime and timedelta.
822-
823-
# try timedelta first to avoid spurious datetime conversions
824-
# e.g. '00:00:01' is a timedelta but technically is also a datetime
825-
elif inferred_type in ['mixed']:
817+
# if all values are null (NaT, possibly alongside None/NaN), return as datetime
818+
if isnull(v).all():
819+
value = _try_datetime(v)
820+
else:
826821

827-
if lib.is_possible_datetimelike_array(_ensure_object(v)):
822+
# we have at least a NaT and a string
828823
value = _try_timedelta(v)
829824
if lib.infer_dtype(value) in ['mixed']:
830825
value = _try_datetime(v)

0 commit comments

Comments
 (0)