Skip to content

Commit f2a91a0

Browse files
authored
BUG: guess_datetime_format doesn't guess 2018-01-01T00:00:00.000 (#49047)
* guess nanoseconds * add gh reference number Co-authored-by: MarcoGorelli <>
1 parent 8974a95 commit f2a91a0

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

pandas/_libs/tslibs/parsing.pyx

+14-2
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
976976
(('hour',), '%H', 2),
977977
(('minute',), '%M', 2),
978978
(('second',), '%S', 2),
979-
(('microsecond',), '%f', 6),
980979
(('second', 'microsecond'), '%S.%f', 0),
981980
(('tzinfo',), '%z', 0),
982981
(('tzinfo',), '%Z', 0),
@@ -1048,7 +1047,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
10481047

10491048
parsed_formatted = parsed_datetime.strftime(attr_format)
10501049
for i, token_format in enumerate(format_guess):
1051-
token_filled = tokens[i].zfill(padding)
1050+
token_filled = _fill_token(tokens[i], padding)
10521051
if token_format is None and token_filled == parsed_formatted:
10531052
format_guess[i] = attr_format
10541053
tokens[i] = token_filled
@@ -1090,6 +1089,19 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
10901089
else:
10911090
return None
10921091

1092+
cdef str _fill_token(token: str, padding: int):
1093+
cdef str token_filled
1094+
if '.' not in token:
1095+
token_filled = token.zfill(padding)
1096+
else:
1097+
seconds, nanoseconds = token.split('.')
1098+
seconds = f'{int(seconds):02d}'
1099+
# right-pad so we get nanoseconds, then only take
1100+
# first 6 digits (microseconds) as stdlib datetime
1101+
# doesn't support nanoseconds
1102+
nanoseconds = nanoseconds.ljust(9, '0')[:6]
1103+
token_filled = f'{seconds}.{nanoseconds}'
1104+
return token_filled
10931105

10941106
@cython.wraparound(False)
10951107
@cython.boundscheck(False)

pandas/core/tools/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ def to_datetime(
820820
to the day starting at noon on January 1, 4713 BC.
821821
- If Timestamp convertible (Timestamp, dt.datetime, np.datetime64 or date
822822
string), origin is set to Timestamp identified by origin.
823-
- If a float or integer, origin is the mullisecond difference
823+
- If a float or integer, origin is the millisecond difference
824824
relative to 1970-01-01.
825825
cache : bool, default True
826826
If :const:`True`, use a cache of unique, converted dates to apply the

pandas/tests/tslibs/test_parsing.py

+15
Original file line numberDiff line numberDiff line change
@@ -295,3 +295,18 @@ def test_is_iso_format(fmt, expected):
295295
# see gh-41047
296296
result = parsing.format_is_iso(fmt)
297297
assert result == expected
298+
299+
300+
@pytest.mark.parametrize(
301+
"input",
302+
[
303+
"2018-01-01T00:00:00.123456789",
304+
"2018-01-01T00:00:00.123456",
305+
"2018-01-01T00:00:00.123",
306+
],
307+
)
308+
def test_guess_datetime_format_f(input):
309+
# https://github.com/pandas-dev/pandas/issues/49043
310+
result = parsing.guess_datetime_format(input)
311+
expected = "%Y-%m-%dT%H:%M:%S.%f"
312+
assert result == expected

0 commit comments

Comments
 (0)