Skip to content

Commit 9e7dc17

Browse files
committed
Merge pull request #11146 from chris-b1/inference-padding
PERF: infer_datetime_format without padding #11142
2 parents d35c84b + 417dbb4 commit 9e7dc17

File tree

3 files changed

+37
-13
lines changed

3 files changed

+37
-13
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,7 @@ Performance Improvements
10311031
- 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`)
10321032
- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`)
10331033
- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`)
1034+
- Enable ``infer_datetime_format`` in ``to_datetime`` when date components are not zero-padded, e.g. ``2011-1-1`` (:issue:`11142`)
10341035

10351036
.. _whatsnew_0170.bug_fixes:
10361037

pandas/tseries/tests/test_timeseries.py

+18
Original file line numberDiff line numberDiff line change
@@ -4615,6 +4615,24 @@ def test_guess_datetime_format_invalid_inputs(self):
46154615
for invalid_dt in invalid_dts:
46164616
self.assertTrue(tools._guess_datetime_format(invalid_dt) is None)
46174617

4618+
def test_guess_datetime_format_nopadding(self):
4619+
# GH 11142
# Date/time strings whose components are NOT zero-padded (e.g. month "1"
# instead of "01", seconds "0" instead of "00") should still yield the
# ordinary strftime format built from the padded directives.
4620+
dt_string_to_format = (
4621+
# each entry is (input string, expected guessed format)
('2011-1-1', '%Y-%m-%d'),
4622+
('30-1-2011', '%d-%m-%Y'),
4623+
('1/1/2011', '%m/%d/%Y'),
4624+
# unpadded date combined with a fully padded time
('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
4625+
# unpadded date and unpadded time components
('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
4626+
# 'T' separator, with only the seconds component unpadded
('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')
4627+
)
4628+
4629+
# The guessed format must equal the expected format exactly for every case.
for dt_string, dt_format in dt_string_to_format:
4630+
self.assertEqual(
4631+
tools._guess_datetime_format(dt_string),
4632+
dt_format
4633+
)
4634+
4635+
46184636
def test_guess_datetime_format_for_array(self):
46194637
expected_format = '%Y-%m-%d %H:%M:%S.%f'
46204638
dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)

pandas/tseries/tools.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -86,20 +86,21 @@ def _guess_datetime_format(dt_str, dayfirst=False,
8686
if not isinstance(dt_str, compat.string_types):
8787
return None
8888

89-
day_attribute_and_format = (('day',), '%d')
89+
day_attribute_and_format = (('day',), '%d', 2)
9090

91+
# (attribute names, strftime format, zero-fill width applied to the token via str.zfill; 0 = no padding)
9192
datetime_attrs_to_format = [
92-
(('year', 'month', 'day'), '%Y%m%d'),
93-
(('year',), '%Y'),
94-
(('month',), '%B'),
95-
(('month',), '%b'),
96-
(('month',), '%m'),
93+
(('year', 'month', 'day'), '%Y%m%d', 0),
94+
(('year',), '%Y', 0),
95+
(('month',), '%B', 0),
96+
(('month',), '%b', 0),
97+
(('month',), '%m', 2),
9798
day_attribute_and_format,
98-
(('hour',), '%H'),
99-
(('minute',), '%M'),
100-
(('second',), '%S'),
101-
(('microsecond',), '%f'),
102-
(('second', 'microsecond'), '%S.%f'),
99+
(('hour',), '%H', 2),
100+
(('minute',), '%M', 2),
101+
(('second',), '%S', 2),
102+
(('microsecond',), '%f', 6),
103+
(('second', 'microsecond'), '%S.%f', 0),
103104
]
104105

105106
if dayfirst:
@@ -125,7 +126,7 @@ def _guess_datetime_format(dt_str, dayfirst=False,
125126
format_guess = [None] * len(tokens)
126127
found_attrs = set()
127128

128-
for attrs, attr_format in datetime_attrs_to_format:
129+
for attrs, attr_format, padding in datetime_attrs_to_format:
129130
# If a given attribute has been placed in the format string, skip
130131
# over other formats for that same underlying attribute (IE, month
131132
# can be represented in multiple different ways)
@@ -134,9 +135,11 @@ def _guess_datetime_format(dt_str, dayfirst=False,
134135

135136
if all(getattr(parsed_datetime, attr) is not None for attr in attrs):
136137
for i, token_format in enumerate(format_guess):
138+
token_filled = tokens[i].zfill(padding)
137139
if (token_format is None and
138-
tokens[i] == parsed_datetime.strftime(attr_format)):
140+
token_filled == parsed_datetime.strftime(attr_format)):
139141
format_guess[i] = attr_format
142+
tokens[i] = token_filled
140143
found_attrs.update(attrs)
141144
break
142145

@@ -163,6 +166,8 @@ def _guess_datetime_format(dt_str, dayfirst=False,
163166

164167
guessed_format = ''.join(output_format)
165168

169+
# rebuild string, capturing any inferred padding
170+
dt_str = ''.join(tokens)
166171
if parsed_datetime.strftime(guessed_format) == dt_str:
167172
return guessed_format
168173

0 commit comments

Comments
 (0)