Skip to content

Commit 3a9c093

Browse files
committed
BUG: fix read_csv to parse timezone correctly
- Use box=True for to_datetime(), and adjust downstream processing (wrapping values with np.asarray before the low-level conversion helpers) to handle the change. - Resolves pandas-dev#22256
1 parent 4f11d1a commit 3a9c093

File tree

3 files changed

+25
-7
lines changed

3 files changed

+25
-7
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,7 @@ I/O
676676

677677
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
678678
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
679+
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
679680
-
680681

681682
Plotting

pandas/io/parsers.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -1620,7 +1620,6 @@ def _infer_types(self, values, na_values, try_num_bool=True):
16201620
converted : ndarray
16211621
na_count : int
16221622
"""
1623-
16241623
na_count = 0
16251624
if issubclass(values.dtype.type, (np.number, np.bool_)):
16261625
mask = algorithms.isin(values, list(na_values))
@@ -1633,20 +1632,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
16331632

16341633
if try_num_bool:
16351634
try:
1636-
result = lib.maybe_convert_numeric(values, na_values, False)
1635+
result = lib.maybe_convert_numeric(np.asarray(values),
1636+
na_values, False)
16371637
na_count = isna(result).sum()
16381638
except Exception:
16391639
result = values
16401640
if values.dtype == np.object_:
1641-
na_count = parsers.sanitize_objects(result, na_values,
1642-
False)
1641+
na_count = parsers.sanitize_objects(np.asarray(result),
1642+
na_values, False)
16431643
else:
16441644
result = values
16451645
if values.dtype == np.object_:
1646-
na_count = parsers.sanitize_objects(values, na_values, False)
1646+
na_count = parsers.sanitize_objects(np.asarray(values),
1647+
na_values, False)
16471648

16481649
if result.dtype == np.object_ and try_num_bool:
1649-
result = libops.maybe_convert_bool(values,
1650+
result = libops.maybe_convert_bool(np.asarray(values),
16501651
true_values=self.true_values,
16511652
false_values=self.false_values)
16521653

@@ -3033,7 +3034,7 @@ def converter(*date_cols):
30333034
return tools.to_datetime(
30343035
ensure_object(strs),
30353036
utc=None,
3036-
box=False,
3037+
box=True,
30373038
dayfirst=dayfirst,
30383039
errors='ignore',
30393040
infer_datetime_format=infer_datetime_format

pandas/tests/io/parser/parse_dates.py

+16
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pandas._libs.tslibs import parsing
1414
from pandas._libs.tslib import Timestamp
1515

16+
import pytz
1617
import pandas as pd
1718
import pandas.io.parsers as parsers
1819
import pandas.core.tools.datetimes as tools
@@ -674,3 +675,18 @@ def test_parse_date_float(self, data, expected, parse_dates):
674675
# (i.e. float precision should remain unchanged).
675676
result = self.read_csv(StringIO(data), parse_dates=parse_dates)
676677
tm.assert_frame_equal(result, expected)
678+
679+
def test_parse_timezone(self):
680+
data = """dt,val
681+
2018-01-04 09:01:00+09:00,23350
682+
2018-01-04 09:02:00+09:00,23400
683+
2018-01-04 09:03:00+09:00,23400
684+
2018-01-04 09:04:00+09:00,23400
685+
2018-01-04 09:05:00+09:00,23400"""
686+
parsed = self.read_csv(StringIO(data), parse_dates=['dt'])
687+
dti = pd.DatetimeIndex(start='2018-01-04 09:01:00',
688+
end='2018-01-04 09:05:00', freq='1min',
689+
tz=pytz.FixedOffset(540))
690+
expected_data = {'dt': dti, 'val': [23350, 23400, 23400, 23400, 23400]}
691+
expected = DataFrame(expected_data)
692+
tm.assert_frame_equal(parsed, expected)

0 commit comments

Comments
 (0)