Skip to content

Commit 45c0b5f

Browse files
swyoon authored and jreback committed
BUG: fix read_csv to parse timezone correctly (#22380)
1 parent 1584530 commit 45c0b5f

File tree

3 files changed

+27
-17
lines changed

3 files changed

+27
-17
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -676,6 +676,7 @@ I/O
676676

677677
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
678678
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
679+
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
679680
-
680681

681682
Plotting

pandas/io/parsers.py

+8 -7
Original file line number | Diff line number | Diff line change
@@ -1620,7 +1620,6 @@ def _infer_types(self, values, na_values, try_num_bool=True):
16201620
converted : ndarray
16211621
na_count : int
16221622
"""
1623-
16241623
na_count = 0
16251624
if issubclass(values.dtype.type, (np.number, np.bool_)):
16261625
mask = algorithms.isin(values, list(na_values))
@@ -1633,20 +1632,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
16331632

16341633
if try_num_bool:
16351634
try:
1636-
result = lib.maybe_convert_numeric(values, na_values, False)
1635+
result = lib.maybe_convert_numeric(np.asarray(values),
1636+
na_values, False)
16371637
na_count = isna(result).sum()
16381638
except Exception:
16391639
result = values
16401640
if values.dtype == np.object_:
1641-
na_count = parsers.sanitize_objects(result, na_values,
1642-
False)
1641+
na_count = parsers.sanitize_objects(np.asarray(result),
1642+
na_values, False)
16431643
else:
16441644
result = values
16451645
if values.dtype == np.object_:
1646-
na_count = parsers.sanitize_objects(values, na_values, False)
1646+
na_count = parsers.sanitize_objects(np.asarray(values),
1647+
na_values, False)
16471648

16481649
if result.dtype == np.object_ and try_num_bool:
1649-
result = libops.maybe_convert_bool(values,
1650+
result = libops.maybe_convert_bool(np.asarray(values),
16501651
true_values=self.true_values,
16511652
false_values=self.false_values)
16521653

@@ -3033,7 +3034,7 @@ def converter(*date_cols):
30333034
return tools.to_datetime(
30343035
ensure_object(strs),
30353036
utc=None,
3036-
box=False,
3037+
box=True,
30373038
dayfirst=dayfirst,
30383039
errors='ignore',
30393040
infer_datetime_format=infer_datetime_format

pandas/tests/io/parser/parse_dates.py

+18 -10
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,9 @@
1313
from pandas._libs.tslibs import parsing
1414
from pandas._libs.tslib import Timestamp
1515

16+
import pytz
1617
import pandas as pd
1718
import pandas.io.parsers as parsers
18-
import pandas.core.tools.datetimes as tools
1919
import pandas.util.testing as tm
2020

2121
import pandas.io.date_converters as conv
@@ -356,21 +356,13 @@ def test_parse_dates_custom_euroformat(self):
356356

357357
def test_parse_tz_aware(self):
358358
# See gh-1693
359-
import pytz
360359
data = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
361360

362361
# it works
363362
result = self.read_csv(data, index_col=0, parse_dates=True)
364363
stamp = result.index[0]
365364
assert stamp.minute == 39
366-
try:
367-
assert result.index.tz is pytz.utc
368-
except AssertionError:
369-
arr = result.index.to_pydatetime()
370-
result = tools.to_datetime(arr, utc=True)[0]
371-
assert stamp.minute == result.minute
372-
assert stamp.hour == result.hour
373-
assert stamp.day == result.day
365+
assert result.index.tz is pytz.utc
374366

375367
def test_multiple_date_cols_index(self):
376368
data = """
@@ -674,3 +666,19 @@ def test_parse_date_float(self, data, expected, parse_dates):
674666
# (i.e. float precision should remain unchanged).
675667
result = self.read_csv(StringIO(data), parse_dates=parse_dates)
676668
tm.assert_frame_equal(result, expected)
669+
670+
def test_parse_timezone(self):
671+
# gh-22256
672+
data = """dt,val
673+
2018-01-04 09:01:00+09:00,23350
674+
2018-01-04 09:02:00+09:00,23400
675+
2018-01-04 09:03:00+09:00,23400
676+
2018-01-04 09:04:00+09:00,23400
677+
2018-01-04 09:05:00+09:00,23400"""
678+
parsed = self.read_csv(StringIO(data), parse_dates=['dt'])
679+
dti = pd.DatetimeIndex(start='2018-01-04 09:01:00',
680+
end='2018-01-04 09:05:00', freq='1min',
681+
tz=pytz.FixedOffset(540))
682+
expected_data = {'dt': dti, 'val': [23350, 23400, 23400, 23400, 23400]}
683+
expected = DataFrame(expected_data)
684+
tm.assert_frame_equal(parsed, expected)

0 commit comments

Comments
 (0)