Skip to content

Commit 2672a79

Browse files
sinhrks authored and jreback committed
BUG: DatetimeTZBlock can't assign values near dst boundary
Value assignment doesn't work if the data contains a DST boundary, because of the internal `.tz_localize(None)` call.

Author: sinhrks <[email protected]>

Closes #14146 from sinhrks/internals_datetimetz and squashes the following commits:

95aa6b5 [sinhrks] BUG: DatetimeTZBlock can't assign values near dst boundary
1 parent ef20980 commit 2672a79

File tree

6 files changed

+158
-42
lines changed

6 files changed

+158
-42
lines changed

doc/source/whatsnew/v0.19.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1559,7 +1559,7 @@ Bug Fixes
15591559
- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`)
15601560
- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`)
15611561
- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`)
1562-
1562+
- Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`)
15631563

15641564
- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
15651565
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)

pandas/core/internals.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -1487,7 +1487,10 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
14871487
-------
14881488
a new block(s), the result of the putmask
14891489
"""
1490-
new_values = self.values if inplace else self.values.copy()
1490+
1491+
# use block's copy logic.
1492+
# .values may be an Index which does shallow copy by default
1493+
new_values = self.values if inplace else self.copy().values
14911494
new_values, _, new, _ = self._try_coerce_args(new_values, new)
14921495

14931496
if isinstance(new, np.ndarray) and len(new) == len(mask):
@@ -2314,7 +2317,7 @@ def __init__(self, values, placement, ndim=2, **kwargs):
23142317
if dtype is not None:
23152318
if isinstance(dtype, compat.string_types):
23162319
dtype = DatetimeTZDtype.construct_from_string(dtype)
2317-
values = values.tz_localize('UTC').tz_convert(dtype.tz)
2320+
values = values._shallow_copy(tz=dtype.tz)
23182321

23192322
if values.tz is None:
23202323
raise ValueError("cannot create a DatetimeTZBlock without a tz")
@@ -2381,12 +2384,14 @@ def _try_coerce_args(self, values, other):
23812384
base-type values, values mask, base-type other, other mask
23822385
"""
23832386
values_mask = _block_shape(isnull(values), ndim=self.ndim)
2384-
values = _block_shape(values.tz_localize(None).asi8, ndim=self.ndim)
2387+
# asi8 is a view, needs copy
2388+
values = _block_shape(values.asi8, ndim=self.ndim)
23852389
other_mask = False
23862390

23872391
if isinstance(other, ABCSeries):
23882392
other = self._holder(other)
23892393
other_mask = isnull(other)
2394+
23902395
if isinstance(other, bool):
23912396
raise TypeError
23922397
elif is_null_datelike_scalar(other):
@@ -2395,7 +2400,7 @@ def _try_coerce_args(self, values, other):
23952400
elif isinstance(other, self._holder):
23962401
if other.tz != self.values.tz:
23972402
raise ValueError("incompatible or non tz-aware value")
2398-
other = other.tz_localize(None).asi8
2403+
other = other.asi8
23992404
other_mask = isnull(other)
24002405
elif isinstance(other, (np.datetime64, datetime, date)):
24012406
other = lib.Timestamp(other)
@@ -2405,7 +2410,7 @@ def _try_coerce_args(self, values, other):
24052410
if tz is None or str(tz) != str(self.values.tz):
24062411
raise ValueError("incompatible or non tz-aware value")
24072412
other_mask = isnull(other)
2408-
other = other.tz_localize(None).value
2413+
other = other.value
24092414

24102415
return values, values_mask, other, other_mask
24112416

@@ -2415,12 +2420,12 @@ def _try_coerce_result(self, result):
24152420
if result.dtype.kind in ['i', 'f', 'O']:
24162421
result = result.astype('M8[ns]')
24172422
elif isinstance(result, (np.integer, np.float, np.datetime64)):
2418-
result = lib.Timestamp(result).tz_localize(self.values.tz)
2423+
result = lib.Timestamp(result, tz=self.values.tz)
24192424
if isinstance(result, np.ndarray):
24202425
# allow passing of > 1dim if its trivial
24212426
if result.ndim > 1:
24222427
result = result.reshape(len(result))
2423-
result = self._holder(result).tz_localize(self.values.tz)
2428+
result = self.values._shallow_copy(result)
24242429

24252430
return result
24262431

pandas/tests/indexing/test_coercion.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def test_setitem_series_datetime64tz(self):
229229
# datetime64 + int -> object
230230
# ToDo: The result must be object
231231
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
232-
pd.Timestamp(1).tz_localize(tz),
232+
pd.Timestamp(1, tz=tz),
233233
pd.Timestamp('2011-01-03', tz=tz),
234234
pd.Timestamp('2011-01-04', tz=tz)])
235235
self._assert_setitem_series_conversion(obj, 1, exp,
@@ -1038,7 +1038,7 @@ def test_fillna_series_datetime64tz(self):
10381038
# datetime64tz + int => datetime64tz
10391039
# ToDo: must be object
10401040
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
1041-
pd.Timestamp(1).tz_localize(tz=tz),
1041+
pd.Timestamp(1, tz=tz),
10421042
pd.Timestamp('2011-01-03', tz=tz),
10431043
pd.Timestamp('2011-01-04', tz=tz)])
10441044
self._assert_fillna_conversion(obj, 1, exp,

pandas/tests/series/test_indexing.py

+83
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,89 @@ def test_ix_getitem_iterator(self):
776776
result = self.series.ix[idx]
777777
assert_series_equal(result, self.series[:10])
778778

779+
def test_setitem_with_tz(self):
780+
for tz in ['US/Eastern', 'UTC', 'Asia/Tokyo']:
781+
orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
782+
tz=tz))
783+
self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz))
784+
785+
# scalar
786+
s = orig.copy()
787+
s[1] = pd.Timestamp('2011-01-01', tz=tz)
788+
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
789+
pd.Timestamp('2011-01-01 00:00', tz=tz),
790+
pd.Timestamp('2016-01-01 02:00', tz=tz)])
791+
tm.assert_series_equal(s, exp)
792+
793+
s = orig.copy()
794+
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
795+
tm.assert_series_equal(s, exp)
796+
797+
s = orig.copy()
798+
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
799+
tm.assert_series_equal(s, exp)
800+
801+
# vector
802+
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
803+
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
804+
self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz))
805+
806+
s[[1, 2]] = vals
807+
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
808+
pd.Timestamp('2011-01-01 00:00', tz=tz),
809+
pd.Timestamp('2012-01-01 00:00', tz=tz)])
810+
tm.assert_series_equal(s, exp)
811+
812+
s = orig.copy()
813+
s.loc[[1, 2]] = vals
814+
tm.assert_series_equal(s, exp)
815+
816+
s = orig.copy()
817+
s.iloc[[1, 2]] = vals
818+
tm.assert_series_equal(s, exp)
819+
820+
def test_setitem_with_tz_dst(self):
821+
# GH XXX
822+
tz = 'US/Eastern'
823+
orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
824+
tz=tz))
825+
self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz))
826+
827+
# scalar
828+
s = orig.copy()
829+
s[1] = pd.Timestamp('2011-01-01', tz=tz)
830+
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
831+
pd.Timestamp('2011-01-01 00:00', tz=tz),
832+
pd.Timestamp('2016-11-06 02:00', tz=tz)])
833+
tm.assert_series_equal(s, exp)
834+
835+
s = orig.copy()
836+
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
837+
tm.assert_series_equal(s, exp)
838+
839+
s = orig.copy()
840+
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
841+
tm.assert_series_equal(s, exp)
842+
843+
# vector
844+
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
845+
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
846+
self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz))
847+
848+
s[[1, 2]] = vals
849+
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
850+
pd.Timestamp('2011-01-01 00:00', tz=tz),
851+
pd.Timestamp('2012-01-01 00:00', tz=tz)])
852+
tm.assert_series_equal(s, exp)
853+
854+
s = orig.copy()
855+
s.loc[[1, 2]] = vals
856+
tm.assert_series_equal(s, exp)
857+
858+
s = orig.copy()
859+
s.iloc[[1, 2]] = vals
860+
tm.assert_series_equal(s, exp)
861+
779862
def test_where(self):
780863
s = Series(np.random.randn(5))
781864
cond = s > 0

pandas/tests/series/test_misc_api.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@ def test_copy(self):
241241
self.assertTrue(np.isnan(s2[0]))
242242
self.assertFalse(np.isnan(s[0]))
243243
else:
244-
245244
# we DID modify the original Series
246245
self.assertTrue(np.isnan(s2[0]))
247246
self.assertTrue(np.isnan(s[0]))
@@ -252,6 +251,7 @@ def test_copy(self):
252251
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])
253252

254253
for deep in [None, False, True]:
254+
255255
s = Series([Timestamp('2012/01/01', tz='UTC')])
256256

257257
if deep is None:
@@ -263,11 +263,13 @@ def test_copy(self):
263263

264264
# default deep is True
265265
if deep is None or deep is True:
266-
assert_series_equal(s, expected)
266+
# Did not modify original Series
267267
assert_series_equal(s2, expected2)
268+
assert_series_equal(s, expected)
268269
else:
269-
assert_series_equal(s, expected2)
270+
# we DID modify the original Series
270271
assert_series_equal(s2, expected2)
272+
assert_series_equal(s, expected2)
271273

272274
def test_axis_alias(self):
273275
s = Series([1, 2, np.nan])

pandas/tests/series/test_missing.py

+55-29
Original file line numberDiff line numberDiff line change
@@ -130,99 +130,125 @@ def test_datetime64_fillna(self):
130130
def test_datetime64_tz_fillna(self):
131131
for tz in ['US/Eastern', 'Asia/Tokyo']:
132132
# DatetimeBlock
133-
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp(
134-
'2011-01-03 10:00'), pd.NaT])
133+
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
134+
Timestamp('2011-01-03 10:00'), pd.NaT])
135+
null_loc = pd.Series([False, True, False, True])
136+
135137
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
136-
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
137-
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp(
138-
'2011-01-02 10:00')])
138+
expected = Series([Timestamp('2011-01-01 10:00'),
139+
Timestamp('2011-01-02 10:00'),
140+
Timestamp('2011-01-03 10:00'),
141+
Timestamp('2011-01-02 10:00')])
139142
self.assert_series_equal(expected, result)
143+
# check s is not changed
144+
self.assert_series_equal(pd.isnull(s), null_loc)
140145

141146
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
142-
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
143-
'2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'),
144-
Timestamp('2011-01-02 10:00', tz=tz)])
147+
expected = Series([Timestamp('2011-01-01 10:00'),
148+
Timestamp('2011-01-02 10:00', tz=tz),
149+
Timestamp('2011-01-03 10:00'),
150+
Timestamp('2011-01-02 10:00', tz=tz)])
145151
self.assert_series_equal(expected, result)
152+
self.assert_series_equal(pd.isnull(s), null_loc)
146153

147154
result = s.fillna('AAA')
148155
expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
149156
Timestamp('2011-01-03 10:00'), 'AAA'],
150157
dtype=object)
151158
self.assert_series_equal(expected, result)
159+
self.assert_series_equal(pd.isnull(s), null_loc)
152160

153161
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
154162
3: pd.Timestamp('2011-01-04 10:00')})
155-
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
156-
'2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'),
157-
Timestamp('2011-01-04 10:00')])
163+
expected = Series([Timestamp('2011-01-01 10:00'),
164+
Timestamp('2011-01-02 10:00', tz=tz),
165+
Timestamp('2011-01-03 10:00'),
166+
Timestamp('2011-01-04 10:00')])
158167
self.assert_series_equal(expected, result)
168+
self.assert_series_equal(pd.isnull(s), null_loc)
159169

160170
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
161171
3: pd.Timestamp('2011-01-04 10:00')})
162-
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp(
163-
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp(
164-
'2011-01-04 10:00')])
172+
expected = Series([Timestamp('2011-01-01 10:00'),
173+
Timestamp('2011-01-02 10:00'),
174+
Timestamp('2011-01-03 10:00'),
175+
Timestamp('2011-01-04 10:00')])
165176
self.assert_series_equal(expected, result)
177+
self.assert_series_equal(pd.isnull(s), null_loc)
166178

167179
# DatetimeBlockTZ
168180
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
169181
'2011-01-03 10:00', pd.NaT], tz=tz)
170182
s = pd.Series(idx)
183+
self.assertEqual(s.dtype, 'datetime64[ns, {0}]'.format(tz))
184+
self.assert_series_equal(pd.isnull(s), null_loc)
185+
171186
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
172-
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
173-
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00', tz=tz),
174-
Timestamp('2011-01-02 10:00')])
187+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
188+
Timestamp('2011-01-02 10:00'),
189+
Timestamp('2011-01-03 10:00', tz=tz),
190+
Timestamp('2011-01-02 10:00')])
175191
self.assert_series_equal(expected, result)
192+
self.assert_series_equal(pd.isnull(s), null_loc)
176193

177194
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
178195
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
179196
'2011-01-03 10:00', '2011-01-02 10:00'],
180197
tz=tz)
181198
expected = Series(idx)
182199
self.assert_series_equal(expected, result)
200+
self.assert_series_equal(pd.isnull(s), null_loc)
183201

184-
result = s.fillna(pd.Timestamp(
185-
'2011-01-02 10:00', tz=tz).to_pydatetime())
202+
result = s.fillna(pd.Timestamp('2011-01-02 10:00',
203+
tz=tz).to_pydatetime())
186204
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
187205
'2011-01-03 10:00', '2011-01-02 10:00'],
188206
tz=tz)
189207
expected = Series(idx)
190208
self.assert_series_equal(expected, result)
209+
self.assert_series_equal(pd.isnull(s), null_loc)
191210

192211
result = s.fillna('AAA')
193212
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
194213
Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
195214
dtype=object)
196215
self.assert_series_equal(expected, result)
216+
self.assert_series_equal(pd.isnull(s), null_loc)
197217

198218
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
199219
3: pd.Timestamp('2011-01-04 10:00')})
200-
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
201-
'2011-01-02 10:00', tz=tz), Timestamp(
202-
'2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')])
220+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
221+
Timestamp('2011-01-02 10:00', tz=tz),
222+
Timestamp('2011-01-03 10:00', tz=tz),
223+
Timestamp('2011-01-04 10:00')])
203224
self.assert_series_equal(expected, result)
225+
self.assert_series_equal(pd.isnull(s), null_loc)
204226

205227
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
206228
3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
207-
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
208-
'2011-01-02 10:00', tz=tz), Timestamp(
209-
'2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00',
210-
tz=tz)])
229+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
230+
Timestamp('2011-01-02 10:00', tz=tz),
231+
Timestamp('2011-01-03 10:00', tz=tz),
232+
Timestamp('2011-01-04 10:00', tz=tz)])
211233
self.assert_series_equal(expected, result)
234+
self.assert_series_equal(pd.isnull(s), null_loc)
212235

213236
# filling with a naive/other zone, coerce to object
214237
result = s.fillna(Timestamp('20130101'))
215-
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp(
216-
'2013-01-01'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp(
217-
'2013-01-01')])
238+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
239+
Timestamp('2013-01-01'),
240+
Timestamp('2011-01-03 10:00', tz=tz),
241+
Timestamp('2013-01-01')])
218242
self.assert_series_equal(expected, result)
243+
self.assert_series_equal(pd.isnull(s), null_loc)
219244

220245
result = s.fillna(Timestamp('20130101', tz='US/Pacific'))
221246
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
222247
Timestamp('2013-01-01', tz='US/Pacific'),
223248
Timestamp('2011-01-03 10:00', tz=tz),
224249
Timestamp('2013-01-01', tz='US/Pacific')])
225250
self.assert_series_equal(expected, result)
251+
self.assert_series_equal(pd.isnull(s), null_loc)
226252

227253
def test_fillna_int(self):
228254
s = Series(np.random.randint(-100, 100, 50))

0 commit comments

Comments
 (0)