Skip to content

Commit 2bf9fb5

Browse files
committed
BUG: bug in .copy of datetime tz-aware objects, #11794
The underlying implementation (a DatetimeIndex, whose shallow copies are views) was not always being deep-copied.
1 parent 43edd83 commit 2bf9fb5

File tree

5 files changed

+68
-32
lines changed

5 files changed

+68
-32
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ Bug Fixes
170170
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
171171
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
172172
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
173-
173+
- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)
174174

175175

176176

pandas/core/dtypes.py

+3
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,9 @@ def __hash__(self):
6565
def __eq__(self, other):
6666
raise NotImplementedError("sub-classes should implement an __eq__ method")
6767

68+
def __ne__(self, other):
69+
return not self.__eq__(other)
70+
6871
@classmethod
6972
def is_dtype(cls, dtype):
7073
""" Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) """

pandas/core/internals.py

+20-16
Original file line number | Diff line number | Diff line change
@@ -168,17 +168,11 @@ def make_block(self, values, placement=None, ndim=None, **kwargs):
168168

169169
return make_block(values, placement=placement, ndim=ndim, **kwargs)
170170

171-
def make_block_same_class(self, values, placement, copy=False, fastpath=True,
172-
**kwargs):
173-
"""
174-
Wrap given values in a block of same type as self.
175-
176-
`kwargs` are used in SparseBlock override.
177-
178-
"""
179-
if copy:
180-
values = values.copy()
181-
return make_block(values, placement, klass=self.__class__,
171+
def make_block_same_class(self, values, placement=None, fastpath=True, **kwargs):
172+
""" Wrap given values in a block of same type as self. """
173+
if placement is None:
174+
placement = self.mgr_locs
175+
return make_block(values, placement=placement, klass=self.__class__,
182176
fastpath=fastpath, **kwargs)
183177

184178
@mgr_locs.setter
@@ -573,12 +567,11 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs):
573567

574568
# block actions ####
575569
def copy(self, deep=True, mgr=None):
570+
""" copy constructor """
576571
values = self.values
577572
if deep:
578573
values = values.copy()
579-
return self.make_block(values,
580-
klass=self.__class__,
581-
fastpath=True)
574+
return self.make_block_same_class(values)
582575

583576
def replace(self, to_replace, value, inplace=False, filter=None,
584577
regex=False, convert=True, mgr=None):
@@ -2140,6 +2133,13 @@ def __init__(self, values, placement, ndim=2,
21402133
placement=placement,
21412134
ndim=ndim,
21422135
**kwargs)
2136+
def copy(self, deep=True, mgr=None):
2137+
""" copy constructor """
2138+
values = self.values
2139+
if deep:
2140+
values = values.copy(deep=True)
2141+
return self.make_block_same_class(values)
2142+
21432143
def external_values(self):
21442144
""" we internally represent the data as a DatetimeIndex, but for external
21452145
compat with ndarray, export as a ndarray of Timestamps """
@@ -3257,10 +3257,14 @@ def get_scalar(self, tup):
32573257
full_loc = list(ax.get_loc(x)
32583258
for ax, x in zip(self.axes, tup))
32593259
blk = self.blocks[self._blknos[full_loc[0]]]
3260-
full_loc[0] = self._blklocs[full_loc[0]]
3260+
values = blk.values
32613261

32623262
# FIXME: this may return non-upcasted types?
3263-
return blk.values[tuple(full_loc)]
3263+
if values.ndim == 1:
3264+
return values[full_loc[1]]
3265+
3266+
full_loc[0] = self._blklocs[full_loc[0]]
3267+
return values[tuple(full_loc)]
32643268

32653269
def delete(self, item):
32663270
"""

pandas/tests/test_internals.py

+25-11
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,8 @@ def create_mgr(descr, item_shape=None):
147147
block_placements = OrderedDict()
148148
for d in descr.split(';'):
149149
d = d.strip()
150+
if not len(d):
151+
continue
150152
names, blockstr = d.partition(':')[::2]
151153
blockstr = blockstr.strip()
152154
names = names.strip().split(',')
@@ -324,7 +326,8 @@ class TestBlockManager(tm.TestCase):
324326

325327
def setUp(self):
326328
self.mgr = create_mgr('a: f8; b: object; c: f8; d: object; e: f8;'
327-
'f: bool; g: i8; h: complex')
329+
'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
330+
'k: M8[ns, US/Eastern]; l: M8[ns, CET];')
328331

329332
def test_constructor_corner(self):
330333
pass
@@ -476,16 +479,24 @@ def test_set_change_dtype_slice(self): # GH8850
476479
DataFrame([[3], [6]], columns=cols[2:]))
477480

478481
def test_copy(self):
479-
shallow = self.mgr.copy(deep=False)
480-
481-
# we don't guaranteee block ordering
482-
for blk in self.mgr.blocks:
483-
found = False
484-
for cp_blk in shallow.blocks:
485-
if cp_blk.values is blk.values:
486-
found = True
487-
break
488-
self.assertTrue(found)
482+
cp = self.mgr.copy(deep=False)
483+
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
484+
485+
# view assertion
486+
self.assertTrue(cp_blk.equals(blk))
487+
self.assertTrue(cp_blk.values.base is blk.values.base)
488+
489+
cp = self.mgr.copy(deep=True)
490+
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
491+
492+
# copy assertion
493+
# we either have a None for a base or in case of some blocks it is an array (e.g. datetimetz),
494+
# but was copied
495+
self.assertTrue(cp_blk.equals(blk))
496+
if cp_blk.values.base is not None and blk.values.base is not None:
497+
self.assertFalse(cp_blk.values.base is blk.values.base)
498+
else:
499+
self.assertTrue(cp_blk.values.base is None and blk.values.base is None)
489500

490501
def test_sparse(self):
491502
mgr = create_mgr('a: sparse-1; b: sparse-2')
@@ -688,7 +699,10 @@ def test_consolidate_ordering_issues(self):
688699
self.mgr.set('g', randn(N))
689700
self.mgr.set('h', randn(N))
690701

702+
# we have datetime/tz blocks in self.mgr
691703
cons = self.mgr.consolidate()
704+
self.assertEqual(cons.nblocks, 4)
705+
cons = self.mgr.consolidate().get_numeric_data()
692706
self.assertEqual(cons.nblocks, 1)
693707
assert_almost_equal(cons.blocks[0].mgr_locs,
694708
np.arange(len(cons.items)))

pandas/tests/test_series.py

+19-4
Original file line number | Diff line number | Diff line change
@@ -5111,12 +5111,27 @@ def test_cov(self):
51115111
self.assertTrue(isnull(ts1.cov(ts2, min_periods=12)))
51125112

51135113
def test_copy(self):
5114-
ts = self.ts.copy()
51155114

5116-
ts[::2] = np.NaN
5115+
for deep in [False, True]:
5116+
s = Series(np.arange(10),dtype='float64')
5117+
s2 = s.copy(deep=deep)
5118+
s2[::2] = np.NaN
5119+
5120+
# Did not modify original Series
5121+
self.assertTrue(np.isnan(s2[0]))
5122+
self.assertFalse(np.isnan(s[0]))
51175123

5118-
# Did not modify original Series
5119-
self.assertFalse(np.isnan(self.ts[0]))
5124+
# GH 11794
5125+
# copy of tz-aware
5126+
expected = Series([Timestamp('2012/01/01', tz='UTC')])
5127+
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])
5128+
5129+
for deep in [False, True]:
5130+
s = Series([Timestamp('2012/01/01', tz='UTC')])
5131+
s2 = s.copy()
5132+
s2[0] = pd.Timestamp('1999/01/01', tz='UTC')
5133+
assert_series_equal(s, expected)
5134+
assert_series_equal(s2, expected2)
51205135

51215136
def test_count(self):
51225137
self.assertEqual(self.ts.count(), len(self.ts))

0 commit comments

Comments
 (0)