Skip to content

Commit cbf7043

Browse files
committed
Merge pull request #11796 from jreback/tzcopy
BUG: bug in deep copy of datetime tz-aware objects, #11794
2 parents 3361a48 + d526a4f commit cbf7043

File tree

6 files changed

+117
-42
lines changed

6 files changed

+117
-42
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ Bug Fixes
170170
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
171171
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
172172
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)
173-
173+
- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)
174174

175175

176176

pandas/core/common.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def _isnull_ndarraylike(obj):
232232
values = getattr(obj, 'values', obj)
233233
dtype = values.dtype
234234

235-
if dtype.kind in ('O', 'S', 'U'):
235+
if is_string_dtype(dtype):
236236
if is_categorical_dtype(values):
237237
from pandas import Categorical
238238
if not isinstance(values, Categorical):
@@ -243,7 +243,7 @@ def _isnull_ndarraylike(obj):
243243
# Working around NumPy ticket 1542
244244
shape = values.shape
245245

246-
if dtype.kind in ('S', 'U'):
246+
if is_string_like_dtype(dtype):
247247
result = np.zeros(values.shape, dtype=bool)
248248
else:
249249
result = np.empty(shape, dtype=bool)
@@ -267,11 +267,11 @@ def _isnull_ndarraylike_old(obj):
267267
values = getattr(obj, 'values', obj)
268268
dtype = values.dtype
269269

270-
if dtype.kind in ('O', 'S', 'U'):
270+
if is_string_dtype(dtype):
271271
# Working around NumPy ticket 1542
272272
shape = values.shape
273273

274-
if values.dtype.kind in ('S', 'U'):
274+
if is_string_like_dtype(dtype):
275275
result = np.zeros(values.shape, dtype=bool)
276276
else:
277277
result = np.empty(shape, dtype=bool)
@@ -2208,13 +2208,17 @@ def is_numeric_v_string_like(a, b):
22082208

22092209
is_a_numeric_array = is_a_array and is_numeric_dtype(a)
22102210
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
2211+
is_a_string_array = is_a_array and is_string_like_dtype(a)
2212+
is_b_string_array = is_b_array and is_string_like_dtype(b)
22112213

22122214
is_a_scalar_string_like = not is_a_array and is_string_like(a)
22132215
is_b_scalar_string_like = not is_b_array and is_string_like(b)
22142216

22152217
return (
22162218
is_a_numeric_array and is_b_scalar_string_like) or (
2217-
is_b_numeric_array and is_a_scalar_string_like
2219+
is_b_numeric_array and is_a_scalar_string_like) or (
2220+
is_a_numeric_array and is_b_string_array) or (
2221+
is_b_numeric_array and is_a_string_array
22182222
)
22192223

22202224
def is_datetimelike_v_numeric(a, b):
@@ -2257,6 +2261,15 @@ def is_numeric_dtype(arr_or_dtype):
22572261
and not issubclass(tipo, (np.datetime64, np.timedelta64)))
22582262

22592263

2264+
def is_string_dtype(arr_or_dtype):
2265+
dtype = _get_dtype(arr_or_dtype)
2266+
return dtype.kind in ('O', 'S', 'U')
2267+
2268+
def is_string_like_dtype(arr_or_dtype):
2269+
# exclude object as it's a mixed dtype
2270+
dtype = _get_dtype(arr_or_dtype)
2271+
return dtype.kind in ('S', 'U')
2272+
22602273
def is_float_dtype(arr_or_dtype):
22612274
tipo = _get_dtype_type(arr_or_dtype)
22622275
return issubclass(tipo, np.floating)

pandas/core/dtypes.py

+3
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ def __hash__(self):
6565
def __eq__(self, other):
6666
raise NotImplementedError("sub-classes should implement an __eq__ method")
6767

68+
def __ne__(self, other):
69+
return not self.__eq__(other)
70+
6871
@classmethod
6972
def is_dtype(cls, dtype):
7073
""" Return a boolean indicating whether the passed type is an actual dtype that we can match (via string or type) """

pandas/core/internals.py

+28-21
Original file line numberDiff line numberDiff line change
@@ -168,17 +168,11 @@ def make_block(self, values, placement=None, ndim=None, **kwargs):
168168

169169
return make_block(values, placement=placement, ndim=ndim, **kwargs)
170170

171-
def make_block_same_class(self, values, placement, copy=False, fastpath=True,
172-
**kwargs):
173-
"""
174-
Wrap given values in a block of same type as self.
175-
176-
`kwargs` are used in SparseBlock override.
177-
178-
"""
179-
if copy:
180-
values = values.copy()
181-
return make_block(values, placement, klass=self.__class__,
171+
def make_block_same_class(self, values, placement=None, fastpath=True, **kwargs):
172+
""" Wrap given values in a block of same type as self. """
173+
if placement is None:
174+
placement = self.mgr_locs
175+
return make_block(values, placement=placement, klass=self.__class__,
182176
fastpath=fastpath, **kwargs)
183177

184178
@mgr_locs.setter
@@ -573,12 +567,11 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs):
573567

574568
# block actions ####
575569
def copy(self, deep=True, mgr=None):
570+
""" copy constructor """
576571
values = self.values
577572
if deep:
578573
values = values.copy()
579-
return self.make_block(values,
580-
klass=self.__class__,
581-
fastpath=True)
574+
return self.make_block_same_class(values)
582575

583576
def replace(self, to_replace, value, inplace=False, filter=None,
584577
regex=False, convert=True, mgr=None):
@@ -2140,6 +2133,13 @@ def __init__(self, values, placement, ndim=2,
21402133
placement=placement,
21412134
ndim=ndim,
21422135
**kwargs)
2136+
def copy(self, deep=True, mgr=None):
2137+
""" copy constructor """
2138+
values = self.values
2139+
if deep:
2140+
values = values.copy(deep=True)
2141+
return self.make_block_same_class(values)
2142+
21432143
def external_values(self):
21442144
""" we internally represent the data as a DatetimeIndex, but for external
21452145
compat with ndarray, export as an ndarray of Timestamps """
@@ -3257,10 +3257,14 @@ def get_scalar(self, tup):
32573257
full_loc = list(ax.get_loc(x)
32583258
for ax, x in zip(self.axes, tup))
32593259
blk = self.blocks[self._blknos[full_loc[0]]]
3260-
full_loc[0] = self._blklocs[full_loc[0]]
3260+
values = blk.values
32613261

32623262
# FIXME: this may return non-upcasted types?
3263-
return blk.values[tuple(full_loc)]
3263+
if values.ndim == 1:
3264+
return values[full_loc[1]]
3265+
3266+
full_loc[0] = self._blklocs[full_loc[0]]
3267+
return values[tuple(full_loc)]
32643268

32653269
def delete(self, item):
32663270
"""
@@ -4415,11 +4419,14 @@ def _putmask_smart(v, m, n):
44154419
try:
44164420
nn = n[m]
44174421
nn_at = nn.astype(v.dtype)
4418-
comp = (nn == nn_at)
4419-
if is_list_like(comp) and comp.all():
4420-
nv = v.copy()
4421-
nv[m] = nn_at
4422-
return nv
4422+
4423+
# avoid invalid dtype comparisons
4424+
if not is_numeric_v_string_like(nn, nn_at):
4425+
comp = (nn == nn_at)
4426+
if is_list_like(comp) and comp.all():
4427+
nv = v.copy()
4428+
nv[m] = nn_at
4429+
return nv
44234430
except (ValueError, IndexError, TypeError):
44244431
pass
44254432

pandas/tests/test_internals.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ def create_mgr(descr, item_shape=None):
147147
block_placements = OrderedDict()
148148
for d in descr.split(';'):
149149
d = d.strip()
150+
if not len(d):
151+
continue
150152
names, blockstr = d.partition(':')[::2]
151153
blockstr = blockstr.strip()
152154
names = names.strip().split(',')
@@ -324,7 +326,8 @@ class TestBlockManager(tm.TestCase):
324326

325327
def setUp(self):
326328
self.mgr = create_mgr('a: f8; b: object; c: f8; d: object; e: f8;'
327-
'f: bool; g: i8; h: complex')
329+
'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
330+
'k: M8[ns, US/Eastern]; l: M8[ns, CET];')
328331

329332
def test_constructor_corner(self):
330333
pass
@@ -476,16 +479,24 @@ def test_set_change_dtype_slice(self): # GH8850
476479
DataFrame([[3], [6]], columns=cols[2:]))
477480

478481
def test_copy(self):
479-
shallow = self.mgr.copy(deep=False)
480-
481-
# we don't guarantee block ordering
482-
for blk in self.mgr.blocks:
483-
found = False
484-
for cp_blk in shallow.blocks:
485-
if cp_blk.values is blk.values:
486-
found = True
487-
break
488-
self.assertTrue(found)
482+
cp = self.mgr.copy(deep=False)
483+
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
484+
485+
# view assertion
486+
self.assertTrue(cp_blk.equals(blk))
487+
self.assertTrue(cp_blk.values.base is blk.values.base)
488+
489+
cp = self.mgr.copy(deep=True)
490+
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):
491+
492+
# copy assertion
493+
# we either have a None for a base or in case of some blocks it is an array (e.g. datetimetz),
494+
# but was copied
495+
self.assertTrue(cp_blk.equals(blk))
496+
if cp_blk.values.base is not None and blk.values.base is not None:
497+
self.assertFalse(cp_blk.values.base is blk.values.base)
498+
else:
499+
self.assertTrue(cp_blk.values.base is None and blk.values.base is None)
489500

490501
def test_sparse(self):
491502
mgr = create_mgr('a: sparse-1; b: sparse-2')
@@ -688,7 +699,10 @@ def test_consolidate_ordering_issues(self):
688699
self.mgr.set('g', randn(N))
689700
self.mgr.set('h', randn(N))
690701

702+
# we have datetime/tz blocks in self.mgr
691703
cons = self.mgr.consolidate()
704+
self.assertEqual(cons.nblocks, 4)
705+
cons = self.mgr.consolidate().get_numeric_data()
692706
self.assertEqual(cons.nblocks, 1)
693707
assert_almost_equal(cons.blocks[0].mgr_locs,
694708
np.arange(len(cons.items)))

pandas/tests/test_series.py

+42-4
Original file line numberDiff line numberDiff line change
@@ -5111,12 +5111,50 @@ def test_cov(self):
51115111
self.assertTrue(isnull(ts1.cov(ts2, min_periods=12)))
51125112

51135113
def test_copy(self):
5114-
ts = self.ts.copy()
51155114

5116-
ts[::2] = np.NaN
5115+
for deep in [None, False, True]:
5116+
s = Series(np.arange(10),dtype='float64')
5117+
5118+
# default deep is True
5119+
if deep is None:
5120+
s2 = s.copy()
5121+
else:
5122+
s2 = s.copy(deep=deep)
5123+
5124+
s2[::2] = np.NaN
5125+
5126+
if deep is None or deep is True:
5127+
# Did not modify original Series
5128+
self.assertTrue(np.isnan(s2[0]))
5129+
self.assertFalse(np.isnan(s[0]))
5130+
else:
51175131

5118-
# Did not modify original Series
5119-
self.assertFalse(np.isnan(self.ts[0]))
5132+
# we DID modify the original Series
5133+
self.assertTrue(np.isnan(s2[0]))
5134+
self.assertTrue(np.isnan(s[0]))
5135+
5136+
# GH 11794
5137+
# copy of tz-aware
5138+
expected = Series([Timestamp('2012/01/01', tz='UTC')])
5139+
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])
5140+
5141+
for deep in [None, False, True]:
5142+
s = Series([Timestamp('2012/01/01', tz='UTC')])
5143+
5144+
if deep is None:
5145+
s2 = s.copy()
5146+
else:
5147+
s2 = s.copy(deep=deep)
5148+
5149+
s2[0] = pd.Timestamp('1999/01/01', tz='UTC')
5150+
5151+
# default deep is True
5152+
if deep is None or deep is True:
5153+
assert_series_equal(s, expected)
5154+
assert_series_equal(s2, expected2)
5155+
else:
5156+
assert_series_equal(s, expected2)
5157+
assert_series_equal(s2, expected2)
51205158

51215159
def test_count(self):
51225160
self.assertEqual(self.ts.count(), len(self.ts))

0 commit comments

Comments
 (0)