diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index e9614ec4f2290..512aacad8a837 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -72,9 +72,15 @@ Bug Fixes - Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`) + - Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`) - Bug in comparisons of Series vs list-likes (:issue:`11339`) + +- Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`) + + + - Bug in list-like indexing with a mixed-integer Index (:issue:`11320`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 98f9677fb6784..248203c259aaa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2999,8 +2999,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, '{0!r}').format(type(to_replace).__name__) raise TypeError(msg) # pragma: no cover - new_data = new_data.convert(copy=not inplace, numeric=False) - if inplace: self._update_inplace(new_data) else: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c8c834180c9f6..ed4d6a6ccd73e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -137,6 +137,11 @@ def get_values(self, dtype=None): def to_dense(self): return self.values.view() + def to_object_block(self, mgr): + """ return myself as an object block """ + values = self.get_values(dtype=object) + return self.make_block(values,klass=ObjectBlock) + @property def fill_value(self): return np.nan @@ -215,7 +220,7 @@ def _slice(self, slicer): """ return a slice of my values """ return self.values[slicer] - def reshape_nd(self, labels, shape, ref_items): + def reshape_nd(self, labels, shape, ref_items, mgr=None): """ Parameters ---------- @@ -312,7 +317,7 @@ def delete(self, loc): self.values = np.delete(self.values, loc, 0) self.mgr_locs = self.mgr_locs.delete(loc) - def apply(self, func, **kwargs): + def apply(self, func, mgr=None, **kwargs): """ apply the function to my values; return a block if we are not one """ result = func(self.values, **kwargs) if not isinstance(result, Block): @@ -320,13 +325,17 @@ def apply(self, func, **kwargs): return result - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None, mgr=None): + """ fillna on the block with the value. If we fail, then convert to ObjectBlock + and try again """ + if not self._can_hold_na: if inplace: - return [self] + return self else: - return [self.copy()] + return self.copy() + original_value = value mask = isnull(self.values) if limit is not None: if self.ndim > 2: @@ -334,9 +343,24 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): "is currently limited to 2") mask[mask.cumsum(self.ndim-1) > limit] = False - value = self._try_fill(value) - blocks = self.putmask(mask, value, inplace=inplace) - return self._maybe_downcast(blocks, downcast) + # fillna, but if we cannot coerce, then try again as an ObjectBlock + try: + values, _, value, _ = self._try_coerce_args(self.values, value) + blocks = self.putmask(mask, value, inplace=inplace) + blocks = [ b.make_block(values=self._try_coerce_result(b.values)) for b in blocks ] + return self._maybe_downcast(blocks, downcast) + except (TypeError, ValueError): + + # we can't process the value, but nothing to do + if not mask.any(): + return self if inplace else self.copy() + + # we cannot coerce the underlying object, so + # make an ObjectBlock + return self.to_object_block(mgr=mgr).fillna(original_value, + limit=limit, + inplace=inplace, + downcast=False) def _maybe_downcast(self, blocks, downcast=None): @@ -347,18 +371,14 @@ def _maybe_downcast(self, blocks, downcast=None): elif downcast is None and (self.is_timedelta or self.is_datetime): return blocks - result_blocks = [] - for b in blocks: - result_blocks.extend(b.downcast(downcast)) + return _extend_blocks([ b.downcast(downcast) for b in blocks ]) - return result_blocks - - def downcast(self, dtypes=None): + def downcast(self, dtypes=None, mgr=None): """ try to downcast each item to the dict of dtypes if present """ # turn it off completely if dtypes is False: - return [self] + return self values = self.values @@ -370,12 +390,12 @@ def downcast(self, dtypes=None): dtypes = 'infer' nv = _possibly_downcast_to_dtype(values, dtypes) - return [self.make_block(nv, - fastpath=True)] + return self.make_block(nv, + fastpath=True) # ndim > 1 if dtypes is None: - return [self] + return self if not (dtypes == 'infer' or isinstance(dtypes, dict)): raise ValueError("downcast must have a dictionary or 'infer' as " @@ -409,7 +429,7 @@ def astype(self, dtype, copy=False, raise_on_error=True, values=None, **kwargs): values=values, **kwargs) def _astype(self, dtype, copy=False, raise_on_error=True, values=None, - klass=None, **kwargs): + klass=None, mgr=None, **kwargs): """ Coerce to the new type (if copy=True, return a new copy) raise on an except if raise == True @@ -474,7 +494,7 @@ def convert(self, copy=True, **kwargs): return a copy of the block (if copy = True) by definition we are not an ObjectBlock here! """ - return [self.copy()] if copy else [self] + return self.copy() if copy else self def _can_hold_element(self, value): raise NotImplementedError() @@ -520,7 +540,7 @@ def _try_operate(self, values): def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ - return values, other + return values, False, other, False def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -551,7 +571,7 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs): return values # block actions #### - def copy(self, deep=True): + def copy(self, deep=True, mgr=None): values = self.values if deep: values = values.copy() @@ -560,23 +580,45 @@ def copy(self, deep=True): fastpath=True) def replace(self, to_replace, value, inplace=False, filter=None, - regex=False): + regex=False, convert=True, mgr=None): """ replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. It is used in ObjectBlocks. It is here for API compatibility.""" - mask = com.mask_missing(self.values, to_replace) - if filter is not None: - filtered_out = ~self.mgr_locs.isin(filter) - mask[filtered_out.nonzero()[0]] = False - if not mask.any(): - if inplace: - return [self] - return [self.copy()] - return self.putmask(mask, value, inplace=inplace) + original_to_replace = to_replace + + # try to replace, if we raise an error, convert to ObjectBlock and retry + try: + values, _, to_replace, _ = self._try_coerce_args(self.values, to_replace) + mask = com.mask_missing(values, to_replace) + if filter is not None: + filtered_out = ~self.mgr_locs.isin(filter) + mask[filtered_out.nonzero()[0]] = False + + blocks = self.putmask(mask, value, inplace=inplace) + if convert: + blocks = [ b.convert(by_item=True, numeric=False, copy=not inplace) for b in blocks ] + return blocks + except (TypeError, ValueError): - def setitem(self, indexer, value): + # we can't process the value, but nothing to do + if not mask.any(): + return self if inplace else self.copy() + + return self.to_object_block(mgr=mgr).replace(to_replace=original_to_replace, + value=value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert) + + + def _replace_single(self, *args, **kwargs): + """ no-op on a non-ObjectBlock """ + return self if kwargs['inplace'] else self.copy() + + def setitem(self, indexer, value, mgr=None): """ set the value inplace; return a new block (of a possibly different dtype) @@ -590,7 +632,7 @@ def setitem(self, indexer, value): value = np.nan # coerce args - values, value = self._try_coerce_args(self.values, value) + values, _, value, _ = self._try_coerce_args(self.values, value) arr_value = np.array(value) # cast the values to a type that can hold nan (if necessary) @@ -682,7 +724,7 @@ def _is_empty_indexer(indexer): return [self] def putmask(self, mask, new, align=True, inplace=False, - axis=0, transpose=False): + axis=0, transpose=False, mgr=None): """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -797,7 +839,7 @@ def putmask(self, mask, new, align=True, inplace=False, def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', - fill_value=None, coerce=False, downcast=None, **kwargs): + fill_value=None, coerce=False, downcast=None, mgr=None, **kwargs): def check_int_bool(self, inplace): # Only FloatBlocks will contain NaNs. @@ -824,7 +866,8 @@ def check_int_bool(self, inplace): limit=limit, fill_value=fill_value, coerce=coerce, - downcast=downcast) + downcast=downcast, + mgr=mgr) # try an interp method try: m = com._clean_interp_method(method, **kwargs) @@ -844,13 +887,14 @@ def check_int_bool(self, inplace): fill_value=fill_value, inplace=inplace, downcast=downcast, + mgr=mgr, **kwargs) raise ValueError("invalid method '{0}' to interpolate.".format(method)) def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, coerce=False, - downcast=None): + downcast=None, mgr=None): """ fillna but using the interpolate machinery """ # if we are coercing, then don't force the conversion @@ -862,8 +906,8 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, else: return [self.copy()] - fill_value = self._try_fill(fill_value) values = self.values if inplace else self.values.copy() + values, _, fill_value, _ = self._try_coerce_args(values, fill_value) values = self._try_operate(values) values = com.interpolate_2d(values, method=method, @@ -881,7 +925,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, def _interpolate(self, method=None, index=None, values=None, fill_value=None, axis=0, limit=None, limit_direction='forward', - inplace=False, downcast=None, **kwargs): + inplace=False, downcast=None, mgr=None, **kwargs): """ interpolate using scipy wrappers """ data = self.values if inplace else self.values.copy() @@ -957,13 +1001,13 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): else: return self.make_block_same_class(new_values, new_mgr_locs) - def diff(self, n, axis=1): + def diff(self, n, axis=1, mgr=None): """ return block for the diff of the values """ new_values = com.diff(self.values, n, axis=axis) return [self.make_block(values=new_values, fastpath=True)] - def shift(self, periods, axis=0): + def shift(self, periods, axis=0, mgr=None): """ shift the block by periods, possibly upcast """ # convert integer to float if necessary. need to do a lot more than @@ -993,7 +1037,7 @@ def shift(self, periods, axis=0): return [self.make_block(new_values, fastpath=True)] - def eval(self, func, other, raise_on_error=True, try_cast=False): + def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): """ evaluate the block; return result block from the result @@ -1003,6 +1047,7 @@ def eval(self, func, other, raise_on_error=True, try_cast=False): other : a ndarray/object raise_on_error : if True, raise when I can't perform the function, False by default (and just return the data that we had coming in) + try_cast : try casting the results to the input type Returns ------- @@ -1032,11 +1077,26 @@ def eval(self, func, other, raise_on_error=True, try_cast=False): transf = (lambda x: x.T) if is_transposed else (lambda x: x) # coerce/transpose the args if needed - values, other = self._try_coerce_args(transf(values), other) + values, values_mask, other, other_mask = self._try_coerce_args(transf(values), other) # get the result, may need to transpose the other def get_result(other): - return self._try_coerce_result(func(values, other)) + + # compute + result = func(values, other) + + # mask if needed + if isinstance(values_mask, np.ndarray) and values_mask.any(): + result = result.astype('float64',copy=False) + result[values_mask] = np.nan + if other_mask is True: + result = result.astype('float64',copy=False) + result[:] = np.nan + elif isinstance(other_mask, np.ndarray) and other_mask.any(): + result = result.astype('float64',copy=False) + result[other_mask.ravel()] = np.nan + + return self._try_coerce_result(result) # error handler if we have an issue operating with the function def handle_error(): @@ -1086,7 +1146,7 @@ def handle_error(): fastpath=True,)] def where(self, other, cond, align=True, raise_on_error=True, - try_cast=False, axis=0, transpose=False): + try_cast=False, axis=0, transpose=False, mgr=None): """ evaluate the block; return result block(s) from the result @@ -1128,22 +1188,22 @@ def where(self, other, cond, align=True, raise_on_error=True, other = _maybe_convert_string_to_object(other) # our where function - def func(c, v, o): - if c.ravel().all(): - return v + def func(cond, values, other): + if cond.ravel().all(): + return values - v, o = self._try_coerce_args(v, o) + values, values_mask, other, other_mask = self._try_coerce_args(values, other) try: return self._try_coerce_result( - expressions.where(c, v, o, raise_on_error=True) + expressions.where(cond, values, other, raise_on_error=True) ) except Exception as detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values ' - '[%s]' % (repr(o), str(detail))) + '[%s]' % (repr(other), str(detail))) else: # return the values - result = np.empty(v.shape, dtype='float64') + result = np.empty(values.shape, dtype='float64') result.fill(np.nan) return result @@ -1253,6 +1313,34 @@ def get(self, item): else: return self.values + def putmask(self, mask, new, align=True, inplace=False, + axis=0, transpose=False, mgr=None): + """ + putmask the data to the block; we must be a single block and not generate + other blocks + + return the resulting block + + Parameters + ---------- + mask : the condition to respect + new : a ndarray/object + align : boolean, perform alignment on other/cond, default is True + inplace : perform inplace modification, default is False + + Returns + ------- + a new block(s), the result of the putmask + """ + new_values = self.values if inplace else self.values.copy() + new_values, _, new, _ = self._try_coerce_args(new_values, new) + + if isinstance(new, np.ndarray) and len(new) == len(mask): + new = new[mask] + new_values[mask] = new + new_values = self._try_coerce_result(new_values) + return [self.make_block(values=new_values)] + def _slice(self, slicer): """ return a slice of my values (but densify first) """ return self.get_values()[slicer] @@ -1386,45 +1474,56 @@ class TimeDeltaBlock(IntBlock): def fill_value(self): return tslib.iNaT - def _try_fill(self, value): - """ if we are a NaT, return the actual fill value """ - if isinstance(value, type(tslib.NaT)) or np.array(isnull(value)).all(): - value = tslib.iNaT - elif isinstance(value, Timedelta): - value = value.value - elif isinstance(value, np.timedelta64): - pass - elif com.is_integer(value): - # coerce to seconds of timedelta - value = np.timedelta64(int(value * 1e9)) - elif isinstance(value, timedelta): - value = np.timedelta64(value) + def fillna(self, value, **kwargs): - return value + # allow filling with integers to be + # interpreted as seconds + if not isinstance(value, np.timedelta64) and com.is_integer(value): + value = Timedelta(value,unit='s') + return super(TimeDeltaBlock, self).fillna(value, **kwargs) def _try_coerce_args(self, values, other): - """ Coerce values and other to float64, with null values converted to - NaN. values is always ndarray-like, other may not be """ - def masker(v): - mask = isnull(v) - v = v.astype('float64') - v[mask] = np.nan - return v - - values = masker(values) - - if is_null_datelike_scalar(other): - other = np.nan - elif isinstance(other, (np.timedelta64, Timedelta, timedelta)): - other = _coerce_scalar_to_timedelta_type(other, unit='s', box=False).item() - if other == tslib.iNaT: - other = np.nan - elif lib.isscalar(other): - other = np.float64(other) + """ + Coerce values and other to int64, with null values converted to + iNaT. values is always ndarray-like, other may not be + + Parameters + ---------- + values : ndarray-like + other : ndarray-like or scalar + + Returns + ------- + base-type values, values mask, base-type other, other mask + """ + + values_mask = isnull(values) + values = values.view('i8') + other_mask = False + + if isinstance(other, bool): + raise TypeError + elif is_null_datelike_scalar(other): + other = tslib.iNaT + other_mask = True + elif isinstance(other, Timedelta): + other_mask = isnull(other) + other = other.value + elif isinstance(other, np.timedelta64): + other_mask = isnull(other) + other = other.view('i8') + elif isinstance(other, timedelta): + other = Timedelta(other).value + elif isinstance(other, np.ndarray): + other_mask = isnull(other) + other = other.astype('i8',copy=False).view('i8') else: - other = masker(other) + # scalar + other = Timedelta(other) + other_mask = isnull(other) + other = other.value - return values, other + return values, values_mask, other, other_mask def _try_operate(self, values): """ return a version to operate on """ @@ -1496,13 +1595,13 @@ def should_store(self, value): return issubclass(value.dtype.type, np.bool_) def replace(self, to_replace, value, inplace=False, filter=None, - regex=False): + regex=False, mgr=None): to_replace_values = np.atleast_1d(to_replace) if not np.can_cast(to_replace_values, bool): return self return super(BoolBlock, self).replace(to_replace, value, inplace=inplace, filter=filter, - regex=regex) + regex=regex, mgr=mgr) class ObjectBlock(Block): @@ -1609,10 +1708,7 @@ def _maybe_downcast(self, blocks, downcast=None): return blocks # split and convert the blocks - result_blocks = [] - for blk in blocks: - result_blocks.extend(blk.convert(datetime=True, numeric=False)) - return result_blocks + return _extend_blocks([ b.convert(datetime=True, numeric=False) for b in blocks ]) def _can_hold_element(self, element): return True @@ -1626,38 +1722,53 @@ def should_store(self, value): np.datetime64, np.bool_)) or is_internal_type(value)) def replace(self, to_replace, value, inplace=False, filter=None, - regex=False): - blk = [self] + regex=False, convert=True, mgr=None): to_rep_is_list = com.is_list_like(to_replace) value_is_list = com.is_list_like(value) both_lists = to_rep_is_list and value_is_list either_list = to_rep_is_list or value_is_list + result_blocks = [] + blocks = [self] + if not either_list and com.is_re(to_replace): - blk[0], = blk[0]._replace_single(to_replace, value, - inplace=inplace, filter=filter, - regex=True) + return self._replace_single(to_replace, value, + inplace=inplace, filter=filter, + regex=True, convert=convert, mgr=mgr) elif not (either_list or regex): - blk = super(ObjectBlock, self).replace(to_replace, value, - inplace=inplace, - filter=filter, regex=regex) + return super(ObjectBlock, self).replace(to_replace, value, + inplace=inplace, + filter=filter, regex=regex, + convert=convert, mgr=mgr) elif both_lists: for to_rep, v in zip(to_replace, value): - blk[0], = blk[0]._replace_single(to_rep, v, inplace=inplace, - filter=filter, regex=regex) + result_blocks = [] + for b in blocks: + result = b._replace_single(to_rep, v, inplace=inplace, + filter=filter, regex=regex, + convert=convert, mgr=mgr) + result_blocks = _extend_blocks(result, result_blocks) + blocks = result_blocks + return result_blocks + elif to_rep_is_list and regex: for to_rep in to_replace: - blk[0], = blk[0]._replace_single(to_rep, value, - inplace=inplace, - filter=filter, regex=regex) - else: - blk[0], = blk[0]._replace_single(to_replace, value, - inplace=inplace, filter=filter, - regex=regex) - return blk + result_blocks = [] + for b in blocks: + result = b._replace_single(to_rep, value, + inplace=inplace, + filter=filter, regex=regex, + convert=convert, mgr=mgr) + result_blocks = _extend_blocks(result, result_blocks) + blocks = result_blocks + return result_blocks + + return self._replace_single(to_replace, value, + inplace=inplace, filter=filter, + convert=convert, regex=regex, mgr=mgr) def _replace_single(self, to_replace, value, inplace=False, filter=None, - regex=False): + regex=False, convert=True, mgr=None): # to_replace is regex compilable to_rep_re = regex and com.is_re_compilable(to_replace) @@ -1689,13 +1800,11 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, else: # if the thing to replace is not a string or compiled regex call # the superclass method -> to_replace is some kind of object - result = super(ObjectBlock, self).replace(to_replace, value, - inplace=inplace, - filter=filter, - regex=regex) - if not isinstance(result, list): - result = [result] - return result + return super(ObjectBlock, self).replace(to_replace, value, + inplace=inplace, + filter=filter, + regex=regex, + mgr=mgr) new_values = self.values if inplace else self.values.copy() @@ -1725,9 +1834,12 @@ def re_replacer(s): new_values[filt] = f(new_values[filt]) - return [self if inplace else - self.make_block(new_values, - fastpath=True)] + # convert + block = self.make_block(new_values) + if convert: + block = block.convert(by_item=True,numeric=False) + + return block class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): __slots__ = () @@ -1753,7 +1865,7 @@ def to_dense(self): return self.values.to_dense().view() def convert(self, copy=True, **kwargs): - return [self.copy() if copy else self] + return self.copy() if copy else self @property def array_dtype(self): @@ -1767,16 +1879,16 @@ def _slice(self, slicer): # return same dims as we currently have return self.values._slice(slicer) - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None, mgr=None): # we may need to upcast our fill to match our dtype if limit is not None: raise NotImplementedError("specifying a limit for 'fillna' has " "not been implemented yet") values = self.values if inplace else self.values.copy() - return [self.make_block_same_class(values=values.fillna(value=value, - limit=limit), - placement=self.mgr_locs)] + values = self._try_coerce_result(values.fillna(value=value, + limit=limit)) + return [self.make_block(values=values)] def interpolate(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, **kwargs): @@ -1787,7 +1899,7 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=limit), placement=self.mgr_locs) - def shift(self, periods, axis=0): + def shift(self, periods, axis=0, mgr=None): return self.make_block_same_class(values=self.values.shift(periods), placement=self.mgr_locs) @@ -1815,30 +1927,8 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): return self.make_block_same_class(new_values, new_mgr_locs) - def putmask(self, mask, new, align=True, inplace=False, - axis=0, transpose=False): - """ putmask the data to the block; it is possible that we may create a - new dtype of block - - return the resulting block(s) - - Parameters - ---------- - mask : the condition to respect - new : a ndarray/object - align : boolean, perform alignment on other/cond, default is True - inplace : perform inplace modification, default is False - - Returns - ------- - a new block(s), the result of the putmask - """ - new_values = self.values if inplace else self.values.copy() - new_values[mask] = new - return [self.make_block_same_class(values=new_values, placement=self.mgr_locs)] - def _astype(self, dtype, copy=False, raise_on_error=True, values=None, - klass=None): + klass=None, mgr=None): """ Coerce to the new type (if copy=True, return a new copy) raise on an except if raise == True @@ -1882,7 +1972,7 @@ def __init__(self, values, placement, fastpath=True, placement=placement, **kwargs) - def _astype(self, dtype, **kwargs): + def _astype(self, dtype, mgr=None, **kwargs): """ these automatically copy, so copy=True has no effect raise on an except if raise == True @@ -1921,22 +2011,52 @@ def _try_operate(self, values): return values.view('i8') def _try_coerce_args(self, values, other): - """ Coerce values and other to dtype 'i8'. NaN and NaT convert to - the smallest i8, and will correctly round-trip to NaT if converted - back in _try_coerce_result. values is always ndarray-like, other - may not be """ + """ + Coerce values and other to dtype 'i8'. NaN and NaT convert to + the smallest i8, and will correctly round-trip to NaT if converted + back in _try_coerce_result. values is always ndarray-like, other + may not be + + Parameters + ---------- + values : ndarray-like + other : ndarray-like or scalar + + Returns + ------- + base-type values, values mask, base-type other, other mask + """ + + values_mask = isnull(values) values = values.view('i8') + other_mask = False - if is_null_datelike_scalar(other): + if isinstance(other, bool): + raise TypeError + elif is_null_datelike_scalar(other): other = tslib.iNaT + other_mask = True elif isinstance(other, (datetime, np.datetime64, date)): - other = lib.Timestamp(other).asm8.view('i8') + other = lib.Timestamp(other) + if getattr(other,'tz') is not None: + raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") + other_mask = isnull(other) + other = other.asm8.view('i8') elif hasattr(other, 'dtype') and com.is_integer_dtype(other): other = other.view('i8') else: - other = np.array(other, dtype='i8') + try: + other = np.asarray(other) + other_mask = isnull(other) + + other = other.astype('i8',copy=False).view('i8') + except ValueError: - return values, other + # coercion issues + # let higher levels handle + raise TypeError + + return values, values_mask, other, other_mask def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -1951,52 +2071,6 @@ def _try_coerce_result(self, result): def fill_value(self): return tslib.iNaT - def _try_fill(self, value): - """ if we are a NaT, return the actual fill value """ - if isinstance(value, type(tslib.NaT)) or np.array(isnull(value)).all(): - value = tslib.iNaT - return value - - def fillna(self, value, limit=None, - inplace=False, downcast=None): - - mask = isnull(self.values) - value = self._try_fill(value) - - if limit is not None: - if self.ndim > 2: - raise NotImplementedError("number of dimensions for 'fillna' " - "is currently limited to 2") - mask[mask.cumsum(self.ndim-1)>limit]=False - - if mask.any(): - try: - return self._fillna_mask(mask, value, inplace=inplace) - except TypeError: - pass - # _fillna_mask raises TypeError when it fails - # cannot perform inplace op because of object coercion - values = self.get_values(dtype=object) - np.putmask(values, mask, value) - return [self.make_block(values, fastpath=True)] - else: - return [self if inplace else self.copy()] - - def _fillna_mask(self, mask, value, inplace=False): - if getattr(value, 'tzinfo', None) is None: - # Series comes to this path - values = self.values - if not inplace: - values = values.copy() - try: - np.putmask(values, mask, value) - return [self if inplace else - self.make_block(values, fastpath=True)] - except (ValueError, TypeError): - # scalar causes ValueError, and array causes TypeError - pass - raise TypeError - def to_native_types(self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -2068,28 +2142,25 @@ def get_values(self, dtype=None): .reshape(self.values.shape) return self.values - def _fillna_mask(self, mask, value, inplace=False): - # cannot perform inplace op for internal DatetimeIndex - my_tz = tslib.get_timezone(self.values.tz) - value_tz = tslib.get_timezone(getattr(value, 'tzinfo', None)) - - if (my_tz == value_tz or self.dtype == getattr(value, 'dtype', None)): - if my_tz == value_tz: - # hack for PY2.6 / numpy 1.7.1. - # Other versions can directly use self.values.putmask - # -------------------------------------- - try: - value = value.asm8 - except AttributeError: - value = tslib.Timestamp(value).asm8 - ### ------------------------------------ + def to_object_block(self, mgr): + """ + return myself as an object block - try: - values = self.values.putmask(mask, value) - return [self.make_block(values, fastpath=True)] - except ValueError: - pass - raise TypeError + Since we keep the DTI as a 1-d object, this is different + depends on BlockManager's ndim + """ + values = self.get_values(dtype=object) + kwargs = {} + if mgr.ndim > 1: + values = _block_shape(values,ndim=mgr.ndim) + kwargs['ndim'] = mgr.ndim + kwargs['placement']=[0] + return self.make_block(values, klass=ObjectBlock, **kwargs) + + def replace(self, *args, **kwargs): + # if we are forced to ObjectBlock, then don't coerce (to UTC) + kwargs['convert'] = False + return super(DatetimeTZBlock, self).replace(*args, **kwargs) def _slice(self, slicer): """ return a slice of my values """ @@ -2101,22 +2172,46 @@ def _slice(self, slicer): return self.values[slicer] def _try_coerce_args(self, values, other): - """ localize and return i8 for the values """ - values = values.tz_localize(None).asi8 + """ + localize and return i8 for the values + + Parameters + ---------- + values : ndarray-like + other : ndarray-like or scalar - if is_null_datelike_scalar(other): + Returns + ------- + base-type values, values mask, base-type other, other mask + """ + values_mask = isnull(values) + values = values.tz_localize(None).asi8 + other_mask = False + + if isinstance(other, ABCSeries): + other = self._holder(other) + other_mask = isnull(other) + if isinstance(other, bool): + raise TypeError + elif is_null_datelike_scalar(other): other = tslib.iNaT + other_mask = True elif isinstance(other, self._holder): if other.tz != self.values.tz: raise ValueError("incompatible or non tz-aware value") other = other.tz_localize(None).asi8 - else: + other_mask = isnull(other) + elif isinstance(other, (np.datetime64, datetime, date)): other = lib.Timestamp(other) - if not getattr(other, 'tz', None): + tz = getattr(other, 'tz', None) + + # test we can have an equal time zone + if tz is None or str(tz) != str(self.values.tz): raise ValueError("incompatible or non tz-aware value") - other = other.value + other_mask = isnull(other) + other = other.tz_localize(None).value - return values, other + return values, values_mask, other, other_mask def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -2128,7 +2223,7 @@ def _try_coerce_result(self, result): result = lib.Timestamp(result, tz=self.values.tz) return result - def shift(self, periods, axis=0): + def shift(self, periods, axis=0, mgr=None): """ shift the block by periods """ ### think about moving this to the DatetimeIndex. This is a non-freq (number of periods) shift ### @@ -2210,7 +2305,7 @@ def __len__(self): except: return 0 - def copy(self, deep=True): + def copy(self, deep=True, mgr=None): return self.make_block_same_class(values=self.values, sparse_index=self.sp_index, kind=self.kind, copy=deep, @@ -2259,7 +2354,7 @@ def interpolate(self, method='pad', axis=0, inplace=False, return self.make_block_same_class(values=values, placement=self.mgr_locs) - def fillna(self, value, limit=None, inplace=False, downcast=None): + def fillna(self, value, limit=None, inplace=False, downcast=None, mgr=None): # we may need to upcast our fill to match our dtype if limit is not None: raise NotImplementedError("specifying a limit for 'fillna' has " @@ -2271,7 +2366,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): fill_value=value, placement=self.mgr_locs)] - def shift(self, periods, axis=0): + def shift(self, periods, axis=0, mgr=None): """ shift the block by periods """ N = len(self.values.T) indexer = np.zeros(N, dtype=int) @@ -2715,12 +2810,9 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, consolidate kwargs[k] = obj.reindex_axis(b_items, axis=axis, copy=align_copy) + kwargs['mgr'] = self applied = getattr(b, f)(**kwargs) - - if isinstance(applied, list): - result_blocks.extend(applied) - else: - result_blocks.append(applied) + result_blocks = _extend_blocks(applied, result_blocks) if len(result_blocks) == 0: return self.make_empty(axes or self.axes) @@ -2768,9 +2860,12 @@ def convert(self, **kwargs): def replace(self, **kwargs): return self.apply('replace', **kwargs) - def replace_list(self, src_list, dest_list, inplace=False, regex=False): + def replace_list(self, src_list, dest_list, inplace=False, regex=False, mgr=None): """ do a list replace """ + if mgr is None: + mgr = self + # figure out our mask a-priori to avoid repeated replacements values = self.as_matrix() @@ -2792,11 +2887,8 @@ def comp(s): for b in rb: if b.dtype == np.object_: result = b.replace(s, d, inplace=inplace, - regex=regex) - if isinstance(result, list): - new_rb.extend(result) - else: - new_rb.append(result) + regex=regex, mgr=mgr) + new_rb = _extend_blocks(result, new_rb) else: # get our mask for this element, sized to this # particular block @@ -2930,7 +3022,7 @@ def __contains__(self, item): def nblocks(self): return len(self.blocks) - def copy(self, deep=True): + def copy(self, deep=True, mgr=None): """ Make deep or shallow copy of BlockManager @@ -4084,15 +4176,12 @@ def _consolidate(blocks): for (_can_consolidate, dtype), group_blocks in grouper: merged_blocks = _merge_blocks(list(group_blocks), dtype=dtype, _can_consolidate=_can_consolidate) - if isinstance(merged_blocks, list): - new_blocks.extend(merged_blocks) - else: - new_blocks.append(merged_blocks) - + new_blocks = _extend_blocks(merged_blocks, new_blocks) return new_blocks def _merge_blocks(blocks, dtype=None, _can_consolidate=True): + if len(blocks) == 1: return blocks[0] @@ -4119,6 +4208,22 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): return blocks +def _extend_blocks(result, blocks=None): + """ return a new extended blocks, givin the result """ + if blocks is None: + blocks = [] + if isinstance(result, list): + for r in result: + if isinstance(r, list): + blocks.extend(r) + else: + blocks.append(r) + elif isinstance(result, BlockManager): + blocks.extend(result.blocks) + else: + blocks.append(result) + return blocks + def _block_shape(values, ndim=1, shape=None): """ guarantee the shape of the values to be at least 1 d """ if values.ndim <= ndim: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4de641bb67926..4e25b546bddf2 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1839,7 +1839,9 @@ def set_atom_string(self, block, block_items, existing_col, min_itemsize, nan_rep, encoding): # fill nan items with myself, don't disturb the blocks by # trying to downcast - block = block.fillna(nan_rep, downcast=False)[0] + block = block.fillna(nan_rep, downcast=False) + if isinstance(block, list): + block = block[0] data = block.values # see if we have a valid string type diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0972b2e17c48a..13c671e8e4e59 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7221,6 +7221,7 @@ def test_to_csv_chunking(self): rs = read_csv(filename,index_col=0) assert_frame_equal(rs, aa) + @slow def test_to_csv_wide_frame_formatting(self): # Issue #8621 df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) @@ -9458,18 +9459,20 @@ def test_regex_replace_dict_nested(self): def test_regex_replace_dict_nested_gh4115(self): df = pd.DataFrame({'Type':['Q','T','Q','Q','T'], 'tmp':2}) expected = DataFrame({'Type': [0,1,0,0,1], 'tmp': 2}) - assert_frame_equal(df.replace({'Type': {'Q':0,'T':1}}), expected) + result = df.replace({'Type': {'Q':0,'T':1}}) + assert_frame_equal(result, expected) def test_regex_replace_list_to_scalar(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) + expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4), + 'c': [nan, nan, nan, 'd']}) + res = df.replace([r'\s*\.\s*', 'a|b'], nan, regex=True) res2 = df.copy() res3 = df.copy() res2.replace([r'\s*\.\s*', 'a|b'], nan, regex=True, inplace=True) res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=nan, inplace=True) - expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4), - 'c': [nan, nan, nan, 'd']}) assert_frame_equal(res, expec) assert_frame_equal(res2, expec) assert_frame_equal(res3, expec) @@ -9523,8 +9526,8 @@ def test_regex_replace_series_of_regexes(self): def test_regex_replace_numeric_to_object_conversion(self): mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) - res = df.replace(0, 'a') expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) + res = df.replace(0, 'a') assert_frame_equal(res, expec) self.assertEqual(res.a.dtype, np.object_) @@ -9953,6 +9956,56 @@ def test_replace_datetime(self): result = df.replace(d) tm.assert_frame_equal(result, expected) + def test_replace_datetimetz(self): + + # GH 11326 + # behaving poorly when presented with a datetime64[ns, tz] + df = DataFrame({'A' : date_range('20130101',periods=3,tz='US/Eastern'), + 'B' : [0, np.nan, 2]}) + result = df.replace(np.nan,1) + expected = DataFrame({'A' : date_range('20130101',periods=3,tz='US/Eastern'), + 'B' : Series([0, 1, 2],dtype='float64')}) + assert_frame_equal(result, expected) + + result = df.fillna(1) + assert_frame_equal(result, expected) + + result = df.replace(0,np.nan) + expected = DataFrame({'A' : date_range('20130101',periods=3,tz='US/Eastern'), + 'B' : [np.nan, np.nan, 2]}) + assert_frame_equal(result, expected) + + result = df.replace(Timestamp('20130102',tz='US/Eastern'),Timestamp('20130104',tz='US/Eastern')) + expected = DataFrame({'A' : [Timestamp('20130101',tz='US/Eastern'), + Timestamp('20130104',tz='US/Eastern'), + Timestamp('20130103',tz='US/Eastern')], + 'B' : [0, np.nan, 2]}) + assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1,0] = np.nan + result = result.replace({'A' : pd.NaT }, Timestamp('20130104',tz='US/Eastern')) + assert_frame_equal(result, expected) + + # coerce to object + result = df.copy() + result.iloc[1,0] = np.nan + result = result.replace({'A' : pd.NaT }, Timestamp('20130104',tz='US/Pacific')) + expected = DataFrame({'A' : [Timestamp('20130101',tz='US/Eastern'), + Timestamp('20130104',tz='US/Pacific'), + Timestamp('20130103',tz='US/Eastern')], + 'B' : [0, np.nan, 2]}) + assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1,0] = np.nan + result = result.replace({'A' : np.nan }, Timestamp('20130104')) + expected = DataFrame({'A' : [Timestamp('20130101',tz='US/Eastern'), + Timestamp('20130104'), + Timestamp('20130103',tz='US/Eastern')], + 'B' : [0, np.nan, 2]}) + assert_frame_equal(result, expected) + def test_combine_multiple_frames_dtypes(self): # GH 2759 diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 00553102e172f..fbab0d2a92203 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -306,7 +306,7 @@ def test_try_coerce_arg(self): block = create_block('datetime', [0]) # coerce None - none_coerced = block._try_coerce_args(block.values, None)[1] + none_coerced = block._try_coerce_args(block.values, None)[2] self.assertTrue(pd.Timestamp(none_coerced) is pd.NaT) # coerce different types of date bojects @@ -314,7 +314,7 @@ def test_try_coerce_arg(self): datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: - coerced = block._try_coerce_args(block.values, val)[1] + coerced = block._try_coerce_args(block.values, val)[2] self.assertEqual(np.int64, type(coerced)) self.assertEqual(pd.Timestamp('2010-10-10'), pd.Timestamp(coerced)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f8d2c8bfd0dfb..33f8ea080e21c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4032,6 +4032,21 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-04 10:00', tz=tz)]) self.assert_series_equal(expected, result) + # filling with a naive/other zone, coerce to object + result = s.fillna(Timestamp('20130101')) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2013-01-01'), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2013-01-01')]) + self.assert_series_equal(expected, result) + + result = s.fillna(Timestamp('20130101',tz='US/Pacific')) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2013-01-01',tz='US/Pacific'), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2013-01-01',tz='US/Pacific')]) + self.assert_series_equal(expected, result) + def test_fillna_int(self): s = Series(np.random.randint(-100, 100, 50)) s.fillna(method='ffill', inplace=True)