Skip to content

Commit fdd19a5

Browse files
sinhrks authored and jreback committed
CLN/BUG: fix ndarray assignment may cause unexpected cast
supersedes pandas-dev#14145 closes pandas-dev#14001
1 parent 4ca29f4 commit fdd19a5

File tree

11 files changed

+309
-77
lines changed

11 files changed

+309
-77
lines changed

doc/source/whatsnew/v0.21.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Backwards incompatible API changes
5050
- Accessing a non-existent attribute on a closed :class:`HDFStore` will now
5151
raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
5252

53+
5354
.. _whatsnew_0210.api:
5455

5556
Other API Changes
@@ -87,6 +88,9 @@ Bug Fixes
8788
Conversion
8889
^^^^^^^^^^
8990

91+
- Bug in assignment against datetime-like data where an ``int`` value may be incorrectly converted to datetime-like (:issue:`14145`)
92+
- Bug in assignment against ``int64`` data where assigning an ``np.ndarray`` of ``float64`` dtype may keep the ``int64`` dtype (:issue:`14001`)
93+
9094

9195

9296
Indexing

pandas/core/dtypes/cast.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def maybe_promote(dtype, fill_value=np.nan):
272272
else:
273273
if issubclass(dtype.type, np.datetime64):
274274
try:
275-
fill_value = lib.Timestamp(fill_value).value
275+
fill_value = Timestamp(fill_value).value
276276
except:
277277
# the proper thing to do here would probably be to upcast
278278
# to object (but numpy 1.6.1 doesn't do this properly)
@@ -349,9 +349,9 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
349349

350350
# a 1-element ndarray
351351
if isinstance(val, np.ndarray):
352+
msg = "invalid ndarray passed to _infer_dtype_from_scalar"
352353
if val.ndim != 0:
353-
raise ValueError(
354-
"invalid ndarray passed to _infer_dtype_from_scalar")
354+
raise ValueError(msg)
355355

356356
dtype = val.dtype
357357
val = val.item()
@@ -552,7 +552,7 @@ def conv(r, dtype):
552552
if isnull(r):
553553
pass
554554
elif dtype == _NS_DTYPE:
555-
r = lib.Timestamp(r)
555+
r = Timestamp(r)
556556
elif dtype == _TD_DTYPE:
557557
r = _coerce_scalar_to_timedelta_type(r)
558558
elif dtype == np.bool_:
@@ -1026,3 +1026,19 @@ def find_common_type(types):
10261026
return np.object
10271027

10281028
return np.find_common_type(types, [])
1029+
1030+
1031+
def _cast_scalar_to_array(shape, value, dtype=None):
    """
    Create an np.ndarray of the given shape with every element set to
    ``value``.

    Parameters
    ----------
    shape : int or tuple of int
        Shape of the array to allocate (passed straight to ``np.empty``).
    value : scalar
        Value used to fill the array.
    dtype : np.dtype, optional
        dtype of the resulting array. When omitted, both the dtype and the
        fill value are taken from ``infer_dtype_from_scalar(value)``.

    Returns
    -------
    np.ndarray
        Newly allocated array of ``shape`` and ``dtype`` filled with
        ``value``.
    """
    if dtype is None:
        # the inference helper in this module is ``infer_dtype_from_scalar``
        # (no leading underscore); it returns the dtype together with the
        # value to store for that dtype
        dtype, fill_value = infer_dtype_from_scalar(value)
    else:
        # caller chose the dtype explicitly; store the value as given
        fill_value = value

    values = np.empty(shape, dtype=dtype)
    values.fill(fill_value)

    return values

pandas/core/frame.py

+8-13
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
is_named_tuple)
6060
from pandas.core.dtypes.missing import isnull, notnull
6161

62+
6263
from pandas.core.common import (_try_sort,
6364
_default_index,
6465
_values_from_object,
@@ -355,15 +356,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
355356
raise_with_traceback(exc)
356357

357358
if arr.ndim == 0 and index is not None and columns is not None:
358-
if isinstance(data, compat.string_types) and dtype is None:
359-
dtype = np.object_
360-
if dtype is None:
361-
dtype, data = infer_dtype_from_scalar(data)
362-
363-
values = np.empty((len(index), len(columns)), dtype=dtype)
364-
values.fill(data)
365-
mgr = self._init_ndarray(values, index, columns, dtype=dtype,
366-
copy=False)
359+
values = _cast_scalar_to_array((len(index), len(columns)),
360+
data, dtype=dtype)
361+
mgr = self._init_ndarray(values, index, columns,
362+
dtype=values.dtype, copy=False)
367363
else:
368364
raise ValueError('DataFrame constructor not properly called!')
369365

@@ -477,7 +473,7 @@ def _get_axes(N, K, index=index, columns=columns):
477473
values = _prep_ndarray(values, copy=copy)
478474

479475
if dtype is not None:
480-
if values.dtype != dtype:
476+
if not is_dtype_equal(values.dtype, dtype):
481477
try:
482478
values = values.astype(dtype)
483479
except Exception as orig:
@@ -2653,9 +2649,8 @@ def reindexer(value):
26532649

26542650
else:
26552651
# upcast the scalar
2656-
dtype, value = infer_dtype_from_scalar(value)
2657-
value = np.repeat(value, len(self.index)).astype(dtype)
2658-
value = maybe_cast_to_datetime(value, dtype)
2652+
value = _cast_scalar_to_array(len(self.index), value)
2653+
value = _possibly_cast_to_datetime(value, value.dtype)
26592654

26602655
# return internal types directly
26612656
if is_extension_type(value):

pandas/core/internals.py

+88-32
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
388388

389389
# fillna, but if we cannot coerce, then try again as an ObjectBlock
390390
try:
391-
values, _, value, _ = self._try_coerce_args(self.values, value)
391+
values, _, _, _ = self._try_coerce_args(self.values, value)
392+
# value may be converted to internal, thus drop
392393
blocks = self.putmask(mask, value, inplace=inplace)
393394
blocks = [b.make_block(values=self._try_coerce_result(b.values))
394395
for b in blocks]
@@ -682,8 +683,43 @@ def setitem(self, indexer, value, mgr=None):
682683
if self.is_numeric:
683684
value = np.nan
684685

685-
# coerce args
686-
values, _, value, _ = self._try_coerce_args(self.values, value)
686+
# coerce if block dtype can store value
687+
values = self.values
688+
try:
689+
values, _, value, _ = self._try_coerce_args(values, value)
690+
# can keep its own dtype
691+
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
692+
value.dtype):
693+
dtype = self.dtype
694+
else:
695+
dtype = 'infer'
696+
697+
except (TypeError, ValueError):
698+
# current dtype cannot store value, coerce to common dtype
699+
find_dtype = False
700+
701+
if hasattr(value, 'dtype'):
702+
dtype = value.dtype
703+
find_dtype = True
704+
705+
elif is_scalar(value):
706+
if isnull(value):
707+
# NaN promotion is handled in latter path
708+
dtype = False
709+
else:
710+
dtype, _ = _infer_dtype_from_scalar(value,
711+
pandas_dtype=True)
712+
find_dtype = True
713+
else:
714+
dtype = 'infer'
715+
716+
if find_dtype:
717+
dtype = _find_common_type([values.dtype, dtype])
718+
if not is_dtype_equal(self.dtype, dtype):
719+
b = self.astype(dtype)
720+
return b.setitem(indexer, value, mgr=mgr)
721+
722+
# value must be storeable at this moment
687723
arr_value = np.array(value)
688724

689725
# cast the values to a type that can hold nan (if necessary)
@@ -713,19 +749,8 @@ def setitem(self, indexer, value, mgr=None):
713749
raise ValueError("cannot set using a slice indexer with a "
714750
"different length than the value")
715751

716-
try:
717-
718-
def _is_scalar_indexer(indexer):
719-
# return True if we are all scalar indexers
720-
721-
if arr_value.ndim == 1:
722-
if not isinstance(indexer, tuple):
723-
indexer = tuple([indexer])
724-
return all([is_scalar(idx) for idx in indexer])
725-
return False
726-
727-
def _is_empty_indexer(indexer):
728-
# return a boolean if we have an empty indexer
752+
def _is_scalar_indexer(indexer):
753+
# return True if we are all scalar indexers
729754

730755
if arr_value.ndim == 1:
731756
if not isinstance(indexer, tuple):
@@ -777,23 +802,43 @@ def _is_empty_indexer(indexer):
777802
raise
778803
except TypeError:
779804

780-
# cast to the passed dtype if possible
781-
# otherwise raise the original error
782-
try:
783-
# e.g. we are uint32 and our value is uint64
784-
# this is for compat with older numpies
785-
block = self.make_block(transf(values.astype(value.dtype)))
786-
return block.setitem(indexer=indexer, value=value, mgr=mgr)
805+
def _is_empty_indexer(indexer):
806+
# return a boolean if we have an empty indexer
787807

788-
except:
789-
pass
790-
791-
raise
808+
if arr_value.ndim == 1:
809+
if not isinstance(indexer, tuple):
810+
indexer = tuple([indexer])
811+
return any(isinstance(idx, np.ndarray) and len(idx) == 0
812+
for idx in indexer)
813+
return False
792814

793-
except Exception:
815+
# empty indexers
816+
# 8669 (empty)
817+
if _is_empty_indexer(indexer):
794818
pass
795819

796-
return [self]
820+
# setting a single element for each dim and with a rhs that could
821+
# be say a list
822+
# GH 6043
823+
elif _is_scalar_indexer(indexer):
824+
values[indexer] = value
825+
826+
# if we are an exact match (ex-broadcasting),
827+
# then use the resultant dtype
828+
elif (len(arr_value.shape) and
829+
arr_value.shape[0] == values.shape[0] and
830+
np.prod(arr_value.shape) == np.prod(values.shape)):
831+
values[indexer] = value
832+
values = values.astype(arr_value.dtype)
833+
834+
# set
835+
else:
836+
values[indexer] = value
837+
838+
# coerce and try to infer the dtypes of the result
839+
values = self._try_coerce_and_cast_result(values, dtype)
840+
block = self.make_block(transf(values), fastpath=True)
841+
return block
797842

798843
def putmask(self, mask, new, align=True, inplace=False, axis=0,
799844
transpose=False, mgr=None):
@@ -1264,6 +1309,7 @@ def func(cond, values, other):
12641309

12651310
values, values_mask, other, other_mask = self._try_coerce_args(
12661311
values, other)
1312+
12671313
try:
12681314
return self._try_coerce_result(expressions.where(
12691315
cond, values, other, raise_on_error=True))
@@ -1543,6 +1589,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
15431589
new = new[mask]
15441590

15451591
mask = _safe_reshape(mask, new_values.shape)
1592+
15461593
new_values[mask] = new
15471594
new_values = self._try_coerce_result(new_values)
15481595
return [self.make_block(values=new_values)]
@@ -1712,7 +1759,7 @@ def fillna(self, value, **kwargs):
17121759

17131760
# allow filling with integers to be
17141761
# interpreted as seconds
1715-
if not isinstance(value, np.timedelta64) and is_integer(value):
1762+
if not isinstance(value, np.timedelta64):
17161763
value = Timedelta(value, unit='s')
17171764
return super(TimeDeltaBlock, self).fillna(value, **kwargs)
17181765

@@ -1949,6 +1996,15 @@ def _maybe_downcast(self, blocks, downcast=None):
19491996
def _can_hold_element(self, element):
19501997
return True
19511998

1999+
def _try_coerce_args(self, values, other):
    """
    Coerce the input arguments for this block.

    ``values`` is passed through untouched. ``other`` is passed through as
    well, unless it is a DatetimeIndex, in which case its ``asobject.values``
    is used so that it can be stored as object (DatetimeTZBlock case).

    Returns the tuple ``(values, False, other, False)``.
    """
    coerced = (other.asobject.values
               if isinstance(other, ABCDatetimeIndex)
               else other)
    return values, False, coerced, False
2007+
19522008
def _try_cast(self, element):
19532009
return element
19542010

@@ -2288,8 +2344,6 @@ def _try_coerce_args(self, values, other):
22882344
"naive Block")
22892345
other_mask = isnull(other)
22902346
other = other.asm8.view('i8')
2291-
elif hasattr(other, 'dtype') and is_integer_dtype(other):
2292-
other = other.view('i8')
22932347
else:
22942348
try:
22952349
other = np.asarray(other)
@@ -2466,6 +2520,8 @@ def _try_coerce_args(self, values, other):
24662520
raise ValueError("incompatible or non tz-aware value")
24672521
other_mask = isnull(other)
24682522
other = other.value
2523+
else:
2524+
raise TypeError
24692525

24702526
return values, values_mask, other, other_mask
24712527

pandas/core/panel.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
178178
copy = False
179179
dtype = None
180180
elif is_scalar(data) and all(x is not None for x in passed_axes):
181-
if dtype is None:
182-
dtype, data = infer_dtype_from_scalar(data)
183-
values = np.empty([len(x) for x in passed_axes], dtype=dtype)
184-
values.fill(data)
185-
mgr = self._init_matrix(values, passed_axes, dtype=dtype,
181+
values = _cast_scalar_to_array([len(x) for x in passed_axes],
182+
data, dtype=dtype)
183+
mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
186184
copy=False)
187185
copy = False
188186
else: # pragma: no cover
@@ -582,9 +580,7 @@ def __setitem__(self, key, value):
582580
shape[1:], tuple(map(int, value.shape))))
583581
mat = np.asarray(value)
584582
elif is_scalar(value):
585-
dtype, value = infer_dtype_from_scalar(value)
586-
mat = np.empty(shape[1:], dtype=dtype)
587-
mat.fill(value)
583+
mat = _cast_scalar_to_array(shape[1:], value)
588584
else:
589585
raise TypeError('Cannot set item of type: %s' % str(type(value)))
590586

0 commit comments

Comments
 (0)