Skip to content

Commit cb56c98

Browse files
committed
CLN: change call signature of _maybe_promote (from stephenwlin branch)
and _infer_dtype_from_scalar to match (both return dtype, fill_value). Diff between 'jreback/dtypes_bug' and 'stephenwlin/dtypes_bug'. Conflicts: pandas/core/common.py
1 parent 2ce3b56 commit cb56c98

File tree

7 files changed

+78
-67
lines changed

7 files changed

+78
-67
lines changed

pandas/core/common.py

+49-33
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ def take_1d(arr, indexer, out=None, fill_value=np.nan):
504504
dtype, fill_value = arr.dtype, arr.dtype.type()
505505
else:
506506
indexer = _ensure_int64(indexer)
507-
dtype = _maybe_promote(arr.dtype, fill_value)
507+
dtype = _maybe_promote(arr.dtype, fill_value)[0]
508508
if dtype != arr.dtype:
509509
mask = indexer == -1
510510
needs_masking = mask.any()
@@ -552,7 +552,7 @@ def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan, out=None):
552552
else:
553553
col_idx = _ensure_int64(col_idx)
554554

555-
dtype = _maybe_promote(arr.dtype, fill_value)
555+
dtype = _maybe_promote(arr.dtype, fill_value)[0]
556556
if dtype != arr.dtype:
557557
row_mask = row_idx == -1
558558
col_mask = col_idx == -1
@@ -588,7 +588,7 @@ def diff(arr, n, axis=0):
588588
n = int(n)
589589
dtype = arr.dtype
590590
if issubclass(dtype.type, np.integer):
591-
dtype = np.float_
591+
dtype = np.float64
592592
elif issubclass(dtype.type, np.bool_):
593593
dtype = np.object_
594594

@@ -629,7 +629,7 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
629629
else:
630630
indexer = _ensure_int64(indexer)
631631
if needs_masking:
632-
dtype = _maybe_promote(arr.dtype, fill_value)
632+
dtype = _maybe_promote(arr.dtype, fill_value)[0]
633633
if dtype != arr.dtype and out is not None and out.dtype != dtype:
634634
raise Exception('Incompatible type for fill_value')
635635
else:
@@ -644,16 +644,20 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
644644
take_f(arr, indexer, out=out, fill_value=fill_value)
645645
return out
646646

647+
647648
def _infer_dtype_from_scalar(val):
648649
""" interpret the dtype from a scalar, upcast floats and ints
649650
return the dtype and the new value (in that order) """
650651

652+
dtype = np.object_
653+
651654
# a 1-element ndarray
652655
if isinstance(val, pa.Array):
653656
if val.ndim != 0:
654657
raise ValueError("invalid ndarray passed to _infer_dtype_from_scalar")
655658

656-
return val.item(), val.dtype
659+
dtype = val.dtype
660+
val = val.item()
657661

658662
elif isinstance(val, basestring):
659663

@@ -662,67 +666,79 @@ def _infer_dtype_from_scalar(val):
662666
# so this is kind of bad. Alternately we could use np.repeat
663667
# instead of np.empty (but then you still don't want things
664668
# coming out as np.str_!
665-
return val, np.object_
669+
670+
dtype = np.object_
666671

667672
elif isinstance(val, np.datetime64):
668673
# ugly hacklet
669-
val = lib.Timestamp(val).value
670-
return val, np.dtype('M8[ns]')
674+
val = lib.Timestamp(val).value
675+
dtype = np.dtype('M8[ns]')
671676

672677
elif is_bool(val):
673-
return val, np.bool_
678+
dtype = np.bool_
674679

675680
# provide implicit upcast on scalars
676681
elif is_integer(val):
677-
return val, np.int64
682+
dtype = np.int64
683+
678684
elif is_float(val):
679-
return val, np.float64
685+
dtype = np.float64
680686

681687
elif is_complex(val):
682-
return val, np.complex_
688+
dtype = np.complex_
683689

684-
return val, np.object_
690+
return dtype, val
685691

686692
def _maybe_promote(dtype, fill_value=np.nan):
693+
# returns tuple of (dtype, fill_value)
687694
if issubclass(dtype.type, np.datetime64):
688-
# for now: refuse to upcast
695+
# for now: refuse to upcast datetime64
689696
# (this is because datetime64 will not implicitly upconvert
690697
# to object correctly as of numpy 1.6.1)
691-
return dtype
698+
if isnull(fill_value):
699+
fill_value = tslib.iNaT
700+
else:
701+
try:
702+
fill_value = lib.Timestamp(fill_value).value
703+
except:
704+
# the proper thing to do here would probably be to upcast to
705+
# object (but numpy 1.6.1 doesn't do this properly)
706+
fill_value = tslib.iNaT
692707
elif is_float(fill_value):
693708
if issubclass(dtype.type, np.bool_):
694-
return np.object_
709+
dtype = np.object_
695710
elif issubclass(dtype.type, np.integer):
696-
return np.float64
697-
return dtype
711+
dtype = np.float64
698712
elif is_bool(fill_value):
699-
if issubclass(dtype.type, np.bool_):
700-
return dtype
701-
return np.object_
713+
if not issubclass(dtype.type, np.bool_):
714+
dtype = np.object_
702715
elif is_integer(fill_value):
703716
if issubclass(dtype.type, np.bool_):
704-
return np.object_
717+
dtype = np.object_
705718
elif issubclass(dtype.type, np.integer):
706719
# upcast to prevent overflow
707720
arr = np.asarray(fill_value)
708721
if arr != arr.astype(dtype):
709-
return arr.dtype
710-
return dtype
711-
return dtype
722+
dtype = arr.dtype
712723
elif is_complex(fill_value):
713724
if issubclass(dtype.type, np.bool_):
714-
return np.object_
725+
dtype = np.object_
715726
elif issubclass(dtype.type, (np.integer, np.floating)):
716-
return np.complex_
717-
return dtype
718-
return np.object_
727+
dtype = np.complex128
728+
else:
729+
dtype = np.object_
730+
return dtype, fill_value
719731

720-
def _maybe_upcast(values):
721-
""" provide explicit type promotion and coercion """
722-
new_dtype = _maybe_promote(values.dtype)
732+
def _maybe_upcast(values, fill_value=np.nan, copy=False):
733+
""" provide explicit type promotion and coercion
734+
if copy == True, then a copy is created even if no upcast is required """
735+
736+
new_dtype, fill_value = _maybe_promote(values.dtype, fill_value)
723737
if new_dtype != values.dtype:
724738
values = values.astype(new_dtype)
725-
return values
739+
elif copy:
740+
values = values.copy()
741+
return values, fill_value
726742

727743
def _possibly_cast_item(obj, item, dtype):
728744
chunk = obj[item]

pandas/core/frame.py

+6-15
Original file line numberDiff line numberDiff line change
@@ -390,12 +390,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
390390
mgr = self._init_dict(data, index, columns, dtype=dtype)
391391
elif isinstance(data, ma.MaskedArray):
392392
mask = ma.getmaskarray(data)
393-
datacopy = ma.copy(data)
394-
if issubclass(data.dtype.type, np.datetime64):
395-
datacopy[mask] = tslib.iNaT
396-
else:
397-
datacopy = com._maybe_upcast(datacopy)
398-
datacopy[mask] = NA
393+
datacopy, fill_value = com._maybe_upcast(data, copy=True)
394+
datacopy[mask] = fill_value
399395
mgr = self._init_ndarray(datacopy, index, columns, dtype=dtype,
400396
copy=copy)
401397
elif isinstance(data, np.ndarray):
@@ -437,7 +433,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
437433
if isinstance(data, basestring) and dtype is None:
438434
dtype = np.object_
439435
if dtype is None:
440-
data, dtype = _infer_dtype_from_scalar(data)
436+
dtype, data = _infer_dtype_from_scalar(data)
441437

442438
values = np.empty((len(index), len(columns)), dtype=dtype)
443439
values.fill(data)
@@ -1878,7 +1874,7 @@ def set_value(self, index, col, value):
18781874
new_index, new_columns = self._expand_axes((index, col))
18791875
result = self.reindex(index=new_index, columns=new_columns,
18801876
copy=False)
1881-
value, likely_dtype = _infer_dtype_from_scalar(value)
1877+
likely_dtype, value = _infer_dtype_from_scalar(value)
18821878

18831879
made_bigger = not np.array_equal(new_columns, self.columns)
18841880

@@ -2208,7 +2204,7 @@ def _sanitize_column(self, key, value):
22082204
existing_piece = self[key]
22092205

22102206
# upcast the scalar
2211-
value, dtype = _infer_dtype_from_scalar(value)
2207+
dtype, value = _infer_dtype_from_scalar(value)
22122208

22132209
# transpose hack
22142210
if isinstance(existing_piece, DataFrame):
@@ -2217,16 +2213,11 @@ def _sanitize_column(self, key, value):
22172213
else:
22182214
value = np.repeat(value, len(self.index))
22192215

2220-
# special case for now (promotion)
2221-
if (com.is_float_dtype(existing_piece) and
2222-
com.is_integer_dtype(value)):
2223-
dtype = np.float64
2224-
22252216
value = value.astype(dtype)
22262217

22272218
else:
22282219
# upcast the scalar
2229-
value, dtype = _infer_dtype_from_scalar(value)
2220+
dtype, value = _infer_dtype_from_scalar(value)
22302221
value = np.array(np.repeat(value, len(self.index)), dtype=dtype)
22312222

22322223
value = com._possibly_cast_to_datetime(value, dtype)

pandas/core/internals.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -377,11 +377,11 @@ def shift(self, indexer, periods):
377377
new_values = self.values.take(indexer, axis=1)
378378
# convert integer to float if necessary. need to do a lot more than
379379
# that, handle boolean etc also
380-
new_values = com._maybe_upcast(new_values)
380+
new_values, fill_value = com._maybe_upcast(new_values)
381381
if periods > 0:
382-
new_values[:, :periods] = np.nan
382+
new_values[:, :periods] = fill_value
383383
else:
384-
new_values[:, periods:] = np.nan
384+
new_values[:, periods:] = fill_value
385385
return make_block(new_values, self.items, self.ref_items)
386386

387387
def where(self, func, other, cond = None, raise_on_error = True, try_cast = False):
@@ -1412,7 +1412,7 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
14121412
block_shape = list(self.shape)
14131413
block_shape[0] = len(items)
14141414

1415-
fill_value, dtype = com._infer_dtype_from_scalar(fill_value)
1415+
dtype, fill_value = com._infer_dtype_from_scalar(fill_value)
14161416
block_values = np.empty(block_shape, dtype=dtype)
14171417
block_values.fill(fill_value)
14181418
na_block = make_block(block_values, items, ref_items)

pandas/core/panel.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,7 @@ def set_value(self, *args):
658658
d = self._construct_axes_dict_from(self, axes, copy=False)
659659
result = self.reindex(**d)
660660
args = list(args)
661-
args[-1], likely_dtype = _infer_dtype_from_scalar(args[-1])
661+
likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1])
662662
made_bigger = not np.array_equal(
663663
axes[0], getattr(self, self._info_axis))
664664
# how to make this logic simpler?
@@ -693,7 +693,7 @@ def __setitem__(self, key, value):
693693
assert(value.shape == shape[1:])
694694
mat = np.asarray(value)
695695
elif np.isscalar(value):
696-
value, dtype = _infer_dtype_from_scalar(value)
696+
dtype, value = _infer_dtype_from_scalar(value)
697697
mat = np.empty(shape[1:], dtype=dtype)
698698
mat.fill(value)
699699
else:

pandas/core/reshape.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,9 @@ def get_new_values(self):
149149
stride = values.shape[1]
150150
result_width = width * stride
151151

152-
new_values = np.empty((length, result_width), dtype=_maybe_promote(values.dtype))
153-
new_values.fill(np.nan)
152+
dtype, fill_value = _maybe_promote(values.dtype)
153+
new_values = np.empty((length, result_width), dtype=dtype)
154+
new_values.fill(fill_value)
154155
new_mask = np.zeros((length, result_width), dtype=bool)
155156

156157
# is there a simpler / faster way of doing this?
@@ -773,12 +774,12 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None):
773774
mask = np.zeros(np.prod(shape), dtype=bool)
774775
mask.put(selector, True)
775776

776-
pvalues = np.empty(panel_shape, dtype=values.dtype)
777-
if not issubclass(pvalues.dtype.type, (np.integer, np.bool_)):
778-
pvalues.fill(np.nan)
779-
elif not mask.all():
780-
pvalues = _maybe_upcast(pvalues)
781-
pvalues.fill(np.nan)
777+
if mask.all():
778+
pvalues = np.empty(panel_shape, dtype=values.dtype)
779+
else:
780+
dtype, fill_value = _maybe_promote(values.dtype)
781+
pvalues = np.empty(panel_shape, dtype=dtype)
782+
pvalues.fill(fill_value)
782783

783784
values = values
784785
for i in xrange(len(items)):

pandas/core/series.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2818,14 +2818,15 @@ def _get_values():
28182818
return values
28192819

28202820
if offset is None:
2821-
new_values = pa.empty(len(self), dtype=_maybe_promote(self.dtype))
2821+
dtype, fill_value = _maybe_promote(self.dtype)
2822+
new_values = pa.empty(len(self), dtype=dtype)
28222823

28232824
if periods > 0:
28242825
new_values[periods:] = self.values[:-periods]
2825-
new_values[:periods] = nan
2826+
new_values[:periods] = fill_value
28262827
elif periods < 0:
28272828
new_values[:periods] = self.values[-periods:]
2828-
new_values[periods:] = nan
2829+
new_values[periods:] = fill_value
28292830

28302831
return Series(new_values, index=self.index, name=self.name)
28312832
elif isinstance(self.index, PeriodIndex):
@@ -3129,7 +3130,7 @@ def _try_cast(arr):
31293130

31303131
# figure out the dtype from the value (upcast if necessary)
31313132
if dtype is None:
3132-
value, dtype = _infer_dtype_from_scalar(value)
3133+
dtype, value = _infer_dtype_from_scalar(value)
31333134
else:
31343135
# need to possibly convert the value here
31353136
value = com._possibly_cast_to_datetime(value, dtype)

pandas/tests/test_frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,10 @@ def test_setitem_cast(self):
441441
self.assert_(self.frame['D'].dtype == np.int64)
442442

443443
# #669, should not cast?
444+
# the column is now int64: assigning a scalar replaces the column using
445+
# the dtype inferred from the value (regardless of the existing dtype)
444446
self.frame['B'] = 0
445-
self.assert_(self.frame['B'].dtype == np.float64)
447+
self.assert_(self.frame['B'].dtype == np.int64)
446448

447449
# cast if pass array of course
448450
self.frame['B'] = np.arange(len(self.frame))

0 commit comments

Comments
 (0)