Skip to content

Commit 6cdea33

Browse files
committed
CLN: cleaned up _possibly_convert_platform
1 parent 37bb22a commit 6cdea33

File tree

9 files changed

+228
-160
lines changed

9 files changed

+228
-160
lines changed

RELEASE.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ pandas 0.11.0
5353

5454
- Do not automatically upcast numeric specified dtypes to ``int64`` or
5555
``float64`` (GH622_ and GH797_)
56-
- DataFrame construction of lists will no longer be platform dependent when
57-
dtype is NOT specified, e.g. DataFrame([1,2]) will be ``int64``
58-
like DataFrame({'a' : [1,2]})
56+
- DataFrame construction of lists and scalars, with no dtype present, will
57+
result in casting to ``int64`` or ``float64``, regardless of platform.
58+
This is not an apparent change in the API, but it is noted here for completeness.
5959
- Guarantee that ``convert_objects()`` for Series/DataFrame always returns a
6060
copy
6161
- groupby operations will respect dtypes for numeric float operations

doc/source/v0.11.0.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,17 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
5757

5858
**Platform Gotchas**
5959

60-
In versions prior to 0.11.0, DataFrame construction with lists was platform dependent (meaning 32-bit vs 64-bit).
61-
``DataFrame([1,2],columns=['a'])`` would have a dtype of ``int32``,
62-
while ``DataFrame({'a' : [1,2] })`` would be ``int64``.
63-
Now construction dtype defaults will be handled in a platform independent manor,
64-
resulting in defaults for integers of ``int64`` and floats of ``float64`` dtypes.
60+
Starting in 0.11.0, construction of DataFrame/Series will use default dtypes of ``int64`` and ``float64``,
61+
*regardless of platform*. This is not an apparent change from earlier versions of pandas. If you specify
62+
dtypes, they *WILL* be respected, however.
63+
64+
The following will all result in ``int64`` dtypes
65+
66+
.. ipython:: python
67+
68+
DataFrame([1,2],columns=['a']).dtypes
69+
DataFrame({'a' : [1,2] }).dtypes
70+
DataFrame({'a' : 1 }, index=range(2)).dtypes
6571

6672
Keep in mind that ``DataFrame(np.array([1,2]))`` **WILL** result in ``int32`` on 32-bit platforms!
6773

pandas/core/common.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,21 @@ def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None,
643643
take_f(arr, indexer, out=out, fill_value=fill_value)
644644
return out
645645

646+
def _dtype_from_scalar(val):
647+
""" interpret the dtype from a scalar, upcast floats and ints """
648+
if isinstance(val, np.datetime64):
649+
# ugly hacklet
650+
val = lib.Timestamp(val).value
651+
return val, np.dtype('M8[ns]')
652+
653+
# provide implicit upcast on scalars
654+
elif is_integer(val):
655+
if not is_bool(val):
656+
return val, np.int64
657+
elif is_float(val):
658+
return val, np.float64
659+
660+
return val, type(val)
646661

647662
def _maybe_promote(dtype, fill_value=np.nan):
648663
if issubclass(dtype.type, np.datetime64):
@@ -654,7 +669,7 @@ def _maybe_promote(dtype, fill_value=np.nan):
654669
if issubclass(dtype.type, np.bool_):
655670
return np.object_
656671
elif issubclass(dtype.type, np.integer):
657-
return np.float_
672+
return np.float64
658673
return dtype
659674
elif is_bool(fill_value):
660675
if issubclass(dtype.type, np.bool_):
@@ -682,7 +697,7 @@ def _maybe_promote(dtype, fill_value=np.nan):
682697
def _maybe_upcast(values):
683698
# TODO: convert remaining usage of _maybe_upcast to _maybe_promote
684699
if issubclass(values.dtype.type, np.integer):
685-
values = values.astype(np.float_)
700+
values = values.astype(np.float64)
686701
elif issubclass(values.dtype.type, np.bool_):
687702
values = values.astype(np.object_)
688703
return values
@@ -805,11 +820,11 @@ def _consensus_name_attr(objs):
805820
# Lots of little utilities
806821

807822

808-
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, convert_platform=False):
823+
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
809824
""" if we have an object dtype, try to coerce dates and/or numers """
810825

811826
# convert dates
812-
if convert_dates and getattr(values,'dtype',None) == np.object_:
827+
if convert_dates and values.dtype == np.object_:
813828

814829
# we take an aggressive stance and convert to datetime64[ns]
815830
if convert_dates == 'coerce':
@@ -823,7 +838,7 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True,
823838
values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)
824839

825840
# convert to numeric
826-
if convert_numeric and getattr(values,'dtype',None) == np.object_:
841+
if convert_numeric and values.dtype == np.object_:
827842
try:
828843
new_values = lib.maybe_convert_numeric(values,set(),coerce_numeric=True)
829844

@@ -834,13 +849,15 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True,
834849
except:
835850
pass
836851

837-
# platform conversion
838-
# allow ndarray or list here
839-
if convert_platform:
840-
if isinstance(values, (list,tuple)):
841-
values = lib.list_to_object_array(values)
842-
if values.dtype == np.object_:
843-
values = lib.maybe_convert_objects(values)
852+
return values
853+
854+
def _possibly_convert_platform(values):
855+
""" try to do platform conversion, allow ndarray or list here """
856+
857+
if isinstance(values, (list,tuple)):
858+
values = lib.list_to_object_array(values)
859+
if values.dtype == np.object_:
860+
values = lib.maybe_convert_objects(values)
844861

845862
return values
846863

@@ -887,12 +904,13 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
887904

888905

889906
def _infer_dtype(value):
907+
# provide upcasting here for floats/ints
890908
if isinstance(value, (float, np.floating)):
891-
return np.float_
909+
return np.float64
892910
elif isinstance(value, (bool, np.bool_)):
893911
return np.bool_
894912
elif isinstance(value, (int, long, np.integer)):
895-
return np.int_
913+
return np.int64
896914
elif isinstance(value, (complex, np.complexfloating)):
897915
return np.complex_
898916
else:

pandas/core/frame.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@
2323
import numpy.ma as ma
2424

2525
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
26-
_default_index, _is_sequence)
26+
_default_index, _is_sequence, _dtype_from_scalar)
2727
from pandas.core.generic import NDFrame
2828
from pandas.core.index import Index, MultiIndex, _ensure_index
2929
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
3030
_is_index_slice, _check_bool_indexer)
3131
from pandas.core.internals import BlockManager, make_block, form_blocks
32-
from pandas.core.series import Series, _radd_compat, _dtype_from_scalar
32+
from pandas.core.series import Series, _radd_compat
3333
from pandas.compat.scipy import scoreatpercentile as _quantile
3434
from pandas.util.compat import OrderedDict
3535
from pandas.util import py3compat
@@ -2207,21 +2207,29 @@ def _sanitize_column(self, key, value):
22072207
if key in self.columns:
22082208
existing_piece = self[key]
22092209

2210+
# upcast the scalar
2211+
value, dtype = _dtype_from_scalar(value)
2212+
22102213
# transpose hack
22112214
if isinstance(existing_piece, DataFrame):
22122215
shape = (len(existing_piece.columns), len(self.index))
22132216
value = np.repeat(value, np.prod(shape)).reshape(shape)
22142217
else:
22152218
value = np.repeat(value, len(self.index))
22162219

2217-
# special case for now
2220+
# special case for now (promotion)
22182221
if (com.is_float_dtype(existing_piece) and
22192222
com.is_integer_dtype(value)):
2220-
value = value.astype(np.float64)
2223+
dtype = np.float64
2224+
2225+
value = value.astype(dtype)
22212226

22222227
else:
2223-
value = np.repeat(value, len(self.index))
2228+
# upcast the scalar
2229+
value, dtype = _dtype_from_scalar(value)
2230+
value = np.array(np.repeat(value, len(self.index)), dtype=dtype)
22242231

2232+
value = com._possibly_cast_to_datetime(value, dtype)
22252233
return np.atleast_2d(np.asarray(value))
22262234

22272235
def pop(self, item):
@@ -5461,11 +5469,7 @@ def _prep_ndarray(values, copy=True):
54615469
return np.empty((0, 0), dtype=object)
54625470

54635471
def convert(v):
5464-
return com._possibly_convert_objects(v,
5465-
convert_dates=False,
5466-
convert_numeric=False,
5467-
convert_platform=True)
5468-
5472+
return com._possibly_convert_platform(v)
54695473

54705474
# we could have a 1-dim or 2-dim list here
54715475
# this is equiv of np.asarray, but does object conversion

pandas/core/series.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3113,10 +3113,7 @@ def _try_cast(arr):
31133113
subarr = lib.maybe_convert_objects(subarr)
31143114

31153115
else:
3116-
subarr = com._possibly_convert_objects(data,
3117-
convert_dates=False,
3118-
convert_numeric=False,
3119-
convert_platform=True)
3116+
subarr = com._possibly_convert_platform(data)
31203117

31213118
subarr = com._possibly_cast_to_datetime(subarr, dtype)
31223119

@@ -3145,7 +3142,7 @@ def _try_cast(arr):
31453142
dtype = value.dtype
31463143
value = value.item()
31473144
else:
3148-
value, dtype = _dtype_from_scalar(value)
3145+
value, dtype = com._dtype_from_scalar(value)
31493146

31503147
subarr = pa.empty(len(index), dtype=dtype)
31513148
else:
@@ -3180,14 +3177,6 @@ def _try_cast(arr):
31803177
return subarr
31813178

31823179

3183-
def _dtype_from_scalar(val):
3184-
if isinstance(val, np.datetime64):
3185-
# ugly hacklet
3186-
val = lib.Timestamp(val).value
3187-
return val, np.dtype('M8[ns]')
3188-
return val, type(val)
3189-
3190-
31913180
def _get_rename_function(mapper):
31923181
if isinstance(mapper, (dict, Series)):
31933182
def f(x):

0 commit comments

Comments
 (0)