Skip to content

Commit 3c345a1

Browse files
committed
BUG: fix up GH pandas-dev#2751; make sure that we cast to platform numeric
when a list is specified; use the Series codepath for initial list conversion (changed from using DataFrame). TST: added test for overflow in df creation
1 parent fc8de6d commit 3c345a1

File tree

4 files changed

+72
-17
lines changed

4 files changed

+72
-17
lines changed

pandas/core/common.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -805,10 +805,11 @@ def _consensus_name_attr(objs):
805805
# Lots of little utilities
806806

807807

808-
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
808+
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, convert_platform=False):
809809
""" if we have an object dtype, try to coerce dates and/or numbers """
810810

811-
if values.dtype == np.object_ and convert_dates:
811+
# convert dates
812+
if convert_dates and getattr(values,'dtype',None) == np.object_:
812813

813814
# we take an aggressive stance and convert to datetime64[ns]
814815
if convert_dates == 'coerce':
@@ -821,7 +822,8 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
821822
else:
822823
values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)
823824

824-
if values.dtype == np.object_ and convert_numeric:
825+
# convert to numeric
826+
if convert_numeric and getattr(values,'dtype',None) == np.object_:
825827
try:
826828
new_values = lib.maybe_convert_numeric(values,set(),coerce_numeric=True)
827829

@@ -832,6 +834,14 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
832834
except:
833835
pass
834836

837+
# platform conversion
838+
# allow ndarray or list here
839+
if convert_platform:
840+
if isinstance(values, (list,tuple)):
841+
values = lib.list_to_object_array(values)
842+
if values.dtype == np.object_:
843+
values = lib.maybe_convert_objects(values)
844+
835845
return values
836846

837847

pandas/core/frame.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -5460,11 +5460,21 @@ def _prep_ndarray(values, copy=True):
54605460
if len(values) == 0:
54615461
return np.empty((0, 0), dtype=object)
54625462

5463-
arr = np.asarray(values)
5464-
# NumPy strings are a pain, convert to object
5465-
if issubclass(arr.dtype.type, basestring):
5466-
arr = np.array(values, dtype=object, copy=True)
5467-
values = arr
5463+
def convert(v):
5464+
return com._possibly_convert_objects(v,
5465+
convert_dates=False,
5466+
convert_numeric=False,
5467+
convert_platform=True)
5468+
5469+
5470+
# we could have a 1-dim or 2-dim list here
5471+
# this is equiv of np.asarray, but does object conversion
5472+
# and platform dtype preservation
5473+
if com.is_list_like(values[0]) or hasattr(values[0],'len'):
5474+
values = np.array([ convert(v) for v in values])
5475+
else:
5476+
values = convert(values)
5477+
54685478
else:
54695479
# drop subclass info, do not copy data
54705480
values = np.asarray(values)

pandas/core/series.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -3111,11 +3111,15 @@ def _try_cast(arr):
31113111
raise
31123112
subarr = pa.array(data, dtype=object, copy=copy)
31133113
subarr = lib.maybe_convert_objects(subarr)
3114-
subarr = com._possibly_cast_to_datetime(subarr, dtype)
3114+
31153115
else:
3116-
subarr = lib.list_to_object_array(data)
3117-
subarr = lib.maybe_convert_objects(subarr)
3118-
subarr = com._possibly_cast_to_datetime(subarr, dtype)
3116+
subarr = com._possibly_convert_objects(data,
3117+
convert_dates=False,
3118+
convert_numeric=False,
3119+
convert_platform=True)
3120+
3121+
subarr = com._possibly_cast_to_datetime(subarr, dtype)
3122+
31193123
else:
31203124
subarr = _try_cast(data)
31213125

pandas/tests/test_frame.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -8133,12 +8133,43 @@ def test_constructor_with_datetimes(self):
81338133
expected.sort()
81348134
assert_series_equal(result, expected)
81358135

8136-
# GH #2751 (construction with no index specified)
8137-
df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3], 'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)] })
8136+
def test_constructor_for_list_with_dtypes(self):
8137+
intname = np.dtype(np.int_).name
8138+
floatname = np.dtype(np.float_).name
8139+
datetime64name = np.dtype('M8[ns]').name
8140+
objectname = np.dtype(np.object_).name
8141+
8142+
# test list of lists/ndarrays
8143+
df = DataFrame([np.arange(5) for x in range(5)])
8144+
result = df.get_dtype_counts()
8145+
expected = Series({'int64' : 5})
8146+
8147+
df = DataFrame([np.array(np.arange(5),dtype='int32') for x in range(5)])
8148+
result = df.get_dtype_counts()
8149+
expected = Series({'int32' : 5})
8150+
8151+
# overflow issue? (we always expect int64 upcasting here)
8152+
df = DataFrame({'a' : [2**31,2**31+1]})
8153+
result = df.get_dtype_counts()
8154+
expected = Series({'int64' : 1 })
8155+
assert_series_equal(result, expected)
8156+
8157+
# GH #2751 (construction with no index specified), make sure we cast to platform values
8158+
df = DataFrame([1, 2])
8159+
result = df.get_dtype_counts()
8160+
expected = Series({'int64': 1 })
8161+
assert_series_equal(result, expected)
8162+
8163+
df = DataFrame({'a' : [1, 2]})
8164+
result = df.get_dtype_counts()
8165+
expected = Series({'int64': 1 })
8166+
assert_series_equal(result, expected)
8167+
8168+
df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3],
8169+
'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)],
8170+
'e' : [1.,2,4.,7]})
81388171
result = df.get_dtype_counts()
8139-
# TODO: fix this on 32-bit (or decide it's ok behavior?)
8140-
# expected = Series({intname: 1, floatname : 1, datetime64name: 1, objectname : 1})
8141-
expected = Series({'int64': 1, floatname : 1, datetime64name: 1, objectname : 1})
8172+
expected = Series({'int64': 1, 'float64' : 2, datetime64name: 1, objectname : 1})
81428173
result.sort()
81438174
expected.sort()
81448175
assert_series_equal(result, expected)

0 commit comments

Comments
 (0)