Skip to content

Commit 21bad0f

Browse files
committed
BUG: proper type inference with list of lists passed to DataFrame constructor, from_records type-handling fixes, GH #484
1 parent 32c5fe4 commit 21bad0f

File tree

7 files changed

+50
-30
lines changed

7 files changed

+50
-30
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pandas 0.6.1
5858
matrices (GH #189)
5959
- Add `margins` option to `pivot_table` for computing subgroup aggregates (GH
6060
#114)
61+
- Add `Series.from_csv` function (PR #482)
6162

6263
**Improvements to existing features**
6364

@@ -129,6 +130,7 @@ Thanks
129130
- Chang She
130131
- Ted Square
131132
- Chris Uga
133+
- Dieter Vandenbussche
132134

133135
pandas 0.6.0
134136
============

pandas/core/frame.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
207207
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
208208
copy=copy)
209209
elif isinstance(data, list):
210-
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
211-
copy=copy)
210+
if isinstance(data[0], (list, tuple)):
211+
data, columns = _list_to_sdict(data, columns)
212+
mgr = self._init_dict(data, index, columns, dtype=dtype)
213+
else:
214+
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
215+
copy=copy)
212216
else:
213217
raise PandasError('DataFrame constructor not properly called!')
214218

@@ -528,20 +532,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
528532
if isinstance(data, (np.ndarray, DataFrame, dict)):
529533
columns, sdict = _rec_to_dict(data)
530534
else:
531-
if isinstance(data[0], tuple):
532-
content = list(lib.to_object_array_tuples(data).T)
533-
else:
534-
# list of lists
535-
content = list(lib.to_object_array(data).T)
536-
537-
if columns is None:
538-
columns = range(len(content))
539-
else:
540-
assert(len(columns) == len(content))
541-
542-
sdict = dict((c, lib.maybe_convert_objects(vals))
543-
for c, vals in zip(columns, content))
544-
del content
535+
sdict, columns = _list_to_sdict(data, columns)
545536

546537
if exclude is None:
547538
exclude = set()
@@ -3547,6 +3538,22 @@ def _rec_to_dict(arr):
35473538

35483539
return columns, sdict
35493540

3541+
def _list_to_sdict(data, columns):
3542+
if isinstance(data[0], tuple):
3543+
content = list(lib.to_object_array_tuples(data).T)
3544+
else:
3545+
# list of lists
3546+
content = list(lib.to_object_array(data).T)
3547+
3548+
if columns is None:
3549+
columns = range(len(content))
3550+
else:
3551+
assert(len(columns) == len(content))
3552+
3553+
sdict = dict((c, lib.maybe_convert_objects(vals))
3554+
for c, vals in zip(columns, content))
3555+
return sdict, columns
3556+
35503557
def _homogenize(data, index, columns, dtype=None):
35513558
from pandas.core.series import _sanitize_array
35523559

pandas/src/parsing.pyx

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values):
8585
for i from 0 <= i < n:
8686
val = values[i]
8787

88-
if cpython.PyFloat_Check(val):
88+
if util.is_float_object(val):
8989
floats[i] = val
9090
seen_float = 1
9191
elif val in na_values:
@@ -144,18 +144,18 @@ def maybe_convert_objects(ndarray[object] objects):
144144
seen_null = 1
145145
objects[i] = onan
146146
floats[i] = fnan
147-
elif cpython.PyBool_Check(val):
147+
elif util.is_bool_object(val):
148148
seen_bool = 1
149149
bools[i] = val
150-
elif is_integer_object(val):
150+
elif util.is_integer_object(val):
151151
seen_int = 1
152152
floats[i] = <float64_t> val
153153
if not seen_null:
154154
ints[i] = val
155-
elif cpython.PyFloat_Check(val):
155+
elif util.is_float_object(val):
156156
floats[i] = val
157157
seen_float = 1
158-
elif not (cpython.PyString_Check(val) or cpython.PyUnicode_Check(val)):
158+
elif not util.is_string_object(val):
159159
# this will convert Decimal objects
160160
try:
161161
floats[i] = float(val)
@@ -173,14 +173,16 @@ def maybe_convert_objects(ndarray[object] objects):
173173
else:
174174
if seen_object:
175175
return objects
176-
elif seen_int:
177-
return ints
178-
elif seen_float:
179-
return floats
180-
elif seen_bool:
181-
return bools.view(np.bool_)
176+
elif not seen_bool:
177+
if seen_float:
178+
return floats
179+
elif seen_int:
180+
return ints
182181
else:
183-
return objects
182+
if not seen_float and not seen_int:
183+
return bools.view(np.bool_)
184+
185+
return objects
184186

185187
convert_sql_column = maybe_convert_objects
186188

pandas/src/tseries.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ cdef double_t *get_double_ptr(ndarray arr):
4949

5050
return <double_t *> arr.data
5151

52-
from util cimport is_integer_object
52+
cimport util
5353

5454
cdef extern from "math.h":
5555
double sqrt(double x)

pandas/src/util.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ cimport numpy as cnp
44
cdef extern from "numpy_helper.h":
55
inline int is_integer_object(object)
66
inline int is_float_object(object)
7+
inline int is_bool_object(object)
8+
inline int is_string_object(object)
79
inline int assign_value_1d (ndarray, Py_ssize_t, object) except -1
810

911
cpdef inline object get_value_at(ndarray arr, object loc):

pandas/tests/test_frame.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,13 @@ def test_constructor_more(self):
12221222
self.assertEqual(len(dm.columns), 2)
12231223
self.assert_(dm.values.dtype == np.float64)
12241224

1225+
def test_constructor_list_of_lists(self):
    # GH #484: a list of lists handed to the constructor should get a
    # dtype inferred per column instead of one dtype for the whole frame.
    rows = [[1, 'a'], [2, 'b']]
    frame = DataFrame(data=rows, columns=["num", "str"])

    num_col = frame['num']
    str_col = frame['str']

    # integers stay integers; the string column is object dtype
    self.assert_(com.is_integer_dtype(num_col))
    self.assert_(str_col.dtype == np.object_)
1231+
12251232
def test_constructor_ragged(self):
12261233
data = {'A' : randn(10),
12271234
'B' : randn(8)}

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
286286
tseries_depends = [srcpath(f, suffix='.pyx')
287287
for f in tseries_depends]
288288
else:
289-
tseries_depends = None
289+
tseries_depends = []
290290

291291
tseries_ext = Extension('pandas._tseries',
292292
depends=tseries_depends + ['pandas/src/numpy_helper.h'],

0 commit comments

Comments
 (0)