Skip to content

Commit 955b727

Browse files
committed
ENH: DataFrame.__init__ will accept structured arrays. General performance tweak in the constructor too.
1 parent 54066db commit 955b727

File tree

4 files changed

+41
-15
lines changed

4 files changed

+41
-15
lines changed

pandas/core/frame.py

+25-13
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,18 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
147147
mgr = mgr.cast(dtype)
148148
elif isinstance(data, dict):
149149
mgr = self._init_dict(data, index, columns, dtype=dtype)
150-
elif isinstance(data, (np.ndarray, list)):
151-
mgr = self._init_matrix(data, index, columns, dtype=dtype,
152-
copy=copy)
150+
elif isinstance(data, np.ndarray):
151+
if data.dtype.names:
152+
data_columns, data = _rec_to_dict(data)
153+
if columns is None:
154+
columns = data_columns
155+
mgr = self._init_dict(data, index, columns, dtype=dtype)
156+
else:
157+
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
158+
copy=copy)
159+
elif isinstance(data, list):
160+
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
161+
copy=copy)
153162
else:
154163
raise PandasError('DataFrame constructor not properly called!')
155164

@@ -183,8 +192,8 @@ def _init_dict(self, data, index, columns, dtype=None):
183192
mgr = BlockManager(blocks, [columns, index])
184193
return mgr.consolidate()
185194

186-
def _init_matrix(self, values, index, columns, dtype=None,
187-
copy=False):
195+
def _init_ndarray(self, values, index, columns, dtype=None,
196+
copy=False):
188197
values = _prep_ndarray(values, copy=copy)
189198

190199
if dtype is not None:
@@ -347,16 +356,13 @@ def from_records(cls, data, indexField=None):
347356
-------
348357
DataFrame
349358
"""
350-
# Dtype when you have records
351-
if not issubclass(data.dtype.type, np.void):
359+
if not data.dtype.names:
352360
raise Exception('Input was not a structured array!')
353361

354-
columns = data.dtype.names
355-
sdict = dict((k, data[k]) for k in columns)
356-
362+
columns, sdict = _rec_to_dict(data)
357363
if indexField is not None:
358364
index = sdict.pop(indexField)
359-
columns = [c for c in columns if c != indexField]
365+
columns.remove(indexField)
360366
else:
361367
index = np.arange(len(data))
362368

@@ -2484,6 +2490,12 @@ def _prep_ndarray(values, copy=True):
24842490

24852491
return values
24862492

2493+
2494+
def _rec_to_dict(arr):
2495+
columns = list(arr.dtype.names)
2496+
sdict = dict((k, arr[k]) for k in columns)
2497+
return columns, sdict
2498+
24872499
def _homogenize_series(data, index, dtype=None):
24882500
homogenized = {}
24892501

@@ -2507,9 +2519,9 @@ def _homogenize_series(data, index, dtype=None):
25072519

25082520
# only *attempt* to cast to dtype
25092521
try:
2510-
v = Series(v, dtype=dtype, index=index)
2522+
v = np.asarray(v, dtype=dtype)
25112523
except Exception:
2512-
v = Series(v, index=index)
2524+
v = np.asarray(v)
25132525

25142526
homogenized[k] = v
25152527

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@ def _simple_blockify(dct, ref_items, dtype):
731731

732732
def _stack_dict(dct, ref_items):
733733
items = [x for x in ref_items if x in dct]
734-
stacked = np.vstack([dct[k].values for k in items])
734+
stacked = np.vstack([np.asarray(dct[k]) for k in items])
735735
return items, stacked
736736

737737
def _blocks_to_series_dict(blocks, index=None):

pandas/core/sparse.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,7 @@ def density(self):
783783
#----------------------------------------------------------------------
784784
# Support different internal rep'n of SparseDataFrame
785785

786-
def _insert_item(self, key, value):
786+
def _set_item(self, key, value):
787787
sp_maker = lambda x: SparseSeries(x, index=self.index,
788788
fill_value=self.default_fill_value,
789789
kind=self.default_kind)

pandas/tests/test_frame.py

+14
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,20 @@ def test_constructor_mixed(self):
626626

627627
self.assertEqual(self.mixed_frame['foo'].dtype, np.object_)
628628

629+
def test_constructor_rec(self):
630+
rec = self.frame.to_records(index=False)
631+
632+
rec.dtype.names = list(rec.dtype.names)[::-1]
633+
634+
index = self.frame.index
635+
636+
df = DataFrame(rec)
637+
self.assert_(np.array_equal(df.columns, rec.dtype.names))
638+
639+
df2 = DataFrame(rec, index=index)
640+
self.assert_(np.array_equal(df2.columns, rec.dtype.names))
641+
self.assert_(df2.index.equals(index))
642+
629643
def test_constructor_bool(self):
630644
df = DataFrame({0 : np.ones(10, dtype=bool),
631645
1 : np.zeros(10, dtype=bool)})

0 commit comments

Comments
 (0)