Skip to content

Commit e63cbd7

Browse files
committed
BUG: DataFrame can handle lists of tuples just like Series, a bit of refactoring for code reuse. GH #293
1 parent 5abb534 commit e63cbd7

File tree

3 files changed

+58
-55
lines changed

3 files changed

+58
-55
lines changed

pandas/core/frame.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3199,6 +3199,8 @@ def _rec_to_dict(arr):
31993199
return columns, sdict
32003200

32013201
def _homogenize(data, index, columns, dtype=None):
3202+
from pandas.core.series import _sanitize_array
3203+
32023204
homogenized = {}
32033205

32043206
if dtype is not None:
@@ -3225,23 +3227,9 @@ def _homogenize(data, index, columns, dtype=None):
32253227
else:
32263228
if isinstance(v, dict):
32273229
v = [v.get(i, nan) for i in index]
3228-
elif np.isscalar(v):
3229-
_v = np.empty(len(index), dtype=_infer_dtype(v))
3230-
_v.fill(v)
3231-
v = _v
3232-
else:
3233-
assert(len(v) == len(index))
32343230

3235-
# only *attempt* to cast to dtype
3236-
try:
3237-
arr = np.asarray(v, dtype=dtype)
3238-
3239-
# prevent NumPy from casting things to string when it shouldn't
3240-
if issubclass(arr.dtype.type, basestring):
3241-
arr = np.array(v, dtype=object, copy=False)
3242-
v = arr
3243-
except Exception:
3244-
v = np.asarray(v)
3231+
v = _sanitize_array(v, index, dtype=dtype, copy=False,
3232+
raise_cast_failure=False)
32453233

32463234
homogenized[k] = v
32473235

pandas/core/series.py

Lines changed: 49 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -104,49 +104,15 @@ def __new__(cls, data, index=None, dtype=None, name=None, copy=False):
104104
index = Index(sorted(data.keys()))
105105
data = [data.get(idx, np.nan) for idx in index]
106106

107-
try:
108-
subarr = np.array(data, dtype=dtype, copy=copy)
109-
except ValueError:
110-
if dtype:
111-
raise
112-
else: # pragma: no cover
113-
subarr = np.array(data, dtype=object)
114-
115-
if subarr.ndim == 0:
116-
if isinstance(data, list): # pragma: no cover
117-
subarr = np.array(data, dtype=object)
118-
elif index is not None:
119-
value = data
120-
121-
# If we create an empty array using a string to infer
122-
# the dtype, NumPy will only allocate one character per entry
123-
# so this is kind of bad. Alternately we could use np.repeat
124-
# instead of np.empty (but then you still don't want things
125-
# coming out as np.str_!
126-
if isinstance(value, basestring) and dtype is None:
127-
dtype = np.object_
128-
129-
if dtype is None:
130-
subarr = np.empty(len(index), dtype=type(value))
131-
else:
132-
subarr = np.empty(len(index), dtype=dtype)
133-
subarr.fill(value)
134-
else:
135-
return subarr.item()
136-
elif subarr.ndim > 1:
137-
if isinstance(data, np.ndarray):
138-
raise Exception('Data must be 1-dimensional')
139-
else:
140-
subarr = _asarray_tuplesafe(data, dtype=dtype)
107+
subarr = _sanitize_array(data, index, dtype, copy,
108+
raise_cast_failure=True)
109+
110+
if not isinstance(subarr, np.ndarray):
111+
return subarr
141112

142113
if index is None:
143114
index = _default_index(len(subarr))
144115

145-
# This is to prevent mixed-type Series getting all casted to
146-
# NumPy string type, e.g. NaN --> '-1#IND'.
147-
if issubclass(subarr.dtype.type, basestring):
148-
subarr = np.array(data, dtype=object, copy=copy)
149-
150116
# Change the class of the array to be the subclass type.
151117
subarr = subarr.view(cls)
152118
subarr.index = index
@@ -2001,6 +1967,50 @@ def remove_na(arr):
20011967
return arr[notnull(arr)]
20021968

20031969

1970+
def _sanitize_array(data, index, dtype=None, copy=False,
1971+
raise_cast_failure=False):
1972+
try:
1973+
subarr = np.array(data, dtype=dtype, copy=copy)
1974+
except (ValueError, TypeError):
1975+
if dtype and raise_cast_failure:
1976+
raise
1977+
else: # pragma: no cover
1978+
subarr = np.array(data, dtype=object)
1979+
1980+
if subarr.ndim == 0:
1981+
if isinstance(data, list): # pragma: no cover
1982+
subarr = np.array(data, dtype=object)
1983+
elif index is not None:
1984+
value = data
1985+
1986+
# If we create an empty array using a string to infer
1987+
# the dtype, NumPy will only allocate one character per entry
1988+
# so this is kind of bad. Alternately we could use np.repeat
1989+
# instead of np.empty (but then you still don't want things
1990+
# coming out as np.str_!
1991+
if isinstance(value, basestring) and dtype is None:
1992+
dtype = np.object_
1993+
1994+
if dtype is None:
1995+
subarr = np.empty(len(index), dtype=type(value))
1996+
else:
1997+
subarr = np.empty(len(index), dtype=dtype)
1998+
subarr.fill(value)
1999+
else:
2000+
return subarr.item()
2001+
elif subarr.ndim > 1:
2002+
if isinstance(data, np.ndarray):
2003+
raise Exception('Data must be 1-dimensional')
2004+
else:
2005+
subarr = _asarray_tuplesafe(data, dtype=dtype)
2006+
2007+
# This is to prevent mixed-type Series getting all casted to
2008+
# NumPy string type, e.g. NaN --> '-1#IND'.
2009+
if issubclass(subarr.dtype.type, basestring):
2010+
subarr = np.array(data, dtype=object, copy=copy)
2011+
2012+
return subarr
2013+
20042014
def _get_rename_function(mapper):
20052015
if isinstance(mapper, (dict, Series)):
20062016
def f(x):

pandas/tests/test_frame.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,11 @@ def test_constructor_mixed_dict_and_Series(self):
11191119
result = DataFrame(data)
11201120
self.assert_(result.index.is_monotonic)
11211121

1122+
def test_constructor_tuples(self):
1123+
result = DataFrame({'A': [(1, 2), (3, 4)]})
1124+
expected = DataFrame({'A': Series([(1, 2), (3, 4)])})
1125+
assert_frame_equal(result, expected)
1126+
11221127
def test_astype(self):
11231128
casted = self.frame.astype(int)
11241129
expected = DataFrame(self.frame.values.astype(int),

0 commit comments

Comments
 (0)