Skip to content

Commit a37900e

Browse files
committed
Merge pull request #5742 from immerrr/dont-lose-dtype-concatenating-empty-arraylikes
BUG: don't lose dtypes when concatenating empty array-likes
2 parents 0cd0c68 + 3db6d3a commit a37900e

File tree

4 files changed: +44 -14 lines changed

doc/source/release.rst

+1
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ Bug Fixes
118118
- Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`)
119119
- Bug in scipy ``interpolate`` methods with a datetime index (:issue:`5975`)
120120
- Bug in NaT comparison if a mixed datetime/np.datetime64 with NaT were passed (:issue:`5968`)
121+
- Fixed bug with ``pd.concat`` losing dtype information if all inputs are empty (:issue:`5742`)
121122

122123
pandas 0.13.0
123124
-------------

pandas/core/common.py

+17 -14
Original file line number | Diff line number | Diff line change
@@ -2326,20 +2326,23 @@ def _check_as_is(x):
23262326

23272327
def _concat_compat(to_concat, axis=0):
23282328
# filter empty arrays
2329-
to_concat = [x for x in to_concat if x.shape[axis] > 0]
2330-
2331-
# return the empty np array, if nothing to concatenate, #3121
2332-
if not to_concat:
2333-
return np.array([], dtype=object)
2334-
2335-
is_datetime64 = [x.dtype == _NS_DTYPE for x in to_concat]
2336-
if all(is_datetime64):
2337-
# work around NumPy 1.6 bug
2338-
new_values = np.concatenate([x.view(np.int64) for x in to_concat],
2339-
axis=axis)
2340-
return new_values.view(_NS_DTYPE)
2341-
elif any(is_datetime64):
2342-
to_concat = [_to_pydatetime(x) for x in to_concat]
2329+
nonempty = [x for x in to_concat if x.shape[axis] > 0]
2330+
2331+
# If all arrays are empty, there's nothing to convert, just short-cut to
2332+
# the concatenation, #3121.
2333+
#
2334+
# Creating an empty array directly is tempting, but the winnings would be
2335+
# marginal given that it would still require shape & dtype calculation and
2336+
# np.concatenate which has them both implemented is compiled.
2337+
if nonempty:
2338+
is_datetime64 = [x.dtype == _NS_DTYPE for x in nonempty]
2339+
if all(is_datetime64):
2340+
# work around NumPy 1.6 bug
2341+
new_values = np.concatenate([x.view(np.int64) for x in nonempty],
2342+
axis=axis)
2343+
return new_values.view(_NS_DTYPE)
2344+
elif any(is_datetime64):
2345+
to_concat = [_to_pydatetime(x) for x in nonempty]
23432346

23442347
return np.concatenate(to_concat, axis=axis)
23452348

pandas/tests/test_frame.py

+17
Original file line number | Diff line number | Diff line change
@@ -11909,6 +11909,23 @@ def test_to_csv_date_format(self):
1190911909

1191011910
assert_frame_equal(test, nat_frame)
1191111911

11912+
def test_concat_empty_dataframe_dtypes(self):
11913+
df = DataFrame(columns=list("abc"))
11914+
df['a'] = df['a'].astype(np.bool_)
11915+
df['b'] = df['b'].astype(np.int32)
11916+
df['c'] = df['c'].astype(np.float64)
11917+
11918+
result = pd.concat([df, df])
11919+
self.assertEqual(result['a'].dtype, np.bool_)
11920+
self.assertEqual(result['b'].dtype, np.int32)
11921+
self.assertEqual(result['c'].dtype, np.float64)
11922+
11923+
result = pd.concat([df, df.astype(np.float64)])
11924+
self.assertEqual(result['a'].dtype, np.object_)
11925+
self.assertEqual(result['b'].dtype, np.float64)
11926+
self.assertEqual(result['c'].dtype, np.float64)
11927+
11928+
1191211929
def skip_if_no_ne(engine='numexpr'):
1191311930
if engine == 'numexpr':
1191411931
try:

pandas/tests/test_series.py

+9
Original file line number | Diff line number | Diff line change
@@ -5441,6 +5441,15 @@ def test_numpy_unique(self):
54415441
# it works!
54425442
result = np.unique(self.ts)
54435443

5444+
def test_concat_empty_series_dtypes(self):
5445+
self.assertEqual(pd.concat([Series(dtype=np.float64)]).dtype, np.float64)
5446+
self.assertEqual(pd.concat([Series(dtype=np.int8)]).dtype, np.int8)
5447+
self.assertEqual(pd.concat([Series(dtype=np.bool_)]).dtype, np.bool_)
5448+
5449+
self.assertEqual(pd.concat([Series(dtype=np.bool_),
5450+
Series(dtype=np.int32)]).dtype, np.int32)
5451+
5452+
54445453

54455454
class TestSeriesNonUnique(tm.TestCase):
54465455

0 commit comments

Comments
 (0)