From 25ffa14bb0138d63d0390b643ab5bdd7873292e7 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 10 May 2013 10:59:21 -0400 Subject: [PATCH] BUG: Duplicate indexes with an empty DataFrame.from_records will return a correct frame (GH3562_) --- RELEASE.rst | 2 ++ pandas/core/internals.py | 3 ++- pandas/tests/test_frame.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index 8e48395efc9ab..82f88a0c8e592 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -77,6 +77,7 @@ pandas 0.11.1 - ``.loc`` was not raising when passed an integer list (GH3449_) - Unordered time series selection was misbehaving when using label slicing (GH3448_) - Duplicate indexes with getitem will return items in the correct order (GH3455_, GH3457_) + - Duplicate indexes with an empty DataFrame.from_records will return a correct frame (GH3562_) - Fix sorting in a frame with a list of columns which contains datetime64[ns] dtypes (GH3461_) - DataFrames fetched via FRED now handle '.' as a NaN. (GH3469_) - Fix regression in a DataFrame apply with axis=1, objects were not being converted back @@ -137,6 +138,7 @@ pandas 0.11.1 .. _GH3495: https://github.com/pydata/pandas/issues/3495 .. _GH3492: https://github.com/pydata/pandas/issues/3492 .. _GH3552: https://github.com/pydata/pandas/issues/3552 +.. _GH3562: https://github.com/pydata/pandas/issues/3562 .. 
_GH3493: https://github.com/pydata/pandas/issues/3493 diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 13e1654963844..b6459b0e461b4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1960,7 +1960,8 @@ def form_blocks(arrays, names, axes): items = axes[0] if len(arrays) < len(items): - extra_items = items - Index(names) + nn = set(names) + extra_items = Index([ i for i in items if i not in nn ]) else: extra_items = [] diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0c9dd21d2f645..7e7813e048bd1 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3518,6 +3518,16 @@ def test_from_records_misc_brokenness(self): results = df2_obj.get_dtype_counts() expected = Series({ 'datetime64[ns]' : 1, 'int64' : 1 }) + def test_from_records_empty(self): + # 3562 + result = DataFrame.from_records([], columns=['a','b','c']) + expected = DataFrame(columns=['a','b','c']) + assert_frame_equal(result, expected) + + result = DataFrame.from_records([], columns=['a','b','b']) + expected = DataFrame(columns=['a','b','b']) + assert_frame_equal(result, expected) + def test_to_records_floats(self): df = DataFrame(np.random.rand(10, 10)) df.to_records()