From d0bcb06ba0213a369cd4d7682084b0f4cd440239 Mon Sep 17 00:00:00 2001 From: Jonas Buyl Date: Thu, 6 Aug 2015 14:37:29 +0200 Subject: [PATCH] Fix column reordering --- doc/source/whatsnew/v0.17.0.txt | 2 +- pandas/io/stata.py | 14 ++++++-------- pandas/io/tests/test_stata.py | 8 ++++++++ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 9a9054fcf0489..aa96aacd6f0dd 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -598,7 +598,7 @@ Bug Fixes - Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`) - +- Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`) - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). - Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0266e2beeca40..5afbc2671e3a7 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1614,14 +1614,12 @@ def _do_select_columns(self, data, columns): typlist = [] fmtlist = [] lbllist = [] - matched = set() - for i, col in enumerate(data.columns): - if col in column_set: - matched.update([col]) - dtyplist.append(self.dtyplist[i]) - typlist.append(self.typlist[i]) - fmtlist.append(self.fmtlist[i]) - lbllist.append(self.lbllist[i]) + for col in columns: + i = data.columns.get_loc(col) + dtyplist.append(self.dtyplist[i]) + typlist.append(self.typlist[i]) + fmtlist.append(self.fmtlist[i]) + lbllist.append(self.lbllist[i]) self.dtyplist = dtyplist self.typlist = typlist diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index aa9f27d1515d3..5b934bad38bd3 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -782,6 +782,14 @@ def test_drop_column(self): columns=columns) tm.assert_frame_equal(expected, dropped) + + # See PR 10757 + columns = ['int_', 'long_', 'byte_'] + expected = expected[columns] + reordered = read_stata(self.dta15_117, convert_dates=True, + columns=columns) + tm.assert_frame_equal(expected, reordered) + with tm.assertRaises(ValueError): columns = ['byte_', 'byte_'] read_stata(self.dta15_117, convert_dates=True, columns=columns)