diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d1ede95527029..142a034c4575a 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1312,6 +1312,8 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, If None then parse all columns, If int then indicates last column to be parsed If list of ints then indicates list of column numbers to be parsed + If string then indicates comma separated list of column names and + column ranges (e.g. "A:E" or "A,C,E:F") na_values : list-like, default None List of additional strings to recognize as NA/NaN @@ -1336,8 +1338,34 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, skip_footer=skip_footer) def _should_parse(self, i, parse_cols): + + def _range2cols(areas): + """ + Convert comma separated list of column names and column ranges to a + list of 0-based column indexes. + + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + def _excel2num(x): + "Convert Excel column name like 'AB' to 0-based column index" + return reduce(lambda s,a: s*26+ord(a)-ord('A')+1, x.upper().strip(), 0)-1 + + cols = [] + for rng in areas.split(','): + if ':' in rng: + rng = rng.split(':') + cols += range(_excel2num(rng[0]), _excel2num(rng[1])+1) + else: + cols.append(_excel2num(rng)) + return cols + if isinstance(parse_cols, int): return i <= parse_cols + elif isinstance(parse_cols, basestring): + return i in _range2cols(parse_cols) else: return i in parse_cols diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index ebd1b9489c6ea..95d9f0517a6c5 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -861,6 +861,48 @@ def test_parse_cols_list(self): assert_frame_equal(df, df2) assert_frame_equal(df3, df2) + def test_parse_cols_str(self): + _skip_if_no_openpyxl() + _skip_if_no_xlrd() + + suffix = ['', 'x'] + + for s in suffix: + + pth = os.path.join(self.dirpath, 'test.xls%s' % s) + xls = 
def test_parse_cols_str(self):
    """
    Check ``parse_cols`` given as a string of Excel column names/ranges.

    For both 'test.xls' and 'test.xlsx', 'Sheet1' and 'Sheet2' (the latter
    with ``skiprows=[1]``) are parsed with each string spec and compared
    against ``self.csv1`` reindexed to the expected data columns.
    """
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    # (parse_cols spec, data columns expected once column 0 is the index)
    cases = [
        ('A:D', ['A', 'B', 'C']),
        ('A,C,D', ['B', 'C']),
        ('A,C:D', ['B', 'C']),
    ]

    for ext in ['', 'x']:
        path = os.path.join(self.dirpath, 'test.xls%s' % ext)
        workbook = ExcelFile(path)

        for spec, wanted in cases:
            from_sheet1 = workbook.parse('Sheet1', index_col=0,
                                         parse_dates=True, parse_cols=spec)
            expected = read_csv(self.csv1, index_col=0, parse_dates=True)
            expected = expected.reindex(columns=wanted)
            from_sheet2 = workbook.parse('Sheet2', skiprows=[1],
                                         index_col=0, parse_dates=True,
                                         parse_cols=spec)
            assert_frame_equal(from_sheet1, expected)
            assert_frame_equal(from_sheet2, expected)