|
16 | 16 | from pandas.core.frame import DataFrame
|
17 | 17 | from pandas.io.parsers import TextParser
|
18 | 18 | from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
|
19 |
| - EmptyDataError, get_filepath_or_buffer) |
| 19 | + EmptyDataError, get_filepath_or_buffer, |
| 20 | + _NA_VALUES) |
20 | 21 | from pandas.tseries.period import Period
|
21 | 22 | from pandas import json
|
22 | 23 | from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
|
|
27 | 28 | import pandas.compat.openpyxl_compat as openpyxl_compat
|
28 | 29 | from warnings import warn
|
29 | 30 | from distutils.version import LooseVersion
|
| 31 | +from pandas.util.decorators import Appender |
30 | 32 |
|
31 | 33 | __all__ = ["read_excel", "ExcelWriter", "ExcelFile"]
|
32 | 34 |
|
33 | 35 | _writer_extensions = ["xlsx", "xls", "xlsm"]
|
34 | 36 | _writers = {}
|
35 | 37 |
|
| 38 | +_read_excel_doc = """ |
| 39 | +Read an Excel table into a pandas DataFrame |
| 40 | +
|
| 41 | +Parameters |
| 42 | +---------- |
| 43 | +io : string, path object (pathlib.Path or py._path.local.LocalPath), |
| 44 | + file-like object, pandas ExcelFile, or xlrd workbook. |
| 45 | + The string could be a URL. Valid URL schemes include http, ftp, s3, |
| 46 | + and file. For file URLs, a host is expected. For instance, a local |
| 47 | + file could be file://localhost/path/to/workbook.xlsx |
| 48 | +sheetname : string, int, mixed list of strings/ints, or None, default 0 |
| 49 | +
|
| 50 | + Strings are used for sheet names, Integers are used in zero-indexed |
| 51 | + sheet positions. |
| 52 | +
|
| 53 | + Lists of strings/integers are used to request multiple sheets. |
| 54 | +
|
| 55 | + Specify None to get all sheets. |
| 56 | +
|
| 57 | + str|int -> DataFrame is returned. |
| 58 | + list|None -> Dict of DataFrames is returned, with keys representing |
| 59 | + sheets. |
| 60 | +
|
| 61 | + Available Cases |
| 62 | +
|
| 63 | + * Defaults to 0 -> 1st sheet as a DataFrame |
| 64 | + * 1 -> 2nd sheet as a DataFrame |
| 65 | + * "Sheet1" -> 1st sheet as a DataFrame |
| 66 | + * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames |
| 67 | + * None -> All sheets as a dictionary of DataFrames |
| 68 | +
|
| 69 | +header : int, list of ints, default 0 |
| 70 | + Row (0-indexed) to use for the column labels of the parsed |
| 71 | + DataFrame. If a list of integers is passed those row positions will |
| 72 | + be combined into a ``MultiIndex`` |
| 73 | +skiprows : list-like |
| 74 | + Rows to skip at the beginning (0-indexed) |
| 75 | +skip_footer : int, default 0 |
| 76 | + Rows at the end to skip (0-indexed) |
| 77 | +index_col : int, list of ints, default None |
| 78 | + Column (0-indexed) to use as the row labels of the DataFrame. |
| 79 | + Pass None if there is no such column. If a list is passed, |
| 80 | + those columns will be combined into a ``MultiIndex`` |
| 81 | +names : array-like, default None |
| 82 | + List of column names to use. If file contains no header row, |
| 83 | + then you should explicitly pass header=None |
| 84 | +converters : dict, default None |
| 85 | + Dict of functions for converting values in certain columns. Keys can |
| 86 | + either be integers or column labels, values are functions that take one |
| 87 | + input argument, the Excel cell content, and return the transformed |
| 88 | + content. |
| 89 | +parse_cols : int, list of ints, or string, default None |
| 90 | + * If None then parse all columns, |
| 91 | + * If int then indicates last column to be parsed |
| 92 | + * If list of ints then indicates list of column numbers to be parsed |
| 93 | + * If string then indicates comma separated list of column names and |
| 94 | + column ranges (e.g. "A:E" or "A,C,E:F") |
| 95 | +squeeze : boolean, default False |
| 96 | + If the parsed data only contains one column then return a Series |
| 97 | +na_values : str or list-like or dict, default None |
| 98 | + Additional strings to recognize as NA/NaN. If dict passed, specific |
| 99 | + per-column NA values. By default the following values are interpreted |
| 100 | + as NaN: '""" + "', '".join(sorted(_NA_VALUES)) + """'. |
| 101 | +thousands : str, default None |
| 102 | + Thousands separator for parsing string columns to numeric. Note that |
| 103 | + this parameter is only necessary for columns stored as TEXT in Excel, |
| 104 | + any numeric columns will automatically be parsed, regardless of display |
| 105 | + format. |
| 106 | +keep_default_na : bool, default True |
| 107 | + If na_values are specified and keep_default_na is False the default NaN |
| 108 | + values are overridden, otherwise they're appended to. |
| 109 | +verbose : boolean, default False |
| 110 | + Indicate number of NA values placed in non-numeric columns |
| 111 | +engine : string, default None |
| 112 | + If io is not a buffer or path, this must be set to identify io. |
| 113 | + Acceptable values are None or xlrd |
| 114 | +convert_float : boolean, default True |
| 115 | + Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric |
| 116 | + data will be read in as floats: Excel stores all numbers as floats |
| 117 | + internally |
| 118 | +has_index_names : boolean, default None |
| 119 | + DEPRECATED: for version 0.17+ index names will be automatically |
| 120 | + inferred based on index_col. To read Excel output from 0.16.2 and |
| 121 | + prior that had saved index names, use True. |
| 122 | +
|
| 123 | +Returns |
| 124 | +------- |
| 125 | +parsed : DataFrame or Dict of DataFrames |
| 126 | + DataFrame from the passed in Excel file. See notes in sheetname |
| 127 | + argument for more information on when a Dict of DataFrames is returned. |
| 128 | +""" |
| 129 | + |
36 | 130 |
|
37 | 131 | def register_writer(klass):
|
38 | 132 | """Adds engine to the excel writer registry. You must use this method to
|
@@ -74,100 +168,13 @@ def get_writer(engine_name):
|
74 | 168 | raise ValueError("No Excel writer '%s'" % engine_name)
|
75 | 169 |
|
76 | 170 |
|
| 171 | +@Appender(_read_excel_doc) |
77 | 172 | def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
|
78 | 173 | index_col=None, names=None, parse_cols=None, parse_dates=False,
|
79 | 174 | date_parser=None, na_values=None, thousands=None,
|
80 | 175 | convert_float=True, has_index_names=None, converters=None,
|
81 | 176 | engine=None, squeeze=False, **kwds):
|
82 |
| - """ |
83 |
| - Read an Excel table into a pandas DataFrame |
84 |
| -
|
85 |
| - Parameters |
86 |
| - ---------- |
87 |
| - io : string, path object (pathlib.Path or py._path.local.LocalPath), |
88 |
| - file-like object, pandas ExcelFile, or xlrd workbook. |
89 |
| - The string could be a URL. Valid URL schemes include http, ftp, s3, |
90 |
| - and file. For file URLs, a host is expected. For instance, a local |
91 |
| - file could be file://localhost/path/to/workbook.xlsx |
92 |
| - sheetname : string, int, mixed list of strings/ints, or None, default 0 |
93 |
| -
|
94 |
| - Strings are used for sheet names, Integers are used in zero-indexed |
95 |
| - sheet positions. |
96 |
| -
|
97 |
| - Lists of strings/integers are used to request multiple sheets. |
98 |
| -
|
99 |
| - Specify None to get all sheets. |
100 |
| -
|
101 |
| - str|int -> DataFrame is returned. |
102 |
| - list|None -> Dict of DataFrames is returned, with keys representing |
103 |
| - sheets. |
104 |
| -
|
105 |
| - Available Cases |
106 |
| -
|
107 |
| - * Defaults to 0 -> 1st sheet as a DataFrame |
108 |
| - * 1 -> 2nd sheet as a DataFrame |
109 |
| - * "Sheet1" -> 1st sheet as a DataFrame |
110 |
| - * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames |
111 |
| - * None -> All sheets as a dictionary of DataFrames |
112 |
| -
|
113 |
| - header : int, list of ints, default 0 |
114 |
| - Row (0-indexed) to use for the column labels of the parsed |
115 |
| - DataFrame. If a list of integers is passed those row positions will |
116 |
| - be combined into a ``MultiIndex`` |
117 |
| - skiprows : list-like |
118 |
| - Rows to skip at the beginning (0-indexed) |
119 |
| - skip_footer : int, default 0 |
120 |
| - Rows at the end to skip (0-indexed) |
121 |
| - index_col : int, list of ints, default None |
122 |
| - Column (0-indexed) to use as the row labels of the DataFrame. |
123 |
| - Pass None if there is no such column. If a list is passed, |
124 |
| - those columns will be combined into a ``MultiIndex`` |
125 |
| - names : array-like, default None |
126 |
| - List of column names to use. If file contains no header row, |
127 |
| - then you should explicitly pass header=None |
128 |
| - converters : dict, default None |
129 |
| - Dict of functions for converting values in certain columns. Keys can |
130 |
| - either be integers or column labels, values are functions that take one |
131 |
| - input argument, the Excel cell content, and return the transformed |
132 |
| - content. |
133 |
| - parse_cols : int or list, default None |
134 |
| - * If None then parse all columns, |
135 |
| - * If int then indicates last column to be parsed |
136 |
| - * If list of ints then indicates list of column numbers to be parsed |
137 |
| - * If string then indicates comma separated list of column names and |
138 |
| - column ranges (e.g. "A:E" or "A,C,E:F") |
139 |
| - squeeze : boolean, default False |
140 |
| - If the parsed data only contains one column then return a Series |
141 |
| - na_values : list-like, default None |
142 |
| - List of additional strings to recognize as NA/NaN |
143 |
| - thousands : str, default None |
144 |
| - Thousands separator for parsing string columns to numeric. Note that |
145 |
| - this parameter is only necessary for columns stored as TEXT in Excel, |
146 |
| - any numeric columns will automatically be parsed, regardless of display |
147 |
| - format. |
148 |
| - keep_default_na : bool, default True |
149 |
| - If na_values are specified and keep_default_na is False the default NaN |
150 |
| - values are overridden, otherwise they're appended to |
151 |
| - verbose : boolean, default False |
152 |
| - Indicate number of NA values placed in non-numeric columns |
153 |
| - engine: string, default None |
154 |
| - If io is not a buffer or path, this must be set to identify io. |
155 |
| - Acceptable values are None or xlrd |
156 |
| - convert_float : boolean, default True |
157 |
| - convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric |
158 |
| - data will be read in as floats: Excel stores all numbers as floats |
159 |
| - internally |
160 |
| - has_index_names : boolean, default None |
161 |
| - DEPRECATED: for version 0.17+ index names will be automatically |
162 |
| - inferred based on index_col. To read Excel output from 0.16.2 and |
163 |
| - prior that had saved index names, use True. |
164 | 177 |
|
165 |
| - Returns |
166 |
| - ------- |
167 |
| - parsed : DataFrame or Dict of DataFrames |
168 |
| - DataFrame from the passed in Excel file. See notes in sheetname |
169 |
| - argument for more information on when a Dict of Dataframes is returned. |
170 |
| - """ |
171 | 178 | if not isinstance(io, ExcelFile):
|
172 | 179 | io = ExcelFile(io, engine=engine)
|
173 | 180 |
|
|
0 commit comments