|
39 | 39 | _writers = {}
|
40 | 40 |
|
41 | 41 | _read_excel_doc = """
|
42 |
| -Read an Excel table into a pandas DataFrame. |
| 42 | +Read an Excel file into a pandas DataFrame. |
43 | 43 |
|
44 | 44 | Support both `xls` and `xlsx` file extensions from a local filesystem or url.
|
| 45 | +Support an option to read a single sheet or a list of sheets. |
45 | 46 |
|
46 | 47 | Parameters
|
47 | 48 | ----------
|
48 |
| -io : string, path object (pathlib.Path or py._path.local.LocalPath), |
49 |
| - file-like object, pandas ExcelFile, or xlrd workbook. |
| 49 | +io : str, file descriptor, pathlib.Path, ExcelFile or xlrd.Book |
50 | 50 | The string could be a URL. Valid URL schemes include http, ftp, s3,
|
51 | 51 | gcs, and file. For file URLs, a host is expected. For instance, a local
|
52 |
| - file could be file://localhost/path/to/workbook.xlsx |
53 |
| -sheet_name : string, int, mixed list of strings/ints, or None, default 0 |
54 |
| -
|
| 52 | + file could be ``file://localhost/path/to/workbook.xlsx``. |
| 53 | +sheet_name : string, int, list, or None, default 0 |
55 | 54 | Strings are used for sheet names, Integers are used in zero-indexed
|
56 |
| - sheet positions. |
57 |
| -
|
58 |
| - Lists of strings/integers are used to request multiple sheets. |
59 |
| -
|
60 |
| - Specify None to get all sheets. |
| 55 | + sheet positions. Lists of strings/integers are used to request |
| 56 | + multiple sheets. Specify None to get all sheets. |
61 | 57 |
|
62 |
| - str|int -> DataFrame is returned. |
63 |
| - list|None -> Dict of DataFrames is returned, with keys representing |
64 |
| - sheets. |
| 58 | + Available cases: |
65 | 59 |
|
66 |
| - Available Cases |
| 60 | + * Defaults to ``0``: 1st sheet as a `DataFrame`; ``1``: 2nd sheet as a `DataFrame` |
| 61 | + * ``"Sheet1"`` : Load sheet with name "Sheet1" |
| 62 | + * ``[0, 1, "Sheet5"]``: Load 1st, 2nd and |
| 63 | + sheet named "Sheet5" as a dict of `DataFrame` |
| 64 | + * None : All sheets |
67 | 65 |
|
68 |
| - * Defaults to 0 -> 1st sheet as a DataFrame |
69 |
| - * 1 -> 2nd sheet as a DataFrame |
70 |
| - * "Sheet1" -> 1st sheet as a DataFrame |
71 |
| - * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames |
72 |
| - * None -> All sheets as a dictionary of DataFrames |
73 |
| -
|
74 |
| -sheetname : string, int, mixed list of strings/ints, or None, default 0 |
| 66 | +sheetname : string, int, list, or None, default 0 |
| 67 | + Alias of `sheet_name`. |
75 | 68 |
|
76 | 69 | .. deprecated:: 0.21.0
|
77 | 70 | Use `sheet_name` instead
|
78 | 71 |
|
79 |
| -header : int, list of ints, default 0 |
| 72 | +header : int, list of int, default 0 |
80 | 73 | Row (0-indexed) to use for the column labels of the parsed
|
81 | 74 | DataFrame. If a list of integers is passed those row positions will
|
82 | 75 | be combined into a ``MultiIndex``. Use None if there is no header.
|
83 | 76 | names : array-like, default None
|
84 | 77 | List of column names to use. If file contains no header row,
|
85 |
| - then you should explicitly pass header=None |
86 |
| -index_col : int, list of ints, default None |
| 78 | + then you should explicitly pass header=None. |
| 79 | +index_col : int, list of int, default None |
87 | 80 | Column (0-indexed) to use as the row labels of the DataFrame.
|
88 | 81 | Pass None if there is no such column. If a list is passed,
|
89 | 82 | those columns will be combined into a ``MultiIndex``. If a
|
90 | 83 | subset of data is selected with ``usecols``, index_col
|
91 | 84 | is based on the subset.
|
92 | 85 | parse_cols : int or list, default None
|
| 86 | + Alias of `usecols`. |
93 | 87 |
|
94 | 88 | .. deprecated:: 0.21.0
|
95 |
| - Pass in `usecols` instead. |
| 89 | + Use `usecols` instead. |
96 | 90 |
|
97 | 91 | usecols : int, str, list-like, or callable, default None
|
98 |
| - * If None, then parse all columns, |
99 | 92 | * If int, then indicates last column to be parsed
|
100 | 93 |
|
101 | 94 | .. deprecated:: 0.24.0
|
102 |
| - Pass in a list of ints instead from 0 to `usecols` inclusive. |
| 95 | + Pass in a list of int instead from 0 to `usecols` inclusive. |
103 | 96 |
|
| 97 | + * If None, then parse all columns, |
104 | 98 | * If string, then indicates comma separated list of Excel column letters
|
105 | 99 | and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
|
106 | 100 | both sides.
|
107 |
| - * If list of ints, then indicates list of column numbers to be parsed. |
| 101 | + * If list of int, then indicates list of column numbers to be parsed. |
108 | 102 | * If list of strings, then indicates list of column names to be parsed.
|
109 | 103 |
|
110 | 104 | .. versionadded:: 0.24.0
|
|
114 | 108 |
|
115 | 109 | .. versionadded:: 0.24.0
|
116 | 110 |
|
117 |
| -squeeze : boolean, default False |
| 111 | +squeeze : bool, default False |
118 | 112 | If the parsed data only contains one column then return a Series.
|
119 | 113 | dtype : Type name or dict of column -> type, default None
|
120 | 114 | Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
|
|
124 | 118 |
|
125 | 119 | .. versionadded:: 0.20.0
|
126 | 120 |
|
127 |
| -engine : string, default None |
| 121 | +engine : str, default None |
128 | 122 | If io is not a buffer or path, this must be set to identify io.
|
129 | 123 | Acceptable values are None or ``xlrd``.
|
130 | 124 | converters : dict, default None
|
|
156 | 150 | keep_default_na : bool, default True
|
157 | 151 | If na_values are specified and keep_default_na is False the default NaN
|
158 | 152 | values are overridden, otherwise they're appended to.
|
159 |
| -verbose : boolean, default False |
| 153 | +verbose : bool, default False |
160 | 154 | Indicate number of NA values placed in non-numeric columns
|
161 | 155 | thousands : str, default None
|
162 | 156 | Thousands separator for parsing string columns to numeric. Note that
|
|
168 | 162 | argument to indicate comments in the input file. Any data between the
|
169 | 163 | comment string and the end of the current line is ignored.
|
170 | 164 | skip_footer : int, default 0
|
| 165 | + Alias of `skipfooter`. |
171 | 166 |
|
172 | 167 | .. deprecated:: 0.23.0
|
173 |
| - Pass in `skipfooter` instead. |
| 168 | + Use `skipfooter` instead. |
174 | 169 | skipfooter : int, default 0
|
175 | 170 | Rows at the end to skip (0-indexed).
|
176 |
| -convert_float : boolean, default True |
| 171 | +convert_float : bool, default True |
177 | 172 | Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
|
178 | 173 | data will be read in as floats: Excel stores all numbers as floats
|
179 |
| - internally |
180 |
| -mangle_dupe_cols : boolean, default True |
| 174 | + internally. |
| 175 | +mangle_dupe_cols : bool, default True |
181 | 176 | Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
|
182 | 177 | 'X'...'X'. Passing in False will cause data to be overwritten if there
|
183 | 178 | are duplicate names in the columns.
|
184 | 179 |
|
185 | 180 | Returns
|
186 | 181 | -------
|
187 |
| -parsed : DataFrame or Dict of DataFrames |
| 182 | +parsed : DataFrame or dict of DataFrames |
188 | 183 | DataFrame from the passed in Excel file. See notes in sheet_name
|
189 | 184 | argument for more information on when a dict of DataFrames is returned.
|
190 | 185 |
|
191 |
| -Examples |
| 186 | +See Also |
192 | 187 | --------
|
| 188 | +to_excel : Write DataFrame to an Excel file. |
| 189 | +to_csv : Write DataFrame to a comma-separated values (csv) file. |
| 190 | +read_csv : Read a comma-separated values (csv) file into DataFrame. |
| 191 | +read_fwf : Read a table of fixed-width formatted lines into DataFrame. |
193 | 192 |
|
194 |
| -An example DataFrame written to a local file |
195 |
| -
|
196 |
| ->>> df_out = pd.DataFrame([('string1', 1), |
197 |
| -... ('string2', 2), |
198 |
| -... ('string3', 3)], |
199 |
| -... columns=['Name', 'Value']) |
200 |
| ->>> df_out |
201 |
| - Name Value |
202 |
| -0 string1 1 |
203 |
| -1 string2 2 |
204 |
| -2 string3 3 |
205 |
| ->>> df_out.to_excel('tmp.xlsx') |
206 |
| -
|
| 193 | +Examples |
| 194 | +-------- |
207 | 195 | The file can be read using the file name as string or an open file object:
|
208 | 196 |
|
209 |
| ->>> pd.read_excel('tmp.xlsx') |
210 |
| - Name Value |
211 |
| -0 string1 1 |
212 |
| -1 string2 2 |
213 |
| -2 string3 3 |
| 197 | +>>> pd.read_excel('tmp.xlsx', index_col=0) # doctest: +SKIP |
| 198 | + Name Value |
| 199 | +0 string1 1 |
| 200 | +1 string2 2 |
| 201 | +2 #string3 3 |
214 | 202 |
|
215 |
| ->>> pd.read_excel(open('tmp.xlsx','rb')) |
216 |
| - Name Value |
217 |
| -0 string1 1 |
218 |
| -1 string2 2 |
219 |
| -2 string3 3 |
| 203 | +>>> pd.read_excel(open('tmp.xlsx', 'rb'), sheet_name='Sheet3') # doctest: +SKIP |
| 204 | + Unnamed: 0 Name Value |
| 205 | +0 0 string1 1 |
| 206 | +1 1 string2 2 |
| 207 | +2 2 #string3 3 |
220 | 208 |
|
221 | 209 | Index and header can be specified via the `index_col` and `header` arguments:
|
222 | 210 |
|
223 |
| ->>> pd.read_excel('tmp.xlsx', index_col=None, header=None) |
224 |
| - 0 1 2 |
225 |
| -0 NaN Name Value |
226 |
| -1 0.0 string1 1 |
227 |
| -2 1.0 string2 2 |
228 |
| -3 2.0 string3 3 |
| 211 | +>>> pd.read_excel('tmp.xlsx', index_col=None, header=None) # doctest: +SKIP |
| 212 | + 0 1 2 |
| 213 | +0 NaN Name Value |
| 214 | +1 0.0 string1 1 |
| 215 | +2 1.0 string2 2 |
| 216 | +3 2.0 #string3 3 |
229 | 217 |
|
230 | 218 | Column types are inferred but can be explicitly specified
|
231 | 219 |
|
232 |
| ->>> pd.read_excel('tmp.xlsx', dtype={'Name':str, 'Value':float}) |
233 |
| - Name Value |
234 |
| -0 string1 1.0 |
235 |
| -1 string2 2.0 |
236 |
| -2 string3 3.0 |
| 220 | +>>> pd.read_excel('tmp.xlsx', index_col=0, |
| 221 | +... dtype={'Name': str, 'Value': float}) # doctest: +SKIP |
| 222 | + Name Value |
| 223 | +0 string1 1.0 |
| 224 | +1 string2 2.0 |
| 225 | +2 #string3 3.0 |
237 | 226 |
|
238 | 227 | True, False, and NA values, and thousands separators have defaults,
|
239 | 228 | but can be explicitly specified, too. Supply the values you would like
|
240 | 229 | as strings or lists of strings!
|
241 | 230 |
|
242 |
| ->>> pd.read_excel('tmp.xlsx', |
243 |
| -... na_values=['string1', 'string2']) |
244 |
| - Name Value |
245 |
| -0 NaN 1 |
246 |
| -1 NaN 2 |
247 |
| -2 string3 3 |
| 231 | +>>> pd.read_excel('tmp.xlsx', index_col=0, |
| 232 | +... na_values=['string1', 'string2']) # doctest: +SKIP |
| 233 | + Name Value |
| 234 | +0 NaN 1 |
| 235 | +1 NaN 2 |
| 236 | +2 #string3 3 |
248 | 237 |
|
249 | 238 | Comment lines in the Excel input file can be skipped using the `comment` kwarg:
|
250 | 239 |
|
251 |
| ->>> df = pd.DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) |
252 |
| ->>> df.to_excel('tmp.xlsx', index=False) |
253 |
| ->>> pd.read_excel('tmp.xlsx') |
254 |
| - a b |
255 |
| -0 1 2 |
256 |
| -1 #2 3 |
257 |
| -
|
258 |
| ->>> pd.read_excel('tmp.xlsx', comment='#') |
259 |
| - a b |
260 |
| -0 1 2 |
| 240 | +>>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP |
| 241 | + Name Value |
| 242 | +0 string1 1.0 |
| 243 | +1 string2 2.0 |
| 244 | +2 None NaN |
261 | 245 | """
|
262 | 246 |
|
263 | 247 |
|
|
0 commit comments