From ff1964b51fe9a2a45a9e4783fa979bebd4390772 Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Fri, 19 Aug 2016 11:14:49 -0700 Subject: [PATCH] GH13967: move around _NA_VALUES and add doc for read_csv().na_values --- pandas/io/common.py | 8 ++ pandas/io/excel.py | 185 +++++++++++++++++--------------- pandas/io/parsers.py | 9 +- pandas/io/tests/data/test5.xls | Bin 0 -> 20480 bytes pandas/io/tests/data/test5.xlsm | Bin 0 -> 8017 bytes pandas/io/tests/data/test5.xlsx | Bin 0 -> 8002 bytes pandas/io/tests/test_excel.py | 16 +++ 7 files changed, 121 insertions(+), 97 deletions(-) create mode 100644 pandas/io/tests/data/test5.xls create mode 100644 pandas/io/tests/data/test5.xlsm create mode 100644 pandas/io/tests/data/test5.xlsx diff --git a/pandas/io/common.py b/pandas/io/common.py index b7ac183b7ab41..127ebc4839fd3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -14,6 +14,14 @@ from pandas.core.common import AbstractMethodError from pandas.types.common import is_number +# common NA values +# no longer excluding inf representations +# '1.#INF','-1.#INF', '1.#INF000000', +_NA_VALUES = set([ + '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', + 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' +]) + try: import pathlib _PATHLIB_INSTALLED = True diff --git a/pandas/io/excel.py b/pandas/io/excel.py index b415661c99438..c713cafc0e110 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -16,7 +16,8 @@ from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, - EmptyDataError, get_filepath_or_buffer) + EmptyDataError, get_filepath_or_buffer, + _NA_VALUES) from pandas.tseries.period import Period from pandas import json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, @@ -27,12 +28,105 @@ import pandas.compat.openpyxl_compat as openpyxl_compat from warnings import warn from distutils.version import LooseVersion +from pandas.util.decorators import Appender __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] _writer_extensions = ["xlsx", "xls", "xlsm"] _writers = {} +_read_excel_doc = """ +Read an Excel table into a pandas DataFrame + +Parameters +---------- +io : string, path object (pathlib.Path or py._path.local.LocalPath), + file-like object, pandas ExcelFile, or xlrd workbook. + The string could be a URL. Valid URL schemes include http, ftp, s3, + and file. For file URLs, a host is expected. For instance, a local + file could be file://localhost/path/to/workbook.xlsx +sheetname : string, int, mixed list of strings/ints, or None, default 0 + + Strings are used for sheet names, Integers are used in zero-indexed + sheet positions. + + Lists of strings/integers are used to request multiple sheets. + + Specify None to get all sheets. + + str|int -> DataFrame is returned. + list|None -> Dict of DataFrames is returned, with keys representing + sheets. + + Available Cases + + * Defaults to 0 -> 1st sheet as a DataFrame + * 1 -> 2nd sheet as a DataFrame + * "Sheet1" -> 1st sheet as a DataFrame + * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames + * None -> All sheets as a dictionary of DataFrames + +header : int, list of ints, default 0 + Row (0-indexed) to use for the column labels of the parsed + DataFrame. If a list of integers is passed those row positions will + be combined into a ``MultiIndex`` +skiprows : list-like + Rows to skip at the beginning (0-indexed) +skip_footer : int, default 0 + Rows at the end to skip (0-indexed) +index_col : int, list of ints, default None + Column (0-indexed) to use as the row labels of the DataFrame. + Pass None if there is no such column. If a list is passed, + those columns will be combined into a ``MultiIndex`` +names : array-like, default None + List of column names to use. If file contains no header row, + then you should explicitly pass header=None +converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the Excel cell content, and return the transformed + content. +parse_cols : int or list, default None + * If None then parse all columns, + * If int then indicates last column to be parsed + * If list of ints then indicates list of column numbers to be parsed + * If string then indicates comma separated list of column names and + column ranges (e.g. "A:E" or "A,C,E:F") +squeeze : boolean, default False + If the parsed data only contains one column then return a Series +na_values : str or list-like or dict, default None + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted + as NaN: '""" + "', '".join(sorted(_NA_VALUES)) + """'. +thousands : str, default None + Thousands separator for parsing string columns to numeric. Note that + this parameter is only necessary for columns stored as TEXT in Excel, + any numeric columns will automatically be parsed, regardless of display + format. +keep_default_na : bool, default True + If na_values are specified and keep_default_na is False the default NaN + values are overridden, otherwise they're appended to. +verbose : boolean, default False + Indicate number of NA values placed in non-numeric columns +engine: string, default None + If io is not a buffer or path, this must be set to identify io. + Acceptable values are None or xlrd +convert_float : boolean, default True + convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric + data will be read in as floats: Excel stores all numbers as floats + internally +has_index_names : boolean, default None + DEPRECATED: for version 0.17+ index names will be automatically + inferred based on index_col. To read Excel output from 0.16.2 and + prior that had saved index names, use True. + +Returns +------- +parsed : DataFrame or Dict of DataFrames + DataFrame from the passed in Excel file. See notes in sheetname + argument for more information on when a Dict of Dataframes is returned. +""" + def register_writer(klass): """Adds engine to the excel writer registry. You must use this method to @@ -74,100 +168,13 @@ def get_writer(engine_name): raise ValueError("No Excel writer '%s'" % engine_name) +@Appender(_read_excel_doc) def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, converters=None, engine=None, squeeze=False, **kwds): - """ - Read an Excel table into a pandas DataFrame - - Parameters - ---------- - io : string, path object (pathlib.Path or py._path.local.LocalPath), - file-like object, pandas ExcelFile, or xlrd workbook. - The string could be a URL. Valid URL schemes include http, ftp, s3, - and file. For file URLs, a host is expected. For instance, a local - file could be file://localhost/path/to/workbook.xlsx - sheetname : string, int, mixed list of strings/ints, or None, default 0 - - Strings are used for sheet names, Integers are used in zero-indexed - sheet positions. - - Lists of strings/integers are used to request multiple sheets. - - Specify None to get all sheets. - - str|int -> DataFrame is returned. - list|None -> Dict of DataFrames is returned, with keys representing - sheets. - - Available Cases - - * Defaults to 0 -> 1st sheet as a DataFrame - * 1 -> 2nd sheet as a DataFrame - * "Sheet1" -> 1st sheet as a DataFrame - * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames - * None -> All sheets as a dictionary of DataFrames - - header : int, list of ints, default 0 - Row (0-indexed) to use for the column labels of the parsed - DataFrame. If a list of integers is passed those row positions will - be combined into a ``MultiIndex`` - skiprows : list-like - Rows to skip at the beginning (0-indexed) - skip_footer : int, default 0 - Rows at the end to skip (0-indexed) - index_col : int, list of ints, default None - Column (0-indexed) to use as the row labels of the DataFrame. - Pass None if there is no such column. If a list is passed, - those columns will be combined into a ``MultiIndex`` - names : array-like, default None - List of column names to use. If file contains no header row, - then you should explicitly pass header=None - converters : dict, default None - Dict of functions for converting values in certain columns. Keys can - either be integers or column labels, values are functions that take one - input argument, the Excel cell content, and return the transformed - content. - parse_cols : int or list, default None - * If None then parse all columns, - * If int then indicates last column to be parsed - * If list of ints then indicates list of column numbers to be parsed - * If string then indicates comma separated list of column names and - column ranges (e.g. "A:E" or "A,C,E:F") - squeeze : boolean, default False - If the parsed data only contains one column then return a Series - na_values : list-like, default None - List of additional strings to recognize as NA/NaN - thousands : str, default None - Thousands separator for parsing string columns to numeric. Note that - this parameter is only necessary for columns stored as TEXT in Excel, - any numeric columns will automatically be parsed, regardless of display - format. - keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default NaN - values are overridden, otherwise they're appended to - verbose : boolean, default False - Indicate number of NA values placed in non-numeric columns - engine: string, default None - If io is not a buffer or path, this must be set to identify io. - Acceptable values are None or xlrd - convert_float : boolean, default True - convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric - data will be read in as floats: Excel stores all numbers as floats - internally - has_index_names : boolean, default None - DEPRECATED: for version 0.17+ index names will be automatically - inferred based on index_col. To read Excel output from 0.16.2 and - prior that had saved index names, use True. - Returns - ------- - parsed : DataFrame or Dict of DataFrames - DataFrame from the passed in Excel file. See notes in sheetname - argument for more information on when a Dict of Dataframes is returned. - """ if not isinstance(io, ExcelFile): io = ExcelFile(io, engine=engine) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5372203318d69..e74ad78ed5940 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -25,7 +25,7 @@ from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, _get_handle, UnicodeReader, UTF8Recoder, BaseIterator, CParserError, EmptyDataError, - ParserWarning) + ParserWarning, _NA_VALUES) from pandas.tseries import tools from pandas.util.decorators import Appender @@ -33,13 +33,6 @@ import pandas.lib as lib import pandas.parser as _parser -# common NA values -# no longer excluding inf representations -# '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = set([ - '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', - 'N/A', 'NA', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan', '' -]) # BOM character (byte order mark) # This exists at the beginning of a file to indicate endianness diff --git a/pandas/io/tests/data/test5.xls b/pandas/io/tests/data/test5.xls new file mode 100644 index 0000000000000000000000000000000000000000..4bb7cd4767dd7f0632d971f43e1e87147dc42c29 GIT binary patch literal 20480 zcmeHP2V4``*1rh>0;qr@q9}%r0+A-Lq9RgkAgI_2QbH2}5fFFf!78lS3$6;HVnGEJ zcI~~c?U_|9pom!4wya&BYt47g3<;AViQ#)++wZrN-<`~yIro3gJ@?#m%iN^$qT!u& zKX^}4+&g=7(!|dsSP9}NNph*Luv=f1X6oQ9Uz%P>IkV5 zBr`~zAqgOvL$ZK`K7`4Jas3xE<6l+&weg> zF9h!|ajf7rn`R>;NfJpRlSl-7C&71fkJg0CLBEyipiX`iZQY0F)Fi`58kr2AaL{8q z89)+AG^G<#Qb` zXsDOddyvMFWQ+wNK7$2*{)77o{wI#U zln&t>)S(xgIiv_Eh7eDX0<{k3Hq^O-;yFZf!jocnLZXO=Cw@b|QQSTh-va7vL7iK{ znM3wcd2Ub?XNXESfuBznV|8-L1#ONmct8SpKm>S39QnV)(Uru4j_y)TNlTWd0ul)N z#*kR>Ce+#iJk*^WLX$yKR4(3EIc|czRAZp^3}regl7bwqzRJ<_s~o+*!oeZlDocxH zar+7f0geVwP-rp4(D^Ec?pH9l&`*9%3jeE8bd+M4@k}5-L*Hy+Wnt-HY3bxO&QW@4 z#_I$Y;~MF5{Jn?+frif%1PZWZ(UeAU z-rkJ@-H1Ku#tV$%{WJ@z-ByaQbGhQ9?Dv5 zMjNt;n9DPQU=|)w%y{->Gl3x!Q!H{SK&9Ay$rjQZN+)Q@AhoK%i$QA8H%yL*Rz^af zFo79QNJ^nz0EU^rtALr2t`}L3L8J`F0t_jXO+YDmF%1w+=pY`fErW9>TOqPFDj?;T z>Ci}EG zoo5{6xkHVq33)34zmXls1W3n`Iz^Xu<>YTVZKD629WXBk+o$y4vT$}aJm8he+h0bn zP(q)hguYb?T^avcCH&iz(3R=4TnT@n61p;cS|6P8QGO`1!%7OLDV^4*DV^4*DV^4* zDg9?9_{#KA#=lz$&R!+-ol59Ol+bB?aONy;pY0Ss&Y&s!GDE`^dMoq@kG2EOsA+n1 z^wG{oI};wQk0!)CivA}sP>0k%OO?>el+d>*q~i>p!e_-3oc&XDS^bt#`fE$+S4jxW z=U^Z~>w~KX6hB4B#Q>U)D+vvB0bIgj0*lU)iz^Kjzsx?g|3LJX;!K1~SX`iSaCJgn zPnJIy85$bv8H1hac89ANQheIZ(srRk(^+1iaTUxn!c`B`QMTos}48T>Q& zMq~yWNOtrlmD=lsnS4i*DMv>JF08f~Q8P;p-Zpe3tswGqn`S_kW@VtK1I>_tQ`}}r zzNS+N2OiZ>MA7JS(t%YbpACYcRSghyL**dYuGIiR*HjLI!B!0rbXVmd z7VT-O9bB|lLlf1tgR9e|7Te>Ff<*ABiqPyVc@P9Se0p=$A9f8z8=l`>IXow>!!~tkeThT|3t5fvBz>8}&d`*N&}vAgXJ} zP6mSGr6QswksF6$+rbBtGZ!R{hF!wyyt)8~Hr06{?}tpH806KfS27Uo2H>o$EQxd_ zfzO}`8V(%>4Y0EzndmZTd$tu5sIUo{e6~_uft6MY2X@rK9AQdJR#H;ZL@94??INW< zj?knyP2IUWWN2(--rnhojVKWpG;!zux;DnH6nB{^6gGTmgO+4-WJ)!44-YwzB9RjJ zkkl73K-jI_Q)V8tWs~`&)W;+eu?$2Sf_CiK(WJE_josRj2H4qI2OhrmOtF}DY%vNv%~FLJrl*Mm|2)54u^1D!7zLhYt3nLZ)5L+b zwY7@Hv}cP^;A!?M#4tTg94L8Mpjb=?wipGT)=h;Nrl*Mmcb$HuSd1xKi~>({RUwAy zY2v`scXlZj(-FnEtM)Vjh;diL(_CCvop?x#f!2gOYGg636I+Y|PqR`XhUsZ8u5W(I zQ7p!cEk=Q-*{Kl2^fVXObLY+}7Sow6MuDd}s1U>SG#6J%O|)V$0=5_hp5~-N4AawG zTo;x$v>SHan6t$w@H96SVwj%h;#yj{P*G`G5DOTt5Lv(J!|FFXNwk&rm9ToYs!B1a zWg{qf6dTl>1hZ_Ao5# z8y>Zk|BZajb4(=~q%Ta|qGUa=CXT*fQ2_0T*SK=f5J?Q+#XhXz`-Ue(rX_@@#w8^# z<$o^^$TgPe5qkR>ZVgNT=*F#x+SEt_L2+O#6@aS>lmIQ*V}%cKR>J`^Fz@(scRF%+ z`CNV2r2}sxd}BqClLWq^goLI1ES6sFB)XtiEDQ!CVY8+Hu0^0-0u;MZD~0YO)e6V$ zS~9KZJE>Omom4COE}zRI_SqVerF?2g(K$^nOW6;HX~mxSS3W9_a^0OJcbyt}RHhK$sM(6^!>`eXPGIJSr|R zMgVgKmZBO4VA=kl5c+-`C>jF_(sKla(}o7)7aC8)p(|>GW^^FT=eB}L1_K;o9~<0I z0XQB1lz>aTw~2p;f*`mn6C7e78@#mwa5@eu0jGxsTngNc2@bK46%aJqC;-QJNY|L0 zS@OBMq&x2B8IYKq2Ck4MUu9e)C=c#RP6l7ZN^fjvxRxQUG+glbH=Dl!43Ho$1;!;# z5=Aw%|3NHeOdu}d=F&uH{kWBh838n8-YAPj(&&Q=99m$w7Q_SeCxb<4sVU(QURZ&d z(-8vmU;xIFrhrdjV4-I;R6KT&r;lZV{2{cDnd0jaKL=FZ|PoXw}bf}rde4Dn|LE?~tt zl1_P0BL{PPwmVr0o9uCU4rIaa2kQ7*r1M@QxVuU|z1AeJ_&J1Z{hgph5`}ND7n8+P zdcbbV67c(Ma_rbK_{t^)1rk!=8Vwj?A_kmj_(*_GqTyZ)1tHK#@Pn6d%BDU@1|D21 z0v^mKhWrvq0V#lcNde>qTnIG8HCjSk#bTfXH>Lte(EuM^K$b6GPJn^9761Was4Jub zDgn(E@&aL;K`R_Sp)5)TLMBq%?{lFxH{!w-w1{D`B2lVyA>V`)GHi{{@Zdq(06$mQ zz1!FQq>o z=R3Ao&Z313(t!P?TKE$SVop{)P7eMAR~xHz_d{*6G4RrzeZ1ju&Q3PT02O zeEdl#1OIoqUuISAxt?;suH^cK^^3-Lp7%yWYkI=G>z3w0eP6xnrC8Rxo*GXT(8Jp&aGp5PJeu0#tGwejjfMn z^<3NEue|DhY0}&>w;U%=J%{4xvcjDQ@_Y{nZVr1rPdN5$%8&~WOQ)IFIp{p!QTVt+ zXlH}>Ed%a4Ir!eY__}TXfO+pl4_z-EnY{Yc>DoHG`m-_jPyG>0wo2xFx#0Htc6At- zLIR^~d1IASZ+>(Pp16a(aa&ASiYQ@fHzA&L7F`4*wV3@p_wcMsnL{q0G_g+aRe4xb%LDyu-6E{A zHZPeHcCGWuHSO=X2VQw=JoK8!#PUU3&PjHjbP}%#KAu(K@W7v!=&>v??zf0_Te{sJ zc6jj?QQ1|q%H8|kSAT}0G%e_)>C260K>_DN!q@q!v7*VMZj5sfj3;A6Dd-+8#6h|z z1qOZYX0DsIxW*&$pq+ojj`QUQ|3!R@I{2I^zPz^f+JpPEJTBZes#-Ou#*>uZ|{vA&_B-}mI_TiHZlXJTkw7EUt?9?HdmQ{;$x9_eKRPVO0GJoz+ zH(|e3e6M-^Mn1msuFF0DdV3pPuBT$$ENqFuCc{vhE`l?v{m1Bwi%h#4 z9kG1rJ^$gFhu6ebqq~o_&fln4H+id#aA+?Q8Ul<{VRP zUTD0v@Il3ByO|bm!|v3$7F?fmGI3-tqpNj;PQO0<=;HI5+dI#!sJN+Lnl}G_&QDw7 zR&@I@L9d^syUnFtaktIh=%&>U^qhM-ZwKGwsrBGL>_2w?M{@h8FUjod+>drOzCm7h zW*iymU}G5@aL(HOjNzeGKY99fG`k-+rQ?P+FHA2-l!U&TYi_Y*vcDZ~uZK+=mlM@} z_U(E%^1?wQGs`u3KNv3fc%pl?gH?J@dp*A$fipUcJ(K4?`EiVRu-2cw_d3pP-Ktw* z`hYJrr#_$e-?gIskRvBY{9J$Ts95M~a9y@?hs#%d z%qMOgj`trXEDPRS7d!1{i>=w0*J|BfJ!bQm>KAh*zZ*YVZvUnGAzsOs)s8bN9{C#K zb;dNk=bpRgK99}N8Tj&+@sSbnX+?!UtSHW28oMV~xPL-b*aG+2fhN72PMDTo**e_i z(Ig}B4>Qu|2yGnh9eg+@@7d4$VC1oOX4^z2+}_ z7k9<3CcbsAG3V@Trrz4+y~c7>yGa!lVW0l|aLTp*?YzyK7p+;_`@@4bFYjOJ=e_xe z+er z8E0C5{9v0|IV-bAwtMGR{#h^~IpXpAF$VF+EKkR-I(3?iYm?c<(9B_d>HgOl>+U6* zrzZSf+i%J88_yzI-53xk{7fdfB@H#2;&#gL;qr_#(I%z=(^eQbmiLP?+L;ryFKxxB z$$s?{zN9)p+bZXfLlDorYD~hyXQL(u1Z{YBC$f`{9(Rbwd(NgYCKc;7`>i|o zfaHd@{L}p2_j97M+Aj(n=W%!OlP;UYzb~A!bY()~FHRGzpIqzHTO8-T{(MNk(?#OI z-ui}-U3Z*}{zvwS{W`U_Ly9B32Sz*jFGI)SpHXEk_y;##GoN$8==rYM?{?Jf8td#VTr;cX&$n_v z>JLoSc3i*8WW=fTIp0=PK5u*D%WeBN<-0%qD{Z>tsp88Kt}CDId+^2U=KjxX3d0Hjg@2`TY;8PWQht%K83U1G6_8ZnNvYt1%wD zX~4s83r9^|eynteyJ@Cg&+$3^5{lOJI4_!VBJh-@&y2bWDX-H#onII|t*W*yy?Hpw z>qpyLZ8_l|ZLF4ENNqh_W6q7TA-n-=D$HZGTBk%M<@%pIwqAI%V(9Ku+$|4Af8S}T z=Et&4PkT({1|%o+HCp7q=5lDo4*UG~?yiUJSKer0cYO1L1g&kZbq3?6wF;guTpj=D z?5=>m1&KX$+P5Elaw%Cb#of!letPVoxN_6_W$|zP>~@T@bBlkQ6*=Y7xLI|Z+$;MJ z*X=OcHmG*%0Vh#F?5$n4$=}Xid$;4m!4~Oz@&?~+b3O0OlbJ{Jc8KriObcnfk+Wib zewj~y&xMWyPi%7hVeRJc1BUuq?M>0--Cb>cjf|dt?X1t`w^sMh@fPhXOfT{G>ygRJ z4c>j?&k==T9UUBMxR>knPuo^67Tz1n-?Vb9{XDX80C#Q1KjYla@Qw_8*muvGEYsXB z`QzX2EH3hm^IHF!h}!zMeQhKx>ABf*?JE78rgP4AS--!$ps*~jEN+DJ_SwVRU(b7RYVM&E@ogjQR>bE9mwY!Y#ngU#?RDGV zhI6@V&Rh&hyEw$aD8BxJTl~zIds^uIdST4l8$Wlr)Bfs(VOB;}??xng+*^K)A33cu zW8FXX(^u^@F0|R2Su%gup(7zlCf8q><@M7(@<3=89M$#i7eT;J=D(lsHK42MPkzr{ zC&V4=Hpwu5)92#f_H;aY??dj;x~IlPWy88&tsAksGNmqlfBo`7tJ5Et8yTDBh&&GW zcyhd*?E$ecB62ig_Zw+aTC@rv#+t9dnhOF#_3w4dD$O&zUX>5 zVE3he=`M)hIx@M(({I<^{qAYsGp{GDo^WPb{eoM*wLO+k+&uhkr;4%NGDNq#c}^HG zf5R=it3?A#9$X#sw*1PsE1$*)c%=)Ye7^S_H1i`pZf1`_T3%Ybb`SJvrqFHMvidf9 zG(z=ujPAK%bP}X{!pSJ--OOqHq@*hc43^l` zg_510Jv+>a>DJkJ{FfmtES3-oWa4di~Kt;fYFB7I=C#qhJf9B=%8 z2K;4n3phu?57zM75YG5t4Se#c^yTHCh!F2Eir$r?+f(P66eXLQg3y@OurvVsI5^+K zljATwq5s)o7R8-Ioo#?SnmQhO+cK{0WGIilyD+ZT!P^rKAmUVw11^+t`Q=fV7{YPI zTly`L35QqPDR-? zEHRsg6GkTOGgTw>|6dS-cPLSxn%e#Y);3TU>iohRpP>|eYT}A_h_6ra;~gSn4H0q; zY6oW2P-CrRpWd*EVh>f^KJY|(oaGymv()sl4@IIbI{}OaNx(i;o?V7Tx3ep)THQCK~Q*PTw-d{)L4OkMx-bKp7lY-v;vFj zMU@=9eland`p+3UKH@|9Lsj{?Rlimh2~7?1S3vn4-j`$fe}RN;0?Qu)31j^zNciKG zC`h;pg?eD`gayGRJhTU{d*nbud(4M~Wm^l0X%DoeU+8eEE#g0AAQm3VzRf@g>Vfq^ zFP)MWKtf0~FJ>E=lrm{5uBuL@P?1~y;557!O@=oP5hPyzp&EDiPb|z4S^H?HU}1v*xBz?r0Kf<+&pvLl!2kff zu>k-I06wOntb@I)xxMQ{Z7)Z27h_&eJKO7-*qH3;08I4#|Be6QCs3sQP`#BO=qG}d z;P$;`(}HJ89EjJU$Z|#cj+KLLhE0-53v#QGQV7!T->ovSb$I47Q6I%>XIe&rU6jHz z`GV{LuCAEzjCsrK+Te*~A3vlp$BBxt3dbxCi5jBf_PfFo+KUBDxGUeMbYLj1*$Jm0d}Vtz55I$7svm5F_PM!M5`CS z3rkeux>!(3074MPBpJe{bk6Iy-CL%co?;-P?HVQJKNeOb z(9p-c6@3hIExSH)>PHl|3cKF<4x_=bN@B#c_Z4ya95kJ#rY0cmv#->b8jBXMY<&X9 zXOvBeb+T)zuTxzwH%_q zQAJab2+h%l&gQl*e7rxd|8etwn2^6L9UeOf!4d$5?mXWCuFig}dJYqsiNJ3R8^fcH zo08tNXJsC(^L-kaUQ_ysKCa(q|6{bR$EavNMaS5Cky2iW#xhU+fC%m^>0lwAhVqaE z-DF&VAj_mqwaAPft%mb`>-rbkxyNSdlmyWRY%?RUxAS+Tn#brAU;4l!NiCn+DmnD- znRLw&=Ub>&df+b3zLU8&z^Hzn_R$TY?+txQ*VEUEFIm!U1}6y%uo_~ZJM&m=>TXR> zFczMu|JImCuRN5IUPWg`3BAVwNpSf->}30J&+W9JhS}AlamWXi<7i2~7xmekg?M2` zY4H3Gir{xNfyZ~)@1vhd9hxSz0DKHjTfRS-;^E+I19fn)`5|V1u?7RJXlU8{-yTJ= zgVt^Q#GN~Vje*0yP9u>53MnR?=@UFp0j69|mQb+DSzL8mPr6!$Yl0DRT2S*2TgKM* z6T0@Rtm}b#8k8*f2dtB`9)wbf6ZLGs_Gv>&>}Wz(=E-q1ZbayW*3_3zm6+;1wdP+h zmA^Eji%JX!4zJwjrHVzl<{B}F+X7+9l=@`RDWMhI5ImMXqo#u?&65lZU*c_ zu<9Z=c$JA6DYT3smvuk1XkPv5n=8vJ$Duk)DfcYr#8$R=IpSAr_{S6Z=NPYbGwxYk z>mu)|y>A>iRYcHs@@N3{F67E4*=9cUPQMni@SeFqvczk&s> zrRFfhPuxg0=?8T6SZ1#RW518!W_aNLn7y=8(x!}#XgHLyr##~Eg>OZJE0_C}JhMD9 zY$s;sOq1)tSd{M_2huqP%1m5cDU?7i{Y;X@2JZ7H*P@qJbMfi|(ie4(a4+3H{W#5q z30PA395Q5wk8Ge!JPF*R{@masnfg~p!w7>A6@A^9V8gN>sq1Ms&C?kQe ztb+XudEEM|l|b31%C;#ZkIqTp1(yJ<=gd-G%t(r>I33>H?2tv2=~J1JOij$WL6RE96ak&RPvucvS$qwAB4 zqfQ>b8v0fNaX%4Ii|+_$m3Pnds>&1U`#LxSM;#Bos9YZKo(aOKPVC2kX3;HZS^(vC0?q>`lWoKT;>nqVwY~@dm82h;IzR&|wJJA8&V7yU zvS)W4S*-HgpQR}5%j-Zb$O?0DWIwM+?M=0HEqq7fz}cOnO)d6zWie-VOC6c4S9u~r zWh!lpUfHJyjV#~2&6a0Gu?|Ps=4iK?;4-OY8CP$Wt5MUaCa~#}sXtC`_ zSc$&KFQ-lZzEaqxrBIhZ8QpbUQfD21hS=C{>hJdJ9nfpV`Q2iteHWecaRC5B+8@pR zPk6gpn%kN4{k#hNSnH<#U^Ms^sObh$8orvkI?>Opf6I_1E}IBpSIEkxS zI=O>&UTHj)yL%VZjuQsP!`8ND;aw@wXMfIwdN@BN_+~P!&0bFuG=@;=+;3bP+?kpy z?@B27O!n3dz7$lNZx|)D8ZgzdgO>A#12ELF`D zPEN{~=&pP|)$zSgz9NA}v#Cw@{YFo7Kd&J5AZOX2r+B}F8{Gubpw^Ed-3L7J&aJUH z;VcdyMBActpSM808e!@2nYhq@Bn@{i#URh%({A|1I@MY{*ZHjm?40uy9-opmSv?1FDv(A{i*?M`+W@_03#a~*S**$mYRO0AIhM46CUQKl#6i{kuC{;d1N)pK4ncV zY0$&Y>M~m)oqFTJRhyEMhu@A*I|MgS5qWvhscBNDD+kD>QAO#zCZRSgkeW`L-kcxu zs5;HSPx5SgJg%hwSgzyZeDq24_tP92{0+;GUsfj6FFel=Rz%V+PTDB28Yr_e2pL`g z**>}XKoL+zxEij)lc-Q+l0Gq}oDdPn-yv{=mrQ6L#gfTt1m0+=rw9ys?XP$BNT~Sx zbrVPEmlFe^^@kgsmPs9*03!-tjw|%snQv?NJu?>V8C`AHXB1=Nr?;V`)5@y6-vKs4 z4q6~5p^hu7@t$24)L;dV1Rzhbq8X?>NUg>#;enkToRQ{5hH%U$1w7`U25vQM#&HAN zsO<-K3Tj!~^QPAbh%hy=PB3x}s7o?&6f=5O0KwWM$Bc^sVP<|b(b`5L3opKM1`LpLv;j`R_z z;Op(0m2-*Dwck^LkTV};++n6B%uA%5Cq(Z=%9v^LYX$&y*L34jec7!Aks-#LVm-`vcaktyz$cdK#sla0lSbj%Fonk*eDQ@D?Fs6 zLrAO7rZ!Vp$8WA+Ia|Tz=pCB2K(~62$-N48Z-HVG*7fyyGpwwCDab>EJR2h+Gp;~J zNi>QF(wfT?l9m^spe4&(?TmxHkis$n(D$0?mKi}YFZEzY*Ce{<$jgcQVh5{NwjS#y zKBVL3p28eKkaeMy<3&)2M*?lGe(uCC(Bw% zWbOmZDdj2mLA*J)^KPW=QM0R0y=F{@D3#q6O%BH_#MLRx^%_?a+bpVk0uJlu$gC}X zlUM*Nu+kNH7pg-u30vt1Z)mlqgt8}C2dn2iz3&X>dN(u4wycARnc6j|;WnrV1X*tk z2w6WzRuL0@956slzo(t285_9I3>rVuwHH~8pTIZu81l^MC38 zF1XJ*hS04XaztMH*v)lJNv`}sX5Jew`y-?ic4UY zS2i-qe91M%65bGGHQhM_itsu%S+O^QjE$42C= z?2LH)qdXndwGu-iv4_$#v|vQD!m$qAJhNF3_^| z@4k&F;SCsxR&WNiYen*>io1H*n*WsV>^QZE8Gc~MPVjfKyGN1Ga^t#|Lwdr1;3|-C zu!*_8bNhOm($d&=$xgR>a*$-jP=ur)tt4%uAZIF`O}BDLej}Gv)sz-f%5J2)ow6U@ z;ZYiU*azdbNRP7YEciif-I<0Qb)=6ZgaG>rZ~OuN&=z&XB0EnscuqN#s~4{i12I%K ziPMwxT3EkrZ|g8dzxY}Tfdtn%4%EwYqyFiFU6;(nv?Vng6^rhsiNP9H2}!)T8JSV? zRNOwPLnXeHHx9U6pyVahp6@I5%`nEdoG(3qVY`i3V}!$BgAhsk&7Y-sa^{nDD3Ky! zPJuD-vI5}j@g2acmeyR@lI{RaTJ5mg5Qmym;YZQXebDQ6qrIZe!kp_WZrshVqzVBW z9WPD*mQKLBKe2r@ss3i$#65UqcXw8lyzEef9w#l>68>=M(aRN(w@ zH_$%%D_K*U!xo0dN`a-2L*o*zny{=cR>_TQffB2lX9eKHblAq$Y^%zBExJ+W)~WUg z?$!RJl=kM_kDC{ru`-Cg7*Cx;wc!mUGpas&Zp4OPRE+5PSfg+{g2%B=>SodcQ;V=0 zV8m>&@X3^Bm;&c4ShOgDrE&71dDhdU68$+;Pg@ZC*N*d8@mG@fjH5RCiqC(GjUc}& zIX!egk3wUE`VVaU=;>W7&COk1_PZ`u*e?Z%ADxy}E~u($Kj7UC0hEm zh$2HL5M-#Ye6+62p~+M`Gd{g*7wnbRCU#Y_T4Z*ZivwO&=XgJxsP4|(nh%W;ZuiTa z+IHgUG77AY@ZvVJYopgye0@_{k~foXIr*GnQ{v-^RfWY?eKUM_wACr$DqvReOt!B( zI8Jf$jt(P|PU5(H^mR36OZu)mq7ZQ!B1v1eLM2@19aGDtNlvCfiX*MPUE(5UJisJO zloheCCgOXF>8vLnlwMk*9oszXPdQm4JRsEp-u8^A^<|p$?C%zCkW_d~4&}^rD;|yj zMjQ&5S~<3R;z+@0{D;|rjaZJtq{)$(PfWPzLMwuO7?{2B-kpd7`vfuNb^za^p|=Uq zGyGh0GjV3TYC*zt-h$0Wg_9lFmP-UtouF@auS1%^%?ndT3#uW>+8^jkIj9PTrct<6 zWCPo?;ukDutO2No9y)|7gj>RBVUPafX}iQ25pi9+>#5?jhdqHYyd* zEK(!%MrDTvZ2qUY{+|_rW~oo?L-jy@P{*9wH`45#p;kIx zlG1#aCed>C^Du|X#u=D~l3~~37EdW-L}%Aw^4W%%NMvq3lSfs$t8Op#3B3%dl1aE~ zvFSnb0D{1|;;~j~!%JY6q@|GBz#x-}rU~bRf-+3=n?q47%4%MeS&;&i9S`iOxQ%4!oi~&Cb)21;@q}JO|9NU>_B*tZzJp!T zCRhbSexwz>AN0*E9!}j0t&y#oPnoRu5Z{P&Gk%K7k9@p;c5LT#F{bM#IJFs$1z~F8 z(EFhJHkM|_qScFAdNVWms>TZ&iiLm-eT#t#{mI6Xo4|=PGcVM9zdge?(>KHN%__(d z0KY3F%lK8+o_O7x9G92a?F{v1o2QT0@eZJ>%Ez;}eQ(YMZSjW7`d@@)sIV4GEBCOF z;FBm|Enaz1QL8Skw1<|)e`}|yAoL^-4M}<|008(aBp*9E{)b336#pE_u|Lh9kR7Z; z24VzQ%vgXppa`lYd{7F-Saf}&n`@d;#SANo*_=?Xis_tkUyo$qGm*;7-;Op;LQ(g4 z>%mr-uF{Avacz(WPbSPcG{NB1MmX<09>6GKYbF?^^X(2WZWv~B>x**AwbAKn#afkO z8Dv+fq)c~_ism4%`x+Ytf*njGT)7?gzm>P&$SN$YBX9_TIvb(BnT+-(1w^Oz-dFly z;7gKGi7VQuC2c?M6!XPs2J(@I+)+O>oU;DQ+~o zxnZJ-X#Sq}{q0w`+15kd_$tV9<^W4xSrhV=HrJuAq!vCmgT`bHENt=O+_1L&$MyFe zOlyMjzJ5mS*~j?xj!q0%Lrh2wFVJxP1IQSdJZRhI_lK8%74EP3H%FNoD*sII&y$tE z0)NceXm}WXGi%=c%$)bx`#jIuM?)C{69m8p-~s>uI>0?K$vz`A0N@2C z06+@BMK_douy-}LcYUDae+olko`hf7Bol$kf2)9`E=LU-9kLM#4zQ}t!!i| zr~`J3&oN=L`-3u)_etNbY;bO>?ng057q*5<_oQCVG&8`W#QlDFFhpD-42toly&cC< zT>ET_ZFy2SNeW7E8^=qGVy^GK7`nWO8x2g{`?&!pr}>c0MHADb`d*sat%gFaLSV^j z5O3iZi!~`@%m|70gs~^Cr%@#j&G~w|F`K5Dnvf$hIdmyS3S~v+MD;cM{XJa;#}V1I z2X~XpY_$EqPE@RsLU?2}sK5YgyO{25Xm{F+4DuY6j=lCKX{b)x0VYkU%y(JySH zFzM|$7K1z*Z%ZW372}L&Z26io;^mQ*r+MOjFQWp;X|gBwEM8}@{{ufH8vqO8{pel5)3YM7S#%;Qr_Z+m8vvEXFmd2<1c;&4`G4Yd^+ zOpR43$>sZqlkJ0jw{JytjIJKd!#+oOj+P|*QFG=@giEsuLl<|C@O~!~cw(3R0qU7F zpkzV?z(w=4<^Dq{9uCenFb4;lAJywG)u5qD7^=?wZ;z7LA?pqv!tULG=714jr_o4W zxipjR%t_8C08@4+OBh)BEUq@AFH<$kHOYuDBd~RsC2M=-4KgO&L*}V@ z4}8hw$wn4n=Zv8QW;8xC*}lD6zCg0wH9BlRK}Z8Miw{LZ`horAw+c;pl1h3=t;T|TR zvPi2uwwJrp5jQH+|K3?GR&q$XP&AlBQHB_G2Z}Pkk_E1%>M+Yg*i1a-2XysVh17sC z-$roI-hcK8QeG`#Q$bBI5<=Hk8S&`Sx2nmN-F;e?Q5G4t8#8;R$$n@o%>9NH=^O)N zB&@9#NFurORD#I{?(;C;;ysn-@{J{=@6iRqy?p1}hZ%Nsz>4x*@UR^&vWYDDG-x}} zU&S=d0_~m$U5m@}i54mtR(!3D#&M&xXpyM0O^+Mj%g#0 z?kV6UJ8x*$w};2N7`Jz68Hpu?wb89v+|qL?&S_t=%B)lhRNuV2jSeuezkVXEBHk}r zH7sqCTjoI<)KrD0wDVCPlf^GfUU4bOGA7KYRfUiHk$ap32Gbrw?)l1~TdthFc`7T> zIWsA(g;jF9js3OVG>lirFHS!w!k6NRC&)QA(>RdPjj5$kr;ieb&#RQ&PkB`nyTX}e z-Lt)F3IzH$hGyYMCqpl)R|mak1F>q82M&Uo7iHi^owxOeY_#l->ZV2J(}_RlPGOnl z#|`WOsgeh$DC;>bjtG92@i?SdYa=Stc&My{{>ZqC=6)qCPB1xBQs z@FUwiopuv!det1`+U-hJ3QCnE7F}ZXhs;qKq|FFn;jlI<;TMIKRH@(BiaWIA8j{GO zdrrz4tP{@=TRSZSy?*Zp_1dw1x7ZoqW#>X{0KkyyM>qeI-maGBcIMnaue?9j`bB>z z8hjhn!hpO3|CIh|a)43)wjonoE&;->n3;2PL@^Z(9uiJ=atA{bG@i)Zy9esT3IpR{ z>R2;zt(ECRp3@&aSe)j6H5JxjuO|T-M<{n6G_McsPA^pUB$dq(zjlMK1eOt^f7j_5X@R3W>UWPk9X^;j`ao`K{O2M zp~2T9&)RQ)dm+uBz~JLv_~j<~dLsM9?Iz5;i!@H3vUO?DkN2J)1iQl3;k%ZecuL55@)lid zx76g|q|9frmNh|K^(GS!A_YTKC?t5=!KGuxP`-5=&_J+{dl(lp$8zH*gV3F~bvOXD z+|an*<$jU$%QNG9$jQJ>s!x-4ap^8a2VPG4zbKg!*HD_nh^d=qwx+cac zTD}2ASvHn@R^J*RNaxxK-Lij}nIC1ej*-yPiw)NNfz2qV*q65UojfK?82dwU7 z_iM;^rw*t{5??oEtLU(J`mDVkZW!h*z$1^4@g{ex4w6=v@9u4Djd4lx8m5KpGb)$$cTc_-g@ zNqqNOQxNP+AZ{EeZ=0Gf+HdaEuF}OI<%2MBK5!T2-2-h;d}Mh&*K8)1z%oyU_UMe6 zF@DgM1d0(hQ>2Nhm|jL_iA~~c<`5*?@h+Qdu1u@C3%zhn-~VPT)>%>hw&+82IY--j zplI3jQJu01_PA3){$}=+c0OUjNbl2Q0uZH$p5gi4N}va`Md{PMnsh&K5Nn+AQDtVTw3S%)0l=J0mh1q;l_y@nkg-nz zQJ;QEm#L&saZflk9K9GDTAc4Sp&;_5q~S3*td}*rzVuacQD~8sF7KNVDCJb>T32{e zyEPdMl4KpEp7%uC8O;7>c8X;cijJP%Go;}*qzMFBZw(4qKS$OO5_}jmIGTA&wMaQW zsLcqPIM%fnT27qAHT4+w%z97ko>EDk%v|i72d`>s(Qu%c+M@}T$7xyPf~cFZEnVD_ zE2rD+mr!H_m&@!N*jl~gwz(dc|2=cHhPrk6TaXrFn>@8hn{6DSTRrTIu&&=qR8DhW zqXUuC1eyyqi*B^tU}+fO(l9&w$fQ*Ftta1e(QWUnr6=3>dJb&TOnN-FBIl)NMH3$uK#y*g8FG7?FK`Q+ zD8h5*A>*k^d28baN2O!lFSj4x!h9WNCgm@z{QT?^Rkr@!w-F@00wYlsoEBwMUHh|& zyL#E0|196RajFrsJiy@Hpzp-@jw4}}#tm&>Y4H7nYCwWPCg%FiotqsBE8{z5yS?tI zff7~25fc1V5>(CnZ0R^Qy^6tw&Foe+(^~Xtdy(#Tihk5zk24@)?~FSlJt}f@;D_}M zXBsjTkvW%GwFUYF<8eayOtn0;jRo%HEsH1L z_f1g$5=Qr$E#3ndw%3d?jz6*yh)6kTos;CuTTF$LA%#So0%G75MZo!!yMTnY_WaNl z-9gHX`VqHbR#m6s55ge_pqHIS`z77QdAF3^I9fwfs(5XnUTgpisQ>0OLi=bU{VyF; zOGqNHh-rRPt68mowEhLs8Y6|MBzz_W8)rScM?{BAj_uvO0Q=|-;+77FZ8VLwB1-{> z<`ryJL1|r#GKO5?JkEb=m=`S4hbp%2-x-Mcx6C~7(qqh1>FMi97K))IpJybt`gyIIpAKduS)4N!j zo4dMj|D1kyK5!jlhZG)=Z${PS!&8a27xB5F8hNjapw`vah?&PdM$gIMueN=3#xBmd z}sl+xrys5~lNnbxZ zF|%hEy&;Rh#?NEfgj`AeT985Kw z9UNV_O&y%g|8&#;vm#I`^@%lBZ|9*1+EqO#R^FALrBg_@laXz@F75iXvvH>7-SS5+ zyS0^tSvwv&D(+dA`GWN(EAjb6*>Y@Cmp3FbPcX4E0~6~Fm_Li81T%o{_av+a>AuLk zd0#+nXb7gsn{{m^2q2(f7a~`j7aBI4wu?up90@G z(;Cp5srp*MU3kie{Mrp6UZ1Do3l_)fsuMhVN2QpV0*TM@Fy^e{)J{Z`+Hx-TBJfS# z-F;*Xk0w0gBd|vd?E1z8G|lH76o))?K6AXWvXJ~(2s(pv7h96rrkFZ@^5O$yO11#K zD==0{A%rq=s_R+I(v$CpeLgp9njyz%7c4~5eE1s>h9nMpxSV_^#w|PNjHfRS!M~%V zDL?Eq4n;{C3;+Q5D;q_K6Aw08>a4$&A4b2;>d*F>1_-Wm8_%c{l}Zq~YEKq_&XTq)1< z@ofR~nY+Lgn@G~@&U)zsD$Weo(p7P&7zStQ7_PCja^Y%TCJiw7$gWLpv}hd~9qc>M zd!a_qL^jP2BzhS1na;p56@W7x4|f8G=3K0;5Zn$`QDSb>q3)=6rSxAkMNG;>KM? zoVNg2@yeN$t+u%ZOORZeYXyx<8(7%l#JOSYJe$xza4@Y4EZCSk+P9DKdp|ZgXsu*I zWO#|9>mN-Z8agM+w)y>OWYyQpIrH1l91N`%3Kzsl`Z_&+zp gU*Rj1zrg?88a0%$P}v0l@KLX4sES@f{o~XB0g6JUX#fBK literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 34e47ebcfcf5a..09fe047af8ed5 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -244,6 +244,22 @@ def test_excel_passes_na(self): columns=['Test']) tm.assert_frame_equal(parsed, expected) + def test_excel_passes_additional_na(self): + + excel = self.get_excelfile('test5') + + parsed = read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) + expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']], + columns=['Test']) + tm.assert_frame_equal(parsed, expected) + + parsed = read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) + expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], + columns=['Test']) + tm.assert_frame_equal(parsed, expected) + def test_excel_table_sheet_by_index(self): excel = self.get_excelfile('test1')