From d9dffe1d548332f30d6411013ab9b6ba36950c10 Mon Sep 17 00:00:00 2001 From: Troels Nielsen Date: Fri, 7 Sep 2018 16:49:23 +0200 Subject: [PATCH 1/2] BUG: Some sas7bdat files with many columns are not parseable by read_sas The reason is that column definitions may be split up into different pages. Allow column information to be parsed from different pages and add a test for it. --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/sas/sas.pyx | 8 +-- pandas/io/sas/sas7bdat.py | 47 ++++++++++-------- pandas/tests/io/sas/data/many_columns.csv | 4 ++ .../tests/io/sas/data/many_columns.sas7bdat | Bin 0 -> 81920 bytes pandas/tests/io/sas/test_sas7bdat.py | 8 +++ 6 files changed, 43 insertions(+), 25 deletions(-) create mode 100644 pandas/tests/io/sas/data/many_columns.csv create mode 100644 pandas/tests/io/sas/data/many_columns.sas7bdat diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 649629714c3b1..f8c742900cae4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -742,6 +742,7 @@ I/O - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) +- :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) Plotting ^^^^^^^^ diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 221c07a0631d2..8395dccc1e744 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -244,8 +244,8 @@ cdef class Parser(object): self.parser = parser self.header_length = self.parser.header_length self.column_count = parser.column_count - self.lengths = parser._column_data_lengths - self.offsets = parser._column_data_offsets + self.lengths = parser.column_data_lengths() + self.offsets = parser.column_data_offsets() self.byte_chunk = parser._byte_chunk self.string_chunk = parser._string_chunk self.row_length = parser.row_length @@ -257,7 +257,7 @@ cdef class Parser(object): # page indicators self.update_next_page() - column_types = parser.column_types + column_types = parser.column_types() # map column types for j in range(self.column_count): @@ -437,7 +437,7 @@ cdef class Parser(object): elif column_types[j] == column_type_string: # string string_chunk[js, current_row] = np.array(source[start:( - start + lngt)]).tostring().rstrip() + start + lngt)]).tostring().rstrip(b"\x00 ") js += 1 self.current_row_on_page_index += 1 diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index efeb306b618d1..58b7340caf558 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -82,7 +82,6 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self.compression = "" self.column_names_strings = [] self.column_names = [] - self.column_types = [] self.column_formats = [] self.columns = [] @@ -90,6 +89,8 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._cached_page = None self._column_data_lengths = [] self._column_data_offsets = [] + self._column_types = [] + self._current_row_in_file_index = 0 self._current_row_on_page_index = 0 self._current_row_in_file_index = 0 @@ -102,6 +103,19 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._get_properties() self._parse_metadata() + def column_data_lengths(self): + """Return a numpy int64 array of the column data lengths""" + return np.asarray(self._column_data_lengths, dtype=np.int64) + + def column_data_offsets(self): + """Return a numpy int64 array of the column offsets""" + return np.asarray(self._column_data_offsets, dtype=np.int64) + + def column_types(self): + """Returns a numpy character array of the column types: + s (string) or d (double)""" + return np.asarray(self._column_types, dtype=np.dtype('S1')) + def close(self): try: self.handle.close() @@ -288,7 +302,7 @@ def _process_page_meta(self): if self._current_page_type in pt: self._process_page_metadata() return ((self._current_page_type in [256] + const.page_mix_types) or - (self._current_page_data_subheader_pointers is not None)) + (self._current_page_data_subheader_pointers != [])) def _read_page_header(self): bit_offset = self._page_bit_offset @@ -503,12 +517,6 @@ def _process_columnattributes_subheader(self, offset, length): int_len = self._int_length column_attributes_vectors_count = ( length - 2 * int_len - 12) // (int_len + 8) - self.column_types = np.empty( - column_attributes_vectors_count, dtype=np.dtype('S1')) - self._column_data_lengths = np.empty( - column_attributes_vectors_count, dtype=np.int64) - self._column_data_offsets = np.empty( - column_attributes_vectors_count, dtype=np.int64) for i in range(column_attributes_vectors_count): col_data_offset = (offset + int_len + const.column_data_offset_offset + @@ -520,16 +528,13 @@ def _process_columnattributes_subheader(self, offset, length): const.column_type_offset + i * (int_len + 8)) x = self._read_int(col_data_offset, int_len) - self._column_data_offsets[i] = x + self._column_data_offsets.append(x) x = self._read_int(col_data_len, const.column_data_length_length) - self._column_data_lengths[i] = x + self._column_data_lengths.append(x) x = self._read_int(col_types, const.column_type_length) - if x == 1: - self.column_types[i] = b'd' - else: - self.column_types[i] = b's' + self._column_types.append(b'd' if x == 1 else b's') def _process_columnlist_subheader(self, offset, length): # unknown purpose @@ -586,7 +591,7 @@ def _process_format_subheader(self, offset, length): col.name = self.column_names[current_column_number] col.label = column_label col.format = column_format - col.ctype = self.column_types[current_column_number] + col.ctype = self._column_types[current_column_number] col.length = self._column_data_lengths[current_column_number] self.column_formats.append(column_format) @@ -599,7 +604,7 @@ def read(self, nrows=None): elif nrows is None: nrows = self.row_count - if len(self.column_types) == 0: + if len(self._column_types) == 0: self.close() raise EmptyDataError("No columns to parse from file") @@ -610,8 +615,8 @@ def read(self, nrows=None): if nrows > m: nrows = m - nd = (self.column_types == b'd').sum() - ns = (self.column_types == b's').sum() + nd = self._column_types.count(b'd') + ns = self._column_types.count(b's') self._string_chunk = np.empty((ns, nrows), dtype=np.object) self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8) @@ -660,7 +665,7 @@ def _chunk_to_dataframe(self): name = self.column_names[j] - if self.column_types[j] == b'd': + if self._column_types[j] == b'd': rslt[name] = self._byte_chunk[jb, :].view( dtype=self.byte_order + 'd') rslt[name] = np.asarray(rslt[name], dtype=np.float64) @@ -674,7 +679,7 @@ def _chunk_to_dataframe(self): rslt[name] = pd.to_datetime(rslt[name], unit=unit, origin="1960-01-01") jb += 1 - elif self.column_types[j] == b's': + elif self._column_types[j] == b's': rslt[name] = self._string_chunk[js, :] if self.convert_text and (self.encoding is not None): rslt[name] = rslt[name].str.decode( @@ -686,6 +691,6 @@ def _chunk_to_dataframe(self): else: self.close() raise ValueError("unknown column type %s" % - self.column_types[j]) + self._column_types[j]) return rslt diff --git a/pandas/tests/io/sas/data/many_columns.csv b/pandas/tests/io/sas/data/many_columns.csv new file mode 100644 index 0000000000000..307fc30f33b9f --- /dev/null +++ b/pandas/tests/io/sas/data/many_columns.csv @@ -0,0 +1,4 @@ +DATASRC,PDDOCID,age,agegt89,ASSESSA,ASSESS1,ASSESS3,ASSESS4,ASSESS5,ASSESS6,ASSESS7,week,BECK,conf1,conf2,conf3,demo3,demo4,demo5,demo6,demo7,demo11a,demo11b,demo11c,demo11d,derm1b,derm2,derm3,derm4,derm5a,derm5b,derm7,derm7a,derm7b,derm8,derm9,ECG3,ecgrtxt,ecgrhr,ecgrpr,ecgrqrs,ecgrqrsaxis,ecgrqt,ecgrqtc,ecgrrep,ecgrtime,mmse1,mmse2,mmse3,mmse4,mmse5,mmse6,mmse7,mmse8,mmse9,mmse10,mmse11,mmse12,mmse13,mmse14,mmse15,mmse16,mmse17,mmse18,mmse19,mmse20,mmse,mmsescor,mrf1,mrf2,mrf3,mrf4,mrf5,mrf6,mrf7,mrf8,mrf9,mrf10,mrf11,mrf12,mrf13,nvitl1s,nvitl1d,nvitl1r,nvitl2s,nvitl2d,nvitl2r,nvitl3s,nvitl3d,nvitl3r,nvitl4s,nvitl4d,nvitl4r,nvitl5,nvitl1,nvitl2,nvitl3,nvitl4,phys1,phys1a,phys14,phys15a,phys15b,phys15c,phys15d,phys16a,phys16b,phys16c,phys16d,phys17a,phys17b,phys17c,phys17d,phys18a,phys18b,phys18c,phys18d,phys19a,phys19b,phys20,phys22,phys24,phys26,phys28,PREG1,PREG2,updrsa,updrs1,updrs2,updrs3,updrs4,updrs5a,updrs6a,updrs7a,updrs8a,updrs9a,updrs10a,updrs11a,updrs12a,updrs13a,updrs14a,updrs15a,updrs16a,updrs17a,updrs18a,updrs19a,updrs20a1,updrs20b1,updrs20c1,updrs20d1,updrs20e1,updrs21a1,updrs21b1,updrs22a1,updrs22b1,updrs22c1,updrs22d1,updrs22e1,updrs23a1,updrs23b1,updrs24a1,updrs24b1,updrs25a1,updrs25b1,updrs26a1,updrs26b1,updrs26c1,updrs26d1,updrs27a,updrs28a,updrs29a,updrs30a,updrs31a,updrs32a,updrs33a,updrs34a,updrs35,updrs36,updrs37,updrs38,updrs39,updrs5b,updrs6b,updrs7b,updrs8b,updrs9b,updrs10b,updrs11b,updrs12b,updrs13b,updrs14b,updrs15b,updrs16b,updrs17b,updrs18b,updrs19b,updrs20a2,updrs20b2,updrs20c2,updrs20d2,updrs20e2,updrs21a2,updrs21b2,updrs22a2,updrs22b2,updrs22c2,updrs22d2,updrs22e2,updrs23a2,updrs23b2,updrs24a2,updrs24b2,updrs25a2,updrs25b2,updrs26a2,updrs26b2,updrs26c2,updrs26d2,updrs27b,updrs28b,updrs29b,updrs30b,updrs31b,updrs32b,updrs33b,updrs34b,updrs5c,updrs6c,updrs7c,updrs8c,updrs9c,updrs10c,updrs11c,updrs12c,updrs13c,updrs14c,updrs15c,updrs16c,updrs17c,updrs32c,updrs33c,updrs34c,updrsmental,updrsadl,updrsadlon,updrsadloff,updrsadlmin,updrstremor,updrstremortreat,updrstremormin,updrsrigid,updrsrigidtreat,updrsrigidmin,updrsmotor,updrsmotortreat,updrsmotormin,updrs,updrstrt,updrsmin,updrs4a,updrs41,updrs42,updrs43,updrs44,updrs45,updrs46,updrs47,updrs48,updrs49,updrs410,updrs411,vitl1s,vitl1d,vitl2,vitl3s,vitl3d,vitl4,vitl5,vitl6,assess,fbeck,conf,demo1,derm,ecg,ecgr,mrf,nvitl,fphys1,fpreg,fupdrs,fupdrs4,vitl,site,race,rImaged,rPD,rPDlt5,rAgeGt30,rHY,rMed,rMelanoma,rPreclude,rNeed,rEligible,gender,incsae,incsusp,incterm,increlated,inctermat,increason,incafter24,incendp,incres,disp2,disp3,disp4,disp6,inex1,inex2,inex3,inex4,inex5,inex6,inex7,inex8,inex9,inex10,inex11,inex12,inex13,inex14,inex15,inex16,inex17,inex18,inex19,inex20,inex21,inex22,inex23,inex24,inex25,inex26,inex27,inex28,treatment,treat,disp,inex,classify,enrollyr,demoyear,dob_yr,inexdays,demodays,onsetdays,diagdays,medstartdays,physdays,phys21dys,phys23dys,phys25dys,phys27dys,phys29dys,confdays,pregdays,nvitldays,nvitlscandays,vitldays,labdays,ecgdays,ecgtestdays,mrfdays,dermdays,dermexamdays,dermbiopdays,mmsedays,beckdays,updrdays,updr4days,assessdays,daystotherapy,dispdays,endpdys,termdys,SAEdys,resdys,lmeddys,wddays,VISIT_NO +a030,ab304,43.0,0.0,0.0,0.0,,,,,,-2.0,0.0,1.0,1.0,,2.0,1.0,19.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,,,,,,,0.0,2.0,ABNORMAL,75.0,150.0,100.0,-3.0,410.0,460.0,2.0,1000.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,3.0,5.0,2.0,1.0,1.0,1.0,0.0,3.0,1.0,1.0,1.0,26.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,94.0,73.0,155.0,96.0,71.0,148.0,91.0,69.0,146.0,67.0,72.0,1.0,42840.0,46080.0,46980.0,30600.0,100.0,175.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,2.0,1.0,,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.5,0.0,0.0,0.0,1.0,1.0,2.0,2.0,1.0,1.5,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,2.5,95.0,95.0,7.0,,2.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,5.0,,,5.0,1.5,,1.5,7.5,,7.5,20.0,,20.0,25.0,,25.0,,,,,,,,,,,,,138.0,86.0,72.0,130.0,80.0,80.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,abc,1.0,1.0,1.0,0.0,1.0,34.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,,0.0,3.0,0.0,1.0,0.0,4.0,3.0,,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Placebo,1.0,1.0,1.0,1.0,2002.0,2002.0,1914.0,-28.0,-28.0,-404.0,-28.0,0.0,-28.0,,,,,-6.0,-28.0,-13.0,-13.0,-12.0,-28.0,-28.0,-28.0,-28.0,-28.0,-14.0,-14.0,,-28.0,-28.0,-28.0,,-28.0,,659.0,426.0,659.0,,,658.0,100.0,ab +a030,ab304,43.0,0.0,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,1.0,2.0,95.0,95.0,7.0,,2.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,3.0,,,3.0,0.0,,0.0,3.0,,3.0,13.0,,13.0,16.0,,16.0,,,,,,,,,,,,,140.0,86.0,76.0,132.0,80.0,84.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,abc,0.0,0.0,1.0,0.0,1.0,34.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,,0.0,3.0,0.0,1.0,0.0,4.0,3.0,,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Placebo,1.0,1.0,1.0,1.0,2002.0,,1914.0,-28.0,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,,0.0,,0.0,,659.0,426.0,659.0,,,658.0,100.0,ab +a030,ab304,43.0,0.0,0.0,0.0,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,2.0,0.0,1.0,1.0,0.5,1.0,2.0,90.0,95.0,7.0,,2.0,2.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,5.0,,,5.0,0.5,,0.5,2.0,,2.0,16.0,,16.0,21.0,,21.0,0.0,,,,,,,,,,,,149.0,88.0,80.0,136.0,90.0,82.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,abc,0.0,0.0,1.0,1.0,1.0,34.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,,0.0,3.0,0.0,1.0,0.0,4.0,3.0,,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Placebo,1.0,1.0,1.0,1.0,2002.0,,1914.0,-28.0,,,,0.0,,,,,,,,,,,29.0,29.0,,,,,,,,,29.0,29.0,29.0,,659.0,426.0,659.0,,,658.0,100.0,ab diff --git a/pandas/tests/io/sas/data/many_columns.sas7bdat b/pandas/tests/io/sas/data/many_columns.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..582316fc59e18a1059915cb9139999cc61995b9e GIT binary patch literal 81920 zcmeI4U2IfmddFXblbEkAG;9l9x-nf}z%H#kAO32o_IP6&tR;zw-7Kho9b@Nnc?Q|P^LyTpbIx<#^PV$v#whjZ^^gC#Z^utouK3q~`13lo=${jZRz0_T zMM|mPP&F#?sMaSft(+F@Ztt%Dz0NJ0rUmC99qm2s^*^frky52CU;E2<+aHlStJki7 z7E!qEw;B_Ti5DAxkV?MxYIoNwFH^awWI!??8ITM}1|$QL0m*=5Kr$d1kPJu$BmKVJ{25^INL%k8+3P#{&unXJ7twDSyb4|iZ>pzl|F=>9E;tEJp41JfKv%#^q@v>UDry5bvp3V(aZdd^h4F;8;zaEia+p1K?rsD0l)q z4W0upf@Sb3)^i=a3El?pf|KABs1{)V!R6p8uzrEt--ZQldpE!v!8F(kZUiH+1KbR5 z0lOBsd~XAHfV;tw1unm1Ugz8PJol(L$K}{93QYA+yJ(M9bgyO3+{f#?f(ck2A04B;9>A6cmh2A zjLYXa@FG|SuY%W6|0Z}FybDf(Q=qDI+f@fH2Umgh;JP|@9ZADC)-ATzjm?PnfIGl^ z-Ev#Lx9*oL5260iI(I92y3XB-UPSx~;_o0nQRn7|53s%u!H0D&54DRNmx3$7HDCj{ z0ZfA%!47cCBDeoNVDBO~zh=SwBDWny_+D^7;)lRD!Q&`D1wXsUUANA`FT&qij*t*#5 zZv@|h_B~+lVwd+Um|yJ1!5HfA2M;5D9Q972{aN&X0elO*0=^Bt1HK1NfFFP#f)Byk zC2qTyf-9G}{ayn$fE&OxxDo6Cw}3rhFPH`MU=iF4?gtNnZ-U3cQ{dSpF5efio#iF& zI9vg*E^)`{I(QSjjrMopli(Ekt6l2aEnVtvp;y7{m*V(>tzZY(1@@wT7R)0)w$z=c z2M|BJ)E)n$;EAPfUN{TCfO>CXJFbH7ApRaW0e*n>Of7ZesCJoK-^yiff7UE>>uUfv zEOW;-4Q@od1HJ|A?4)CV{SV$-Z1eAZb+jHYU(0;Az5A8tRaZyH%g=XqsLZaM`Po%$ zZBy;t-J80*+rb2w0@Gj<*bKI)ALeqee{a+C-|rtD+G)P9ZhBtxD zU<;T?WNiIJq7Uo`vzmoL9kw+68+ZK{23@~}!KRFhH=&;Ew=md(cnjjKXy2x}>G>B@ zx&B>+;#g4`?k*_H5pYjo)VgM7YyiteS7%SL-JF{z4lQ+umbybr-JzxK&{B73sXMgP9U3OlpIIL5 zA1(|Qb|&;sQvam%Pg?&p>7Qo()1rS`^-r7rF`Jw#fi4-ChC zFzeWi@@AAbqr4gAdH~sWdH^}1yana0C~rl1E6Q6@-iGovS8k4j=_jf8!xGz>)W^@d zK5kvFY}EcO0c>ix3R+i$72(^7AbWuh_Tm;jSt z3QU7dU^CbPwt{Vr$;OP?X4_}7u`jmVA6w4GmhHLX>L+4t6S20*SUeez$NEgh`b@_9 zOvT!!Vr|p0csdquip85^@#a{(IUbK~Lvw5!ke?*-lSF<}$af0)P9fhZhWxIr^l_O9-o$a99rt}W|?U0b4-9qFa@T;Ca@W70b9W~M}3?tV^SaI z*m8etIU8HHbD`aaWFpo!5o?=_#gp-Ptj}bu&t$C6RIF_();1lBr(^M^SiC6~Z;r*A zZ_f8!BeCy`Qa(5^^~Zez=_y0$D<*UvM>f?KaHvg=P{Zc8UzKj|cx z0@EPQu{6%HG|sU!&apJkv2>!*^_xiGQl~8SJ{g)ZZm2J5j)?22*t)san{sol&y4Ep zuCBMUFW3LNc5|;}?vKoTXzpk9{fE7e(AQdbUDNTM_JU(r$n7$2H^wt<`yL%A<_ekq zT%mJN-_B)Kp{v9EV(RB*B{`?!+oLPP; zr(0@n%IgExmp8Fpxgot-N(~J4k7jhh@Qu+CL%qU5!-DRuXc{7xDe87Mo*6a6$Sh}e z>e@-25WTE>wCxKyormne=!nUmp}vG!n!Fh|`5G9?jhWmSntFz&o}sB{X!2|5>zjKi z;MxsG))z0qDp4VP_xu^5RP4S=q(l6buI#6SNKYIPQem%1A(r@DV4?WhbG~j+K z1%9F4FHEO@zT`Q*R7^7PRms5D)z|f=X5EJ(>i8;bH_o>P#3S0B3HPCi;`pgRJR68N z1mY9!!?&6J2jba4yde;usC7FuWBUW~Y#`nch)-a`nbChBo(;qs0`Z9jf$a~(vw^tl zWq!Z^_aiq8&b1e5FZnYWknuCO49NNO#hyP8-ReI+;IExHjONl%s}|sULCStV$GCfb zqLg_aQLF#+ofW9>eKq-7^7Z8AeVcjt_U3(oLuGvov0d&tfE0Q2(e z-{-XA2g#3+A0t0Weun%!`6cqp-lPAdA$lJ*~$+wbkC(n=%knbTMC*MbY zko*YwG4hk-XUNZ!Un0LuevSMF`7QE0-?7UroN2d_8%Byp6n_ zypwz@`F8RQ`2hJI@^SKgJozQ^%jDO{Z;;<2ze9eH{66^ua(uD2 za{a>>Ybw41?;-v8SWUi`d_8%Byp6n_ypwz@`F8RQ`2hJI@^SKg=u$`^XQHA0ak>4P{MSh3;9{GLp z2ju<>>Hhr27w;aO|M(2dm#>bw84qj8*OMp6+sNC=JIS|_Zzs=?50LL6A1B{OevteK z`7!d7Hi^-MajV~%!d?l5yA#WhxK%OSw zNZvudg}jHnmpn_JCohukCErhei2P0R!0YFPks%=vQQaKl)V)#G^pmei`T!`YEq;eMXPEwo9czJPO2BAYOhp z(0?Ev1>!0YFaL6&|3Ewn#8n_(UJ>X&5RU?J|1dam-Bo^$A=e=fY< zQXn1$;wlg?yFc2UIX(mNC=geHco~0-H)Hz)@hA{ifp{6$*ctr?;!z;30`W5L5oYus zh)03A3dGB}2c6M>ARYzcDiAN@Jt%T zHWBjo|86|U-~YQj$lw2CJXC?af0Orb^8W3~eg7usznuSa{>%AaKi_@zjrPc{S?q6-@7E^1IGK-(IO`T3n^zE~*r@o3;D) zpkLo?`KWo+{=Ju8e&xmXEjCnsC35U@RNKM2vNEp|zO6XeUYtM`hjDFJn!gtuC_Urw>p;k;IBGH zh(|u&<>Ctc`24*;;o_zLuq;hFs(=4C)W`9>or71~&o8QcIkqoE8V7#d`#h*>FTY=0 z&&QcukKNY~#Y5${F>i1?@4lU<-)~PJpQ3(zoco!t`+k|Vk?pyR@$ISBxOnCG@OGmg zUoQNy&Eqd>9_-GUUzpE%KYd(bJo|F*>YI`6q7sLzG{xbnjpF3KpHG`n#;v#X`;LB| zj&{I%h@VFu!0r7d z0re{FeZ5(7MmxX0&x(G#=Gy<{cf0bL{#@U%>%eyx;O4)HY(3Arf9+gtEZeweG(O#3 zq5h3}K20I2Rn-35aL;vAPV4vUsc-}8gMPi0a%#`z-jD8&lzE-_BS-JP-19K%={wy_ z-)Hq^!ETB2tNg0d?TwoA{`fZwU-mrAcR#=E`}d@8U*$^Y4|IrriVcHC?l&*&%ek?9Ps$JO_55s8`DKk^|Fl`+Hw)cvJ4*a!!M~aC=M~fM_v~4Dv%pP!*FNrD-Yocxe75xY zp6goX_GZD4Y<{!g<4nJG9G|cIemy^}{(L{&uYYs!SZ@}5Cql`9WI!??8ITM}2EH^I zsHw4=@)>=khYz9s4aHZT{;17$Kjb65U+m|0a~&6-ce`1Cr01VQ`+*;#e;?*qtsa;A zcAi`w^3eAJd_SI{_FNuvA7^rYxZH1_kE4DG-&d*nkzS?$$nL@CJx*kOU(9?}8LoKP z%*Ubf{2$}Xmn%QsFm5VwKaS|5I{#6gYMMU2y!-9&{e0Ry(dX8ya6Tyiyx)J+hj_`y z{dvUn8#;Sd{YZ~jAs^|@KDqO$>GM9HTixb{w4V8K Date: Fri, 7 Sep 2018 21:04:20 +0200 Subject: [PATCH 2/2] BUG: Fix parsing of sas7bdat files with odd data pages (#16615) SAS can apparently generate data pages having bit 7 (128) set on the page type. It seems that the presence of bit 8 (256) determines whether it's a data page or not. So treat page as a data page if bit 8 is set and don't mind the lower bits. --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/sas/sas.pyx | 2 +- pandas/io/sas/sas7bdat.py | 16 ++++++++++------ pandas/tests/io/sas/data/load_log.sas7bdat | Bin 0 -> 589824 bytes pandas/tests/io/sas/test_sas7bdat.py | 8 ++++++++ 5 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/io/sas/data/load_log.sas7bdat diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f8c742900cae4..949bc7b73af7e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -743,6 +743,7 @@ I/O - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) +- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) Plotting ^^^^^^^^ diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 8395dccc1e744..a5bfd5866a261 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -375,7 +375,7 @@ cdef class Parser(object): if done: return True return False - elif self.current_page_type == page_data_type: + elif self.current_page_type & page_data_type == page_data_type: self.process_byte_array_with_data( bit_offset + subheader_pointers_offset + self.current_row_on_page_index * self.row_length, diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 58b7340caf558..3582f538c16bf 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -301,8 +301,10 @@ def _process_page_meta(self): pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types if self._current_page_type in pt: self._process_page_metadata() - return ((self._current_page_type in [256] + const.page_mix_types) or - (self._current_page_data_subheader_pointers != [])) + is_data_page = self._current_page_type & const.page_data_type + is_mix_page = self._current_page_type in const.page_mix_types + return (is_data_page or is_mix_page + or self._current_page_data_subheader_pointers != []) def _read_page_header(self): bit_offset = self._page_bit_offset @@ -644,11 +646,13 @@ def _read_next_page(self): self._page_length)) self._read_page_header() - if self._current_page_type == const.page_meta_type: + page_type = self._current_page_type + if page_type == const.page_meta_type: self._process_page_metadata() - pt = [const.page_meta_type, const.page_data_type] - pt += [const.page_mix_types] - if self._current_page_type not in pt: + + is_data_page = page_type & const.page_data_type + pt = [const.page_meta_type] + const.page_mix_types + if not is_data_page and self._current_page_type not in pt: return self._read_next_page() return False diff --git a/pandas/tests/io/sas/data/load_log.sas7bdat b/pandas/tests/io/sas/data/load_log.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..dc78925471baf4cc3dea8d568d27c59327c7d578 GIT binary patch literal 589824 zcmeI53%F!gS*CY~iya0YVL-*m<%lg839-+mZvsv=Bp^dBNIDMq5W_LunY4D&q5A}a z2oy7zAjAPBOpyB}ARyN?GAN)$AmqY;10)h6LPSJRK@bE%B(wHewf5R;|KIwn`XIaN z`=3=joIbT{RcF2HTkl@CI;X2DKCb%vXFc)A-R|<|@B6*Y)v?zsUV8U?-}x@f_o+yqh58v&AUJTw@-M~t>3(PcT=7En}s(Is;d6aZ>l!e&s&dOCv08Wx_HX+ z$@|a0@S@iBr4zQFu)Or3ZCh7PSloWX(h04X*1x@I=k~LkH**Uo?fob{f@uzxMR{6wtU+^KXKFe`uNXl|J6T#LMT4Ie*F0Q%R1LT|1E#v z7dqFk7+)WR>*GK7X0C6^T>tzx{TsV~&lQ>Ldo$O!WUhbyU+~`_u8*r-g=TWkSDt^~ z&AazJcK4~LFDxuPZ1?GBEp1)gzPQyr0T1{3qfgpXzi@_D47^iZI(+cL0|zeLx+q>4 zUpRej_58i7hxRU3xqmYAp}I%^ zc-If1e;VOS)(?-`b4q^k;O-WW+H=w={-)vcLyl@UmDX#$-Cx@04-FrZFX7`{KLkEF zI3#>D^UvgbCGh3;m%9Bk6LYH@X20#Le`3B&#R>V6^+S`N9lm7!V303aKN#do)(;II z8r+lnC+GSh@J%~DWSSv+(`>!a*Zng&UkQBEjt{haQ{F#m`7#ynj(?eAt!|k8cDDU9 zQ*lB*v3>~aBeXWC`RdaL*3U21_m{wj6pVbl>xVHP93mng@A@I+H%xaOHhWO|o@A_fPH`Vxn+9176W}o$S{}k(o zkl!@p12f+=;{!7vs(bX0^ZcPb|1`q)og2XCBMp5%OYnvJ3(Al1d@SK>_FoX3yVckJ z`8z(1e}YfTkMRC-D9jWm$Lq$x_A9UWm1gnvzhQU{$(m&_fKDA&irb=;2%Ql`iIJ76H8y{E5B1eg~q>3 z#iM_I{g)}0lHg}EaQ!1ud)C zvi#M){1bd3KZGd#rI?jJ(z`p3rC_=n(te~f&=KZMxzkAUxszk$ZT%*3+mU#3`o)Qhk5<)7dS z`JuXZ{X=CUg8JP|_!|E*6_?|m%AmbCGd?Z9u78-a-4tq^Oa07{O0s(U+W{m7xF`OkN$D;HU1$u*Zy_?^fB=AY57I}X!!o& zF!B#=?z-Tje}4VGZT`>y1YgJxAx8f=`5ONaT;d-upO#FXEfXH(M1Vl(3l;~zro z`iII$6nLdCzUN$n)_<9b6Z$9kwEPJDOZW`>C-}7d2>si9+(jGN|D?Hp3O+4ALjMvz zgZ>FVEx+O)9Lv$gzQ*??Kj&BT`6V>O7ynRH@Y+oBtM4zy{JZ~;J=OI1C-}7d2>na= z432+-Ps@*R{Cm!?BmcI;dtU4O&!2B*is3h>SNpnu3cipZs(bX0ldtg)!MXOY`=^hA zmru(t`bWd}#XsASe}4U!i8(b5KkLgs!KdXH{o~}D-alSGEx+g=4d3&wLjIw{73Y!f zpS4h)Z(IBPefv>;7^W}(1YgJxA$I*^<7@mwa4xkt{&D74^J)2Y{bS3|^*{gl7qmHH z%s+%^#yX>kfVfEnmnF zA)4_I!5Q-pA)4_o^ZTn(=QZkk4JRdIoz5lkGj{Cmee^Yh84ZeR=ZFcW`RdufozTdy?+-l4B_5R0H zcl^HI|B&kYYyacp`+fYku2k1Pv#!$d<9cnb{!_JSy{+mCx2e8;YgK*k*4vG*eaAif zuKRy{e0Ti!JZF5VZsq#1{^xCvsqXBbRtxo??)ld7^E=dkZa${^x$*N&r+k+W7fG{ipl;UOIk07(X9BUH@ZTwOfCoxwvJ_zcT*wL`Pdco=_dT{^zCD zUDp4+wyM=}k4?8-|6`%)_v=sHmwzAs@-n0AkJtsZp!2Vx7fAze5 z)yd;O&)aw2`twu9f9_pfTRpsQZO>CL*jJtCzh9`I9K2xPq1F0#9J{~%wDA4HntNs8 zey85=q37>=%JB>LJL7&2z3>9}H|*PcLQ~%TCHwa5cmKtz{`>YVte(GjVa+{XIC##& z+PUtJ@RzyQckelA&!bK`skwCe!3z(ay|40rxZmD-LcjyfF|MBa4_q+d+`!Ae*?&|sfec$1#I{V;x=j}Uxt$O(Q zPv27Guj9@W+R(Rk)%4M%?0>+MJC5ZEeKtgQ`|;o1eI7Cn*F*if&qF^8*KgPP`@-|@ zjSGfy!R7Ar@vS-W4B>xg{P&^P+Sk54Wx4iyn7{v~d(`z0_oBe^E%&Iu|LXX{6Ym~g z@qgZB>wVV0xn+ELar_tfd&Bsm`=k5!hI{8JO_7kOzwo2ur=g#OKbzv^8?^;SM=tL+ zxmrH=Vo2qfae?rBe6M$7oPYQ+Am;q=PWk_k%8dWc^pDL|xW`o0-CF(Zj_cpwqje>H zU;6B}j%1S?$--R8zq%#hRiIxtuhnHgkZJGb@SoY#{9T#fpTBJdEfLJDUlE)e@9XuQ zp98S}{zROgne{9BaIOo^_s=>(&iq{c$N&D%wiSeE#y{}7@X{}7xp{}7@X|ImkXjroTV&G?7ljQNKU&G?u3SeNgg zE=d0Khh>Cl#y?G~*wFGv*&c zB=qkW?)Dou@80v+-KU=J$G5YVwk~d8M4L1*yY z$*YI2Z0)mVcCd^}lbcRlM$>zM;<0ignu(J)cQ_t$)84 zcF~r9EPTi8x9y+cGv;5yXV5>vr{zcJ-?jUmW_tV+d|G~l<6pvO&_BVa%l;Jk zhdx~3W+8F?ck251o+*arE8S~-&0hpx$Pd*$`p3!F_=n(J0&)Jy%ctcR{iEUg)4Tm9 z82`}WX5wF1KOEY3&cXv1#xfqLf0Weyr0Hr`Ht@VJ{{&yi4+gkzK|cdg4U{S(!_>mMqUVq~B7#rMTuMf1;0#R>frd|G~l{v~_{ z{S$mzeuVy=`w83r2|g`9LjMvzgZ>FVEk8#8-uZN}|6*tSJGaZH<;Un>o6n+uT|O;8 zLjMk*`3&G+X8%miKU@2sI~V40|5NaV{1Bq_kBhJI55c*mPTfEKiYKi9`1rK^q<>s| zuKyj?{^yPbKE9A2+CMYnAA&RHA3`+a-_-L@UjGoH8UGNRG5-*v8UN5Pw{iLT{#nmp z{QMFC`X~6b{0RL^_ze0d__X{8{rkqT8}iS8zAmHbG`E<(?k|E*%P;!J z$v3@!ynI@I(LWl#LqGXUu>YA6i*WoCd|H0dKbrY? zEx+g=3tx4?8?fARgZp27J0OQu*|VA7+f{ZGjk z{6lr``iII)0vo?R6TW8r%TzqhKRNTO`Lz7H{$-kBnh1N!uX_EzoAyugY5DQ_m-5;3 zPx5K`@%cA8d(X4L{%7XK!8iU%J}o~!|584C{z*P9KR*Ae2cG&rEc++hp*A`4_hF4lAieJY5gbpwDm*RKQ_KF|IFoA_fMaUpPw=EY58^iBj9`P z%WeB7__X{8{Y&@^`X~6b{P_GEeft}x{gZrJetiC=eD?g4d|G~d{#B3u>cw*ZGdF+X zn}13^Ek8d0Qa*eBNj@zKDY#oe@l4(B#wXn zd^1xFzd60y)BcO(3;ChCNB=na8vhWSYyY}``WSfmwEUufEPTVCcoFc={oo>eME}D6 zS?l~^X&%qtCEqwdh#39j`% zmtWmKeKLN2#>l7T*Y%Hp?>&!r&PMiMXz~+$T7HE7C42_`6MR~Jg#LYH*KeElPw;8^ z5&D<#8T3!^Y5DQ_H(337)BZ_5Ek8d0Qa*eBNj@z%Z3d z-|{@3|4F`aeh@MG$H^D`gK&v|ynI@I(LYW;*Z+>{=WmzVzyF%dKlBj;GyWksWBwsT zEB?X5OrDekC9Kyuj?NH-<8{*E6?AzqQTvDt(JdhpL1wo|KYVX4KUAXXqy{l{c_Aa<9OIuejIJC5NaeMy1SNX?K z@Qw3>h%Nsp`GS8C&b3v`KUzL5zm|V2e1E+I_?NqXw*0Gmf38^GiulST|4v_9J%8`& zp}h-dtZNtFIOTW2Kf$NnKU@A$@&*5L!Lna=4EiVd zwEXz|tA74D&y(}d-2Ibp{2LF0|G|G;b>#BR=&xK37dLI$H2zk!uhxI~{7?IjJ^v-2 zmLH%0!#7`Q+JDKX<;Uk=%4g3%$*1MV=ilh+TabS%c$F8&U;p{OOfmfC^lDG*PstbZ zLv@e-aq>0(Avo9mb^r7+@bYQ-MgM5{esSBSVE#LK|8=~+ZhiiAWgb6&Iud*#KZF?l z^PgW_ZhyXyfUofn!6p9j@@e@+|2X+v|MS0p1|6=t{Nnh>%NO!Ph-UmlaK`*Yh-UoD z9GqJ64+UnHjkG=AV*J%a6~$l+T`jl26Nz(7*S6+O~g! zPs@+czl6`Ae}YfTkI%p1U4Qm}f$?t}UV_K*&!2B*is3h>S9@B&OTLgFs(bX0ldtg) z!MXOY`=^hAmru(t`p3do{p_=lf9P<<1sDAb`)955_igid{x11Keh4x8$H~|Dhu{+b zc=@#aqJNxxuK)S-cXYU7%s+%^#y5>MM%(@gJ}o~&{}Mif{s}%UKR*A4ul?u?!2Ty1+}&MKj(@HB=k|Hr zf02A4KZGd#Y4Zd zT`>M-ez95f&!4Ykip|mbLGp$BC#rk&kCU$%{}5c_A1|MlU-XZKZ+O<)?`_0Czkbie zJV*XXJ}tlKA1B}R{_*l@`9=R&_(m_f$h3cwPs@+bzm(6Of09qjkI%p1^FECH+kuC$ z?))=T48J+O+SC5E#`o6#(vErD|CD?oKZF?lFEk8d0(){fCC;7De`24FjUu)Vw$*1MV=U>Wa z&p*kh<;UmW-~sn~5g7k=;+1zC|NQw%rWk&6dbOwZyW|V`p}I%^IQbg?5S(lOx_|l@ zc=@#aqJJ!W!xPU#{-MoX7hLqupRep_|N96#=kfec@`d~mV)T!bukjDTCI0d9Y57I} zIQd-v^XGq4um4)-?_~ZVL^J*&IAi`HL_YsUH(Y?mzs&rlJN{*g`6-V6wWs-~d{qKq%kLj8Dt2>)-tI4IcPG)BZ_5Ek8d0Qa*eB zNj@zW&FR&i*6)%p zGHEQ{vkv& z{vkMH{vkv&{$&m>E%}EK&G?7ljQNKU&G?sDKUne)A)4_I!5Q-pA)4_I4en+Bd0=&I z-@@M2wN=*?lOBET?ZF^q~e*Pg-aYFwDpOzn?e+i#K{{)|wAD@52kNxWJ zTlP=#Y5DQ_m-5;3Px5K`5&C!He%t;DJ}o~&{}Mif{s}%UKR*Ae8~+6Ox4acE0pk2~ z{pUM!#qgWct39nBB;PncSoi23CtvUn!nyXZ`=^hAmru(t`p3dIT)V?d!2AoJ)@h?}*PjU3GJjbo`^^3*%p|;&J}DK0f4%>B;$5 zXOdsdr{&l2kAQFVsk2P`C;7De`20)x?D;48wEXz|8(jTIru~zAT7G=~rF{1MlYCl! zeEyAYJ@Qhy|B@TH`R1RJPs@+bzm(6Of09qjkI%pAySINC@NW?>0iu6?e>YPMzd60y z)A~X3h5S(6qko)yjeiKvwSV0|eGI&OT7J<#7QVqHj|2WKq0J>1T=Xxj-&^~ii}QH? zAo<4mLB!}ECtvUn!X^Ik@@e@+|2X+v|64zQn123A>mNk4;va;wr#Sl8p5~vDPn*AV{bS<`<6kbnIREtH10$c7U)R6+=c_*PcGLbzJ}o~! z|584C{z*P9KR*A4Z@t~iH?sf2a{rWkT7G=~rF{1MlYCl!g#PXPcia96J}o~&{}Mif z{s}%UKR*AeJ0J53%j2Kq)AHlo-@`e0R-J^e;e2sqy&b5EtKYa|md|H0dKNh~Kx&rw(@#p`-`fuLPA0%JM4`=6N`2jBQt{Xia9AGv%p`iqyt#Z6l_`EO^G|2*H)LhJl-c`kqcQ1FHP5MtLq zHonF`1n2UL;~!^!HJ_GW*FU!WT>tauk5lje@cM@k&G?7ljQNKU`TQIH<7sI8%SPx5K`@%fkX+4E2GY55WQcjVQ! z{S$mzeuVxdd;CCu;N{cu zi~iB@J^6*mKeV~)f{Xrz{j=8j!?t-me;)|GkRL*f{&Dg({vo);KVCj9zvv$)pX-1A z{C(>6AFqE1(Tslx&X|7)k+%MVoPw;8^5&D<#8T3!^Y5DQ_H$3y< zuiwc23(Nge@@e_;`Iqw9^H1_=`4Ree{f<8{?VsS&@+0&w;WOx;;M4Ns^KbN_focCF zpOzn=e<`0m|0JK5AD@4NTV8zJJ!Q`SX=bG5qHAYESET$rtiNb&vjW@-_Y; zIM@Dl|MW5N@@e@+|5*5{5UuzJ;cWQ_5zY9QIXJQ8A3`+aAA&RHA425wZ}_<{ zfcfWgX8q9hFHWa&p*khahJT&^7uCp zd|G~l{v~_{{S$mzetiB7&wRdV|0JK5AD@3IpFRI1pOzn=f1?k77WuaWZ=Lx5S-pBBC;t%azhnk(zWHY*__X{8{Y&@^`X~6b{0RL!{At_%2|g`9LjMvzgZ>FVEk8p4 ze&qN!gYj?T&;R-Jl}s^Z+1|X=)B3#~uluL3A7_3wpO#L zt(N_hd|G~d{-u2O{F8iIetiCowmitRf09qjkI%o9&z^shPs@+czZ0I0{M&_>sJ?&J zule)MOfmfC^lDG*+mYZ4`JuW;|2X*?{}7yO|GIzr7jp-^u(#=eK73LvY6Y zLx^VlLx-C#KTG2uLNwzaf-~kHLNw!F=HSwje+bcxe+bT)e+ZG#zo*@R)_Cdf-kHeP~E%!p)wQ6e1VzpHS51j#q0j*tLN88nPSr*&WumXuj?NH-)FYG z4V?dFDo*I1;M4LW^e^Ev=%3)z^5gSwc;!<~`zQId{P_G!`Rw^8`Lz7_{2RUO@Y_v~ ze}YfTkI%m}KYRX3J}o~!|EhalWZFN;r{%}zU&?3CKgp-%$LHVZ{Eq{9xUqf1G^5KM3dAzwVzt23|fbzvv$e-|!*Fy#vfY(cz{G zF8UYt&*JA_T;>N>*Y+*!U0qvERfErZ@=x-G`zJz-{&DgJ|EB%-H+Qu@Kf#b+&8OuT z{o~Bf^*{gl7j(F3%s+%^#yCw~rPx5K&_l|#bd|~~U zt9WPrnJYFayZZH+yK}E=9+2$B%dw+Qa*eBNj@zvr{%}z-{8#aO#3JK zwEXz|OZn{iC;7De2>rYGxFcZvTg0o>IR5$17i5azH>X#7TEAEDaocK>``=xv`z5b# z*_1q0o_z4g<(tvV%j1i~P2ukvi1rQr2en=FpQU|MzQ%t9=kl-p@ZEryPs=a*&%!sl z#{uB~GCEv!!A1Yq&yU?Sxpn@rIFFydk$mI)AY$~7lP~xO;S&FN`Lz6^f1G@-|M}0~ zpu<&T{vkv&{vkMH{vkv&{-MKFWBwsTGyWksWBwsTEB;Nt|4KXmgosxBgK)O|gNS_o z4R#*{>%W!E`rY?W7v%W!H<@B}fk}^^_J1UwwtnyW$Ho`df4ThX{^^79>mx=!Ex)dR z^Uqh^{C?B^Nj@zveUOp|q=pPN=&;2p-Z{qKN z3hTet`NPs&e*Q)9h5Qg=^pBIT@ejcz{_*l@`9=RY`CR{7|NKjC|2mF;ynG=)v^Q)-tIRWJPEcY*!SOvU;9lYCl!eEy|;_WYB4T7HE7-Q#TA{s}%U zKSKW!K7;-VJ}o~&|K7f4+dsjlmNci;~#=E<{v`j^RK$`j%fVL%wM|WU#6I!;^<#{ntw{ZaQ{Sg@A`+z z=vd9GGvRB-zf8sB{L_yQnPLru^3BZnwEVjM%|GApU1yp0Px5K`@%fkX+4E2GY55WQ zck4TC`zQFc{0RL^_ze0d__X}^{2P4qjeiXGUorzX-~3bZY5DQ_m-5;3Px5K`5&HMO zWBvsAw}NN>(LcYxn<<9hoL=o|{V))GAwN|2=pQFv;~#=^?O*p#9|JF+mS6Obg|E8m z4CG&C|2p~?*6*$T&y{&Re~^43KZF?l{{9(EM`>)CT zL;Kfe{6lcY{6mOl{F{3H$2lDqif7|dB+4s--HGjU6DTd#iUhQdqJrsN)KUDYVA17bqAA)o3 zU-wTR123PJU-XZKZ*C-}7d`1~84 zeCnTC_D}L@`SJOe^4arG@@e_;`B&ZPmB_#CcnRS9XZ@N#U&$21Z%(iFw0@9$AwN|2 z=pQFv;~#=^?O*p#9|JF+mS6Obg>P`$H;{j5bIAo4{R{KY*7@J|c|8AE4}mgpKJssC zDoc14{R`t?YyWfSJbwOVDELBt2r>G{$=CRY;1d6M`Lz6^f1G@-|M|~(d|G~d{-u2O z{F8iIetiB7u7B_cH?sdpbN>{4T7G=~rTN+OPx5K`5&C!WgReI2pWxH-BlIufGw7e- z)AA$qZ`bpXf4lG!!1vGkHGjU6DTd#iUhQlBAoxOlsP54}PQJ!J1n1hn?w>veUOp|q z=pPH;@TYG<{$+lCI{FvppRM!1U2}QG{$=CRY;1d6M`Lz6^f1G@-{~guu zf7#jo{S##Vq5W$!{vkMH{vkv&{$)09EscK&(Tslx&X|7)kpy({r+oJOmwZ}&eEyHV{oHFzkH3OX%a6~$G(UU(Nj@z0zHxrA?$JL^zTh8(b4_3OPoDrUpO#U5Y709;Eef)5DESJ^dcDla`TtY_?Ih|ANArZeT{#DPn*AV{G;Ow z<6o}go$)VMET)wIZYKHFd|G}T{|NX-XFl7sf09qjkI%o9&z^shPs@+bzu|*!GVPz_ z)AHlIDk)5pNer{x#@W8oX!_Zh&y6|}kJ zf{XsG@9(-{vbF!YIFIKKl5dF1xc{y{`5 z{y{if{y{`O{|0Y-5g7lrW#%v4@h?-%PjU3Gea$}wpEiH#`p3o>#=l&CasKJY2Sz?E zzpj7t&o_G94W|8*d|G~d{-u2O{F8iIetiB7PyOi+Z)E?4<^C!8wEXz|OZn{iC;7De z2>l!0_y3yqPw;8^5&D<#8T3!^Y55WQ_u8i*|1!Tn$oJ2BVdwXEGsX0kpMSNd_1{qN zh5S(6qko)yjeiKvwR_z^ee?73Y57I}Soj7XxE}bolF2Xn7v`VyzW*ut#`!_S=pQFv z@DIWz{_*l@`9=RY`CR`ynxB7g+ibl4p(9Q+{vkMH{vkvw{=vf)m!F~W4#y4>gZ1CGDc66QVlL^4kDm5l27*spzjys(;|uG* zTz+-`^vU@35hI_LU)MhZzF%AT2snSpRJ=R>Wr|Id&lTv)Kf$NvN9bR|XV5>vr{zcJ z-~K1t_D}F>`4Re;@EP<^@M-xG`uFIykD4C;1fP~4p??XVLH`7wmLH#gqeCB({_WU` z7PoHF80Vk$`DV7*9L+x^pU)51J^IJVH}((1xm4=@>BHgW)AEb{vG5IU{P8~re+Vw|kC#u&FZ##H=lY-j`M-(dA1`0X4+dbv{GrN9B{mT}s1)cQh>HIlao)&FZGXsZ*baqru~zAT7G=~rF{1MlYCl!eEtoeewAteB%hWa zpMNQzJ^v)1mLH#gqnob!e`5bLJ8<*OKP8`*AD@3IpFRI1pOzn=f5T_q?PI_{w77LE zy6B%jU&$1kwDzlC?`!=h_(FcD?$JL^zQ#WU=Ni54pFSL3J}tlK9}8b~yK|*~+jF1a zjsAu8d+YpfX&%r2B%jX@BS!x?`NsZXxWqqRJ}tlKA19ydfA#s_QOrMC{lkbx{KIgj z{KJTR{tXWvkmKKut(o~tcl^r~^HUuCYhUwE!Kck%y8f~8O+Wwi;{zj~mS5LD0>0+!+Iz{{uQ7so#uzK30n z{6jz7;ew0)h4o+S{9$<>&;Le(FXV?1qko)yjeiI(@sF2J%P;!J$>;i?KmW^Y+*fZGamC>=9S7*Z4jDMMm z$N8rpA2P)n2<4lZ@oD*W{bS&}(+yz%GgEO!|GIoyevJOL`7HX^<asKJ| zcQeKCo71a3tsf*`$Pd*$`p3!F_=n(J``7)`$H2>{<;k%{!3o!nn%_SFH^e?R6 zTjvie^LYL+5PTs&gc$wf{8RFU`zNY<*FRK7$7)`k312h*Whx%$pMHGE6l)-q zZ)V1)<=6FZ{`m$M{E}(^B%hWapMNQzJ^v)1mLH#gqldrQw11LM%a6~$l+T`jl26Nz z(7(Tb>nFkfOJ?Ban}3c3pOzn?e+i#K{{)|wAD@4NuYC{sw++wyqkn#XH&YD1IlbD` z`a$xA{7~Jaf1G@ce+bUCf89TQ47_|=e$hV`zR_nNdlMM{(B_g0F8UYN@2&mMZS#2k zAo)Um2r>G{$=CRY;1d6M`Lz6^f1G@-|M~NWspp@({vkv&{vkMH{vkv@|AtpT4vl}A z`Ac{F%M|ld9Q|ug^H0ea?w_dcUH?!S9jkeDCVb8Km#KK1fBNwuQ>=kdzL^=HmS5LD z0=~63*!EBGY55WQm+%?%Pw;8^@%dLh_q(S3lYCl!eEy|;_WYB4T7G=~4gcx8p91?Y znSq;d{wesh{P_G!^RwrlsW$`vw&R(9^w00_W{TlAr&oJgKS;iiAF6xw zkCU(Q55c+iuluKuftOFqFZxHrxA)b^zp2N+*8b=Ac|3m@2)>XXLX7@}{Ru9<#y9;Tt{m3glnr{u#$V&zC8dj&%50Pvf8D3;ChCcl|?UFpju76TZg3OvU5) z$C+Qvr{&l6FVhSnEO4Qx{HkC2rfL5qpOzn=e<`0m|0JK5AD@4thd=r+H?sbtxqk{i zEk8d0(){fCC;7De`24G0@aRul_D}L@`SJOe^4arG@@e_;`8WLF%aDINz>L^k^8K@Z z&F?Q|is3h>S9_XYOTLgFs(bX0ldtg)!MXOY`=^hAmru(t`p3dI_}#A}|1$e$(Z4YN zZ0!&3n9Kc7!58vFh|xbzzQ#WUm-xrar{x#@ejtwz9l3ln z`iqyt#Z6l_jlUJ`tMwl~|I_|s&wt6Ma;6x5b9%L}^^4#O`JuW;|2X*?{}7yO|GIzr7e1kju zm(POn5B+ih7hLo&>>sqwKX%UL`G?>O`60yUA17bqAA(E#P2B=VtsvaK`*Yh-UnodjFNzKZIz;KLlsYKZHo=-;X~G%|A2i2j4$kkPCi%&lIZ* zOnUUR{u>CsFn>XH@A`+zOadFfJ`=uX{+X$G-9LT(IPCdPd|G~d{-u2O{F8iIetiB_pZ&XA!2V}u;O3iu zN?Q{19UFkCU(Q55Xn= z@$zZ;MgKVYT>taue`s^pn12Y-jDHBun12Y-jDJ(F|9HngglNV;1ZT`YgvjUL=z+hB z=AW7MgYTa%$np7KrdVBI(xa#KpX3Yk7gYDIf2hnPu<`3N;cMofnTpr_)7Q_>&oafP zL7W+%mS5LD0=~VUv+bYY)AA$qFX1!jpWxH-d|)*SXl!_$T?a`Af$?I=(Re|^$#Li@ejh;@(&{N z`8PUk8yNo#=l&C-SLl+Ps^|C-~96p-u*1o z{z*P9KR*9bK70O2J}o~!|Ed??Xxcx?r{%}zU&?3CKgp-%$LHVZ;EjJL_g`}J7sC0c z;M4Ns^DoWMo_~^0%a6~$!JY2=Md05OUcN>D{QhpH7=CklwWsx;;i?KYz$<9L4dEmoMap5Y709;Eef)5DEQz@Nc5=FEfAX`j;u@r#Sl8p5~vG z;0yktx_A9UWpu3O)tT@$<6oxYasKJ=pP6C}g!0YI__X}G{>?w%;Kq-d_D}L@`SJOe z^4arG@@e@I`ghx(`qD=BUs&#+1Hq@|N9bR|XV5>vr{%}zU-ka`{=H@YB%hWapMNQz zJ^v)1mLH*i=R6ttw~S~0zJJ!Q`TgBYG5qHAYG3OI!58vFb&vjW@-_Y;IM@Dl|MW5N z@@e@+|5*42uekyEw>9(ccSQff{Im7>>*aYoe~^6R{2*fVkCQL>2jLR`c=@#aqJNxx zuKyj?`NLBC{Ey2&_z@o~{y{if{y{`D{$)094UK<`2+@px2+o*)2$9df>MNfF^UuZ1 z`l0JzrkG!1=wJI<{|P>A{m}J~jW5hUbNSW%)5qfHXN-JWeqI0OpKoySz5fC1e`YGq z=bz-$^5gR_<+JCX{ZE9*=ilJnKLPeX7c=|UzJJF2GR4*tp75ow^9R8f@BlPbrSK0PY@M-xG`j_w-^iS|< z`SJNTeER#pBG2D*^B2DPr{vS}M`Lz7_{7d=l`6v0b{P_H)T7J<#7QX7^@7|Dqe*T$>d5-*( zd|H0dKTf{s{p01+@{9hl@QogG>_3C~C;H(kXO-`twO*cYTl@Td`#he%OTLgFLhSm- z#@G0V;9P2P{Nv28=F{@)`p1@^>wiae{?5o3@x0T+$OCJ$-&c@`d>) zs(aTzRAwTXFEA6nX8o6`c-=pJ^;q^N()`-%KZMxz50#m({D!Z*$h3de59D#(k;^xu zzxb|KkFRce`1l*yzP9{N`Rw^G`Lz7_{I9M$^Xp*$HFM+Q8-FFAmLK2voATN7Px5K` z@%cA;-4jjwC;7De`20)x?D;48wEXz|8+`Ze$iE$UmLA7H|M`YYG5qHAYG3O!!58vF zb&vjW@-_Y;IM@Dl|MW5N@@e@+|7iGr`j~Hk`6t@ka=}Ia!u~<)_mA$F$MgSz;0yU7 z#ONOhY-#9hv1C)hY$(<+j@I6{$=Jb-SIC| z%ujLjuYJuw1z)&-qPlneLuGWV=GB?-HRE5V;&J}z$A?U@215B}W_((HUH|5vZ*#e5!lYCl!eEy|;_WYB4T7HE7{rOw|1?<0M25!Fj zXC?Tw{0RL^_ze0d__X}^{2TmU^-bX4PCWCE{`vjgOfmfC^lD%02f-KeLv@e-aq>0( zAvo9mb^r7+@bYQ-MgLg%M$bNDL;m^odnV@8H2kb5|0JK5U-XZYZ+icD`Lz6^e=K~% zXFmz~mpMO*(Kbbe&UKLlsYKZIz;zp3|MTIcU%;~zpa;~#=E<{v`j^RGJQO=$j^S=jmhSuf~V z=GT%hjDHBR>mMpp(@*i}X?>gWHS^C*#q0j*>&}^9d;WG*)l%-Q`V;T!kQY{k>@AzN%3#J=vI zf=|mY?Vl8UuNb^8?Vn3q7q>5G_Lt)PtUiCp7MrL11YgJxAx8f=`5ONaoJ%G8$IGYX z7yYB*d-4mBf9RL5x%{Gk_58ZNzcl+3wTuT=*Y+*!U0qw9LcqT6pMo#shY+KGoP3Rc z2rluDmru(t`p3!V`d__2pB=a@`8VDClk5J{_=n((`G*ks{2RUaay0&B<}Y3UGR3AC z{`%T~k$mC)iR#`R|4^A};9QxW#`l!3@h?;HIREtHL#Eg?h%@8U^6UCXz<2JKZ2Kqp zwEPJDOZW`>C-}7d`1~85`S90oWdDWb{wevi{P_G!`Rw^8`Lz59{kwk0ADH$}@M-xG z`j_w-^iS|<`SJNT`q03%f09qjkI%o9&z^shPs@+bzrigpK>nc*f4Wt<@1OPZ+@G&x zip|mbUGjzeP~D?{oP3Rc2+pNa_fH=VFQ1lQ^pAzFTD}GNx0Lx{S@bWg|62R=bNTP{ zNxpG@5Hb44$rt>CaEX7sd|H0dKTbZ^|NQx1=KR)>f6EBbihmH!mVXe@jDMMf6HERf zL^J*&IAi`HL_Yt9pZfxse=cX%4_*H<#rzUO|Ju{}t>n|z4_*J*_`>`%mtWmKeJp-{ z#>l7T*Y$7y`9@#b{swUVkf}JIf09qjkI%o9&z^shPs@+bzro6-ru~zAT7G=~rF{1M zlYCl!g#I0O$r~+?e*?j%lg`NI7Z)xGNna=4EiVdwEPJDJN#+e z{s}%UKSKW!K7;-VJ}o~&|9<57H-qtS;?Mv2^Oa07W!c`m)YJOC5_};)RQKo~Ctu?q zf^$t@_fMYyFQ1lQ^pAyaaL;ET|1#%Ca{k#mf0)aEA4u|r{19UFFYFI;@iqP-xWqqR zJ}tlKA19yde@Aux*ZTMIwl9-v{zhG`<~E5En>g>C-?pOzn?e+i#K{{)|wAD@4tO+WLNjqHE2+&?9s zmLH#gDW5(6B%hWapMQhvw!GD{f09qjkI%o9&z^shPs@+bztNTlnf6ceY5DQ_m-5;3 zPx5K`5&Cz+(~*C>@DkPc&-yigzL_b8-<)3UX?;5qd?7zn_vjxdU*jKwbM0UEPagv> zpO#Ex+g=C!gzo zM|J+bv;FT+llh0vZ_W6J;Eef)5Y7094mVwXmc~DXXvRMTXUso@XvV+H!KEet5TY6X z5S%gp5F(*}PrCuF|1#_Mu78