Skip to content

Commit ea87a7f

Browse files
committed
Fix encoding handling bug for py2
1 parent 8b4b96d commit ea87a7f

File tree

3 files changed

+15
-5
lines changed

3 files changed

+15
-5
lines changed

pandas/io/sas/sas7bdat.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -406,7 +406,8 @@ def _process_columntext_subheader(self, offset, length):
406406
text_block_size = self._read_int(offset, const.text_block_size_length)
407407

408408
buf = self._read_bytes(offset, text_block_size)
409-
cname = buf[0:text_block_size].rstrip(b"\x00 ")
409+
cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
410+
cname = cname_raw
410411
if self.convert_header_text:
411412
cname = cname.decode(self.encoding or self.default_encoding)
412413
self.column_names_strings.append(cname)
@@ -415,7 +416,7 @@ def _process_columntext_subheader(self, offset, length):
415416
column_name = self.column_names_strings[0]
416417
compression_literal = ""
417418
for cl in const.compression_literals:
418-
if cl in str(column_name):
419+
if cl in cname_raw:
419420
compression_literal = cl
420421
self.compression = compression_literal
421422
offset -= self._int_length

pandas/io/sas/sas_constants.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,8 @@
9191
column_label_offset_length = 2
9292
column_label_length_offset = 32
9393
column_label_length_length = 2
94-
rle_compression = 'SASYZCRL'
95-
rdc_compression = 'SASYZCR2'
94+
rle_compression = b'SASYZCRL'
95+
rdc_compression = b'SASYZCR2'
9696

9797
compression_literals = [rle_compression, rdc_compression]
9898

pandas/io/tests/sas/test_sas7bdat.py

+10 -1
Original file line number | Diff line number | Diff line change
@@ -97,8 +97,17 @@ def test_productsales():
9797
def test_12659():
9898
dirpath = tm.get_data_path()
9999
fname = os.path.join(dirpath, "test_12659.sas7bdat")
100-
df = pd.read_sas(fname, encoding='latin1')
100+
df = pd.read_sas(fname)
101101
fname = os.path.join(dirpath, "test_12659.csv")
102102
df0 = pd.read_csv(fname)
103103
df0 = df0.astype(np.float64)
104104
tm.assert_frame_equal(df, df0)
105+
106+
def test_airline():
107+
dirpath = tm.get_data_path()
108+
fname = os.path.join(dirpath, "airline.sas7bdat")
109+
df = pd.read_sas(fname)
110+
fname = os.path.join(dirpath, "airline.csv")
111+
df0 = pd.read_csv(fname)
112+
df0 = df0.astype(np.float64)
113+
tm.assert_frame_equal(df, df0, check_exact=False)

0 commit comments

Comments
 (0)