Skip to content

Commit ea2339f

Browse files
committed
Use encoding when reading column headers
1 parent 7d91d51 commit ea2339f

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

pandas/io/sas/sas7bdat.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,8 @@
120120

121121
_compression_literals = [_rle_compression, _rdc_compression]
122122

123-
# Incomplete list of encodings
123+
# Incomplete list of encodings, using SAS nomenclature:
124+
# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
124125
_encoding_names = {29: "latin1", 20: "utf-8", 33: "cyrillic", 60: "wlatin2",
125126
61: "wcyrillic", 62: "wlatin1", 90: "ebcdic870"}
126127

@@ -526,7 +527,7 @@ def _process_columntext_subheader(self, offset, length):
526527

527528
buf = self._read_bytes(offset, text_block_size)
528529
self.column_names_strings.append(
529-
buf[0:text_block_size].rstrip(b"\x00 ").decode())
530+
buf[0:text_block_size].rstrip(b"\x00 ").decode(self.encoding))
530531

531532
if len(self.column_names_strings) == 1:
532533
column_name = self.column_names_strings[0]
test_12659.csv (file name lost in extraction; presumably the CSV read by test_12659 below)

+37
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
yearmonth,useGpCo,useGpVi,useSpec,useUrge,caseGpCo,caseGpVi,caseSpec,caseUrge,expendGpCo,expendGpVi,expendSpec,expendUrge
2+
201401,11,12,13,14,15,16,17,18,19,20,21,22
3+
201402,11,12,13,14,15,16,17,18,19,20,21,22
4+
201403,11,12,13,14,15,16,17,18,19,20,21,22
5+
201404,11,12,13,14,15,16,17,18,19,20,21,22
6+
201405,11,12,13,14,15,16,17,18,19,20,21,22
7+
201406,11,12,13,14,15,16,17,18,19,20,21,22
8+
201407,11,12,13,14,15,16,17,18,19,20,21,22
9+
201408,11,12,13,14,15,16,17,18,19,20,21,22
10+
201409,11,12,13,14,15,16,17,18,19,20,21,22
11+
201410,11,12,13,14,15,16,17,18,19,20,21,22
12+
201411,11,12,13,14,15,16,17,18,19,20,21,22
13+
201412,11,12,13,14,15,16,17,18,19,20,21,22
14+
201501,11,12,13,14,15,16,17,18,19,20,21,22
15+
201502,11,12,13,14,15,16,17,18,19,20,21,22
16+
201503,11,12,13,14,15,16,17,18,19,20,21,22
17+
201504,11,12,13,14,15,16,17,18,19,20,21,22
18+
201505,11,12,13,14,15,16,17,18,19,20,21,22
19+
201506,11,12,13,14,15,16,17,18,19,20,21,22
20+
201507,11,12,13,14,15,16,17,18,19,20,21,22
21+
201508,11,12,13,14,15,16,17,18,19,20,21,22
22+
201509,11,12,13,14,15,16,17,18,19,20,21,22
23+
201510,11,12,13,14,15,16,17,18,19,20,21,22
24+
201511,11,12,13,14,15,16,17,18,19,20,21,22
25+
201512,11,12,13,14,15,16,17,18,19,20,21,22
26+
201601,11,12,13,14,15,16,17,18,19,20,21,22
27+
201602,11,12,13,14,15,16,17,18,19,20,21,22
28+
201603,11,12,13,14,15,16,17,18,19,20,21,22
29+
201604,11,12,13,14,15,16,17,18,19,20,21,22
30+
201605,11,12,13,14,15,16,17,18,19,20,21,22
31+
201606,11,12,13,14,15,16,17,18,19,20,21,22
32+
201607,11,12,13,14,15,16,17,18,19,20,21,22
33+
201608,11,12,13,14,15,16,17,18,19,20,21,22
34+
201609,11,12,13,14,15,16,17,18,19,20,21,22
35+
201610,11,12,13,14,15,16,17,18,19,20,21,22
36+
201611,11,12,13,14,15,16,17,18,19,20,21,22
37+
201612,11,12,13,14,15,16,17,18,19,20,21,22
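test_12659.sas7bdat (file name lost in extraction; presumably the SAS file read by test_12659 below)

128 KB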
Binary file not shown.

pandas/io/tests/sas/test_sas7bdat.py

+10
Original file line number | Diff line number | Diff line change
@@ -73,3 +73,13 @@ def test_productsales():
7373
vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR", "MONTH"]
7474
df0[vn] = df0[vn].astype(np.float64)
7575
tm.assert_frame_equal(df, df0)
76+
77+
78+
def test_12659():
79+
dirpath = tm.get_data_path()
80+
fname = os.path.join(dirpath, "test_12659.sas7bdat")
81+
df = pd.read_sas(fname, encoding='latin1')
82+
fname = os.path.join(dirpath, "test_12659.csv")
83+
df0 = pd.read_csv(fname)
84+
df0 = df0.astype(np.float64)
85+
tm.assert_frame_equal(df, df0)

0 commit comments

Comments
 (0)