Skip to content

Commit 23bdf7a

Browse files
committed
Decouple data decoding and decoding e.g. of column names
1 parent 3bd1b35 commit 23bdf7a

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

pandas/io/sas/sas7bdat.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,21 @@ class SAS7BDATReader(BaseIterator):
5353
Return SAS7BDATReader object for iterations, returns chunks
5454
with given number of lines.
5555
encoding : string, defaults to None
56-
String encoding. If None, text variables are left as raw bytes.
56+
String encoding.
57+
convert_text : bool, defaults to True
58+
If False, text variables are left as raw bytes.
5759
"""
5860

5961
def __init__(self, path_or_buf, index=None, convert_dates=True,
60-
blank_missing=True, chunksize=None, encoding=None):
62+
blank_missing=True, chunksize=None, encoding=None,
63+
convert_text=True):
6164

6265
self.index = index
6366
self.convert_dates = convert_dates
6467
self.blank_missing = blank_missing
6568
self.chunksize = chunksize
6669
self.encoding = encoding
70+
self.convert_text = convert_text
6771

6872
self.compression = ""
6973
self.column_names_strings = []
@@ -611,7 +615,7 @@ def _chunk_to_dataframe(self):
611615
elif self.column_types[j] == b's':
612616
rslt[name] = self._string_chunk[js, :]
613617
rslt[name] = rslt[name].apply(lambda x: x.rstrip(b'\x00 '))
614-
if self.encoding is not None:
618+
if self.convert_text and (self.encoding is not None):
615619
rslt[name] = rslt[name].apply(
616620
lambda x: x.decode(encoding=self.encoding))
617621
if self.blank_missing:

0 commit comments

Comments
 (0)