Skip to content

Commit 307797c

Browse files
troels authored and jreback committed
BUG: Make sure that sas7bdat parser's memory is initialized to 0 (pandas-dev#21616) (pandas-dev#22651)
1 parent 79f95fe commit 307797c

File tree

4 files changed

+18
-2
lines changed

4 files changed

+18
-2
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ I/O
739739
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
740740
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
741741
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
742-
-
742+
- :func:`read_sas()` will correctly parse numbers in sas7bdat files that are stored with a column width of less than 8 bytes. (:issue:`21616`)
743743

744744
Plotting
745745
^^^^^^^^

pandas/io/sas/sas7bdat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ def read(self, nrows=None):
614614
ns = (self.column_types == b's').sum()
615615

616616
self._string_chunk = np.empty((ns, nrows), dtype=np.object)
617-
self._byte_chunk = np.empty((nd, 8 * nrows), dtype=np.uint8)
617+
self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
618618

619619
self._current_row_in_chunk_index = 0
620620
p = Parser(self)
13 KB
Binary file not shown.

pandas/tests/io/sas/test_sas7bdat.py

+16
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,22 @@ def test_date_time(datapath):
183183
tm.assert_frame_equal(df, df0)
184184

185185

186+
def test_compact_numerical_values(datapath):
187+
# Regression test for #21616
188+
fname = datapath("io", "sas", "data", "cars.sas7bdat")
189+
df = pd.read_sas(fname, encoding='latin-1')
190+
# The two columns CYL and WGT in cars.sas7bdat have column
191+
# width < 8 and only contain integral values.
192+
# Test that pandas doesn't corrupt the numbers by adding
193+
# decimals.
194+
result = df['WGT']
195+
expected = df['WGT'].round()
196+
tm.assert_series_equal(result, expected, check_exact=True)
197+
result = df['CYL']
198+
expected = df['CYL'].round()
199+
tm.assert_series_equal(result, expected, check_exact=True)
200+
201+
186202
def test_zero_variables(datapath):
187203
# Check if the SAS file has zero variables (PR #18184)
188204
fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")

0 commit comments

Comments
 (0)