diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index fb121d3aed105..300df9728cd75 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -789,7 +789,7 @@ def _chunk_to_dataframe(self) -> DataFrame: n = self._current_row_in_chunk_index m = self._current_row_in_file_index ix = range(m - n, m) - rslt = DataFrame(index=ix) + rslt = {} js, jb = 0, 0 for j in range(self.column_count): @@ -798,7 +798,7 @@ def _chunk_to_dataframe(self) -> DataFrame: if self._column_types[j] == b"d": rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d") - rslt[name] = np.asarray(rslt[name], dtype=np.float64) + rslt[name] = pd.Series(rslt[name], dtype=np.float64, index=ix) if self.convert_dates: if self.column_formats[j] in const.sas_date_formats: rslt[name] = _convert_datetimes(rslt[name], "d") @@ -806,17 +806,18 @@ def _chunk_to_dataframe(self) -> DataFrame: rslt[name] = _convert_datetimes(rslt[name], "s") jb += 1 elif self._column_types[j] == b"s": - rslt[name] = self._string_chunk[js, :] + rslt[name] = pd.Series(self._string_chunk[js, :], index=ix) if self.convert_text and (self.encoding is not None): rslt[name] = rslt[name].str.decode( self.encoding or self.default_encoding ) if self.blank_missing: ii = rslt[name].str.len() == 0 - rslt.loc[ii, name] = np.nan + rslt[name][ii] = np.nan js += 1 else: self.close() raise ValueError(f"unknown column type {self._column_types[j]}") - return rslt + df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False) + return df diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 3b6bfee8f9657..5477559262cb8 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -7,10 +7,7 @@ import numpy as np import pytest -from pandas.errors import ( - EmptyDataError, - PerformanceWarning, -) +from pandas.errors import EmptyDataError import pandas.util._test_decorators as td import pandas as pd @@ -202,15 +199,11 @@ def test_compact_numerical_values(datapath): tm.assert_series_equal(result, expected, check_exact=True) -def test_many_columns(datapath, using_array_manager): +def test_many_columns(datapath): # Test for looking for column information in more places (PR #22628) fname = datapath("io", "sas", "data", "many_columns.sas7bdat") - expected_warning = None - if not using_array_manager: - expected_warning = PerformanceWarning - with tm.assert_produces_warning(expected_warning): - # Many DataFrame.insert calls - df = pd.read_sas(fname, encoding="latin-1") + + df = pd.read_sas(fname, encoding="latin-1") fname = datapath("io", "sas", "data", "many_columns.csv") df0 = pd.read_csv(fname, encoding="latin-1")