diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 413597f6c3748..4925a72e6b985 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -217,7 +217,7 @@ MultiIndex I/O ^^^ -- +- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`) - Period diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 648c58dee6600..a5df2bde38096 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -481,7 +481,7 @@ def read(self, nrows: int | None = None) -> pd.DataFrame: raw = self.filepath_or_buffer.read(read_len) data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) - df = pd.DataFrame(index=range(read_lines)) + df_data = {} for j, x in enumerate(self.columns): vec = data["s" + str(j)] ntype = self.fields[j]["ntype"] @@ -496,7 +496,8 @@ def read(self, nrows: int | None = None) -> pd.DataFrame: if self._encoding is not None: v = [y.decode(self._encoding) for y in v] - df[x] = v + df_data.update({x: v}) + df = pd.DataFrame(df_data) if self._index is None: df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines))