Skip to content

Commit 34ccf05

Browse files
jbrockmendel authored and feefladder committed
PERF: read_sas (pandas-dev#43333)
1 parent 84cf124 commit 34ccf05

File tree

2 files changed

+10
-16
lines changed

2 files changed

+10
-16
lines changed

pandas/io/sas/sas7bdat.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -789,7 +789,7 @@ def _chunk_to_dataframe(self) -> DataFrame:
789789
n = self._current_row_in_chunk_index
790790
m = self._current_row_in_file_index
791791
ix = range(m - n, m)
792-
rslt = DataFrame(index=ix)
792+
rslt = {}
793793

794794
js, jb = 0, 0
795795
for j in range(self.column_count):
@@ -798,25 +798,26 @@ def _chunk_to_dataframe(self) -> DataFrame:
798798

799799
if self._column_types[j] == b"d":
800800
rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
801-
rslt[name] = np.asarray(rslt[name], dtype=np.float64)
801+
rslt[name] = pd.Series(rslt[name], dtype=np.float64, index=ix)
802802
if self.convert_dates:
803803
if self.column_formats[j] in const.sas_date_formats:
804804
rslt[name] = _convert_datetimes(rslt[name], "d")
805805
elif self.column_formats[j] in const.sas_datetime_formats:
806806
rslt[name] = _convert_datetimes(rslt[name], "s")
807807
jb += 1
808808
elif self._column_types[j] == b"s":
809-
rslt[name] = self._string_chunk[js, :]
809+
rslt[name] = pd.Series(self._string_chunk[js, :], index=ix)
810810
if self.convert_text and (self.encoding is not None):
811811
rslt[name] = rslt[name].str.decode(
812812
self.encoding or self.default_encoding
813813
)
814814
if self.blank_missing:
815815
ii = rslt[name].str.len() == 0
816-
rslt.loc[ii, name] = np.nan
816+
rslt[name][ii] = np.nan
817817
js += 1
818818
else:
819819
self.close()
820820
raise ValueError(f"unknown column type {self._column_types[j]}")
821821

822-
return rslt
822+
df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False)
823+
return df

pandas/tests/io/sas/test_sas7bdat.py

+4 -11
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,7 @@
77
import numpy as np
88
import pytest
99

10-
from pandas.errors import (
11-
EmptyDataError,
12-
PerformanceWarning,
13-
)
10+
from pandas.errors import EmptyDataError
1411
import pandas.util._test_decorators as td
1512

1613
import pandas as pd
@@ -202,15 +199,11 @@ def test_compact_numerical_values(datapath):
202199
tm.assert_series_equal(result, expected, check_exact=True)
203200

204201

205-
def test_many_columns(datapath, using_array_manager):
202+
def test_many_columns(datapath):
206203
# Test for looking for column information in more places (PR #22628)
207204
fname = datapath("io", "sas", "data", "many_columns.sas7bdat")
208-
expected_warning = None
209-
if not using_array_manager:
210-
expected_warning = PerformanceWarning
211-
with tm.assert_produces_warning(expected_warning):
212-
# Many DataFrame.insert calls
213-
df = pd.read_sas(fname, encoding="latin-1")
205+
206+
df = pd.read_sas(fname, encoding="latin-1")
214207

215208
fname = datapath("io", "sas", "data", "many_columns.csv")
216209
df0 = pd.read_csv(fname, encoding="latin-1")

0 commit comments

Comments
 (0)