diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7aa1c1e84aa09..e63107a8eb243 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -799,6 +799,7 @@ Performance improvements - Performance improvement in :class:`BusinessHour` ``str`` and ``repr`` (:issue:`44764`) - Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`) - Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`) +- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 9ea1c31c3d5cf..d8591c0b033a6 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -424,8 +424,11 @@ cdef class Parser: jb += 1 elif column_types[j] == column_type_string: # string - string_chunk[js, current_row] = np.array(source[start:( - start + lngt)]).tobytes().rstrip(b"\x00 ") + # Skip trailing whitespace. This is equivalent to calling + # .rstrip(b"\x00 ") but without Python call overhead. + while lngt > 0 and source[start+lngt-1] in b"\x00 ": + lngt -= 1 + string_chunk[js, current_row] = (&source[start])[:lngt] js += 1 self.current_row_on_page_index += 1