|
53 | 53 | DatetimeIndex,
|
54 | 54 | NaT,
|
55 | 55 | Timestamp,
|
56 |
| - concat, |
57 | 56 | isna,
|
58 | 57 | to_datetime,
|
59 | 58 | to_timedelta,
|
@@ -1663,7 +1662,7 @@ def read(
|
1663 | 1662 | # restarting at 0 for each chunk.
|
1664 | 1663 | if index_col is None:
|
1665 | 1664 | ix = np.arange(self._lines_read - read_lines, self._lines_read)
|
1666 |
| - data = data.set_index(ix) |
| 1665 | + data.index = ix # set attr instead of set_index to avoid copy |
1667 | 1666 |
|
1668 | 1667 | if columns is not None:
|
1669 | 1668 | try:
|
@@ -1779,19 +1778,18 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
|
1779 | 1778 | if dtype not in (np.float32, np.float64):
|
1780 | 1779 | dtype = np.float64
|
1781 | 1780 | replacement = Series(series, dtype=dtype)
|
| 1781 | + if not replacement._values.flags["WRITEABLE"]: |
| 1782 | + # only relevant for ArrayManager; construction |
| 1783 | + # path for BlockManager ensures writeability |
| 1784 | + replacement = replacement.copy() |
1782 | 1785 | # Note: operating on ._values is much faster than directly
|
1783 | 1786 | # TODO: can we fix that?
|
1784 | 1787 | replacement._values[missing] = np.nan
|
1785 | 1788 | replacements[colname] = replacement
|
| 1789 | + |
1786 | 1790 | if replacements:
|
1787 |
| - columns = data.columns |
1788 |
| - replacement_df = DataFrame(replacements, copy=False) |
1789 |
| - replaced = concat( |
1790 |
| - [data.drop(replacement_df.columns, axis=1), replacement_df], |
1791 |
| - axis=1, |
1792 |
| - copy=False, |
1793 |
| - ) |
1794 |
| - data = replaced[columns] |
| 1791 | + for col in replacements: |
| 1792 | + data[col] = replacements[col] |
1795 | 1793 | return data
|
1796 | 1794 |
|
1797 | 1795 | def _insert_strls(self, data: DataFrame) -> DataFrame:
|
|
0 commit comments