From 59856693d7e7ff2fc4a9db3e14cd76a0fefc9c1a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 2 Apr 2020 15:27:17 -0700 Subject: [PATCH] REF: do slicing before calling Block.to_native_types --- pandas/core/internals/blocks.py | 36 ++++++++------------------------- pandas/io/formats/csvs.py | 11 ++++++---- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8e2592a603716..44c63aaf6a758 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -651,12 +651,10 @@ def should_store(self, value: ArrayLike) -> bool: """ return is_dtype_equal(value.dtype, self.dtype) - def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): - """ convert to our native types format, slicing if desired """ + def to_native_types(self, na_rep="nan", quoting=None, **kwargs): + """ convert to our native types format """ values = self.values - if slicer is not None: - values = values[:, slicer] mask = isna(values) itemsize = writers.word_len(na_rep) @@ -1715,11 +1713,9 @@ def get_values(self, dtype=None): def array_values(self) -> ExtensionArray: return self.values - def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): + def to_native_types(self, na_rep="nan", quoting=None, **kwargs): """override to use ExtensionArray astype for the conversion""" values = self.values - if slicer is not None: - values = values[slicer] mask = isna(values) values = np.asarray(values.astype(object)) @@ -1937,18 +1933,10 @@ def _can_hold_element(self, element: Any) -> bool: ) def to_native_types( - self, - slicer=None, - na_rep="", - float_format=None, - decimal=".", - quoting=None, - **kwargs, + self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs, ): - """ convert to our native types format, slicing if desired """ + """ convert to our native types format """ values = self.values - if slicer is not None: - values = values[:, slicer] # see gh-13418: no special formatting is desired at the # output (important for appropriate 'quoting' behaviour), @@ -2131,17 +2119,11 @@ def _can_hold_element(self, element: Any) -> bool: return is_valid_nat_for_dtype(element, self.dtype) - def to_native_types( - self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs - ): + def to_native_types(self, na_rep=None, date_format=None, quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.values i8values = self.values.view("i8") - if slicer is not None: - values = values[..., slicer] - i8values = i8values[..., slicer] - from pandas.io.formats.format import _get_format_datetime64_from_values fmt = _get_format_datetime64_from_values(values, date_format) @@ -2387,11 +2369,9 @@ def fillna(self, value, **kwargs): ) return super().fillna(value, **kwargs) - def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs): - """ convert to our native types format, slicing if desired """ + def to_native_types(self, na_rep=None, quoting=None, **kwargs): + """ convert to our native types format """ values = self.values - if slicer is not None: - values = values[:, slicer] mask = isna(values) rvalues = np.empty(values.shape, dtype=object) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 0d581f30e50e7..60691d1fea412 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -132,7 +132,7 @@ def __init__( # preallocate data 2d list self.blocks = self.obj._data.blocks - ncols = sum(b.shape[0] for b in self.blocks) + ncols = self.obj.shape[-1] self.data = [None] * ncols if chunksize is None: @@ -327,10 +327,13 @@ def _save_chunk(self, start_i: int, end_i: int) -> None: # create the data for a chunk slicer = slice(start_i, end_i) - for i in range(len(self.blocks)): - b = self.blocks[i] + + df = self.obj.iloc[slicer] + blocks = df._data.blocks + + for i in range(len(blocks)): + b = blocks[i] d = b.to_native_types( - slicer=slicer, na_rep=self.na_rep, float_format=self.float_format, decimal=self.decimal,