Skip to content

Commit abba4e2

Browse files
authored
REF: share Index/Block get_values_for_csv (#55485)
* REF: share Index/Block get_values_for_csv
* mypy fixup
1 parent e1368cf commit abba4e2

File tree

13 files changed

+158
-148
lines changed

13 files changed

+158
-148
lines changed

pandas/core/indexes/base.py

+127-27
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
algos as libalgos,
3131
index as libindex,
3232
lib,
33+
writers,
3334
)
3435
from pandas._libs.internals import BlockValuesRefs
3536
import pandas._libs.join as libjoin
@@ -97,7 +98,6 @@
9798
is_bool_dtype,
9899
is_ea_or_datetimelike_dtype,
99100
is_float,
100-
is_float_dtype,
101101
is_hashable,
102102
is_integer,
103103
is_iterator,
@@ -119,6 +119,7 @@
119119
ExtensionDtype,
120120
IntervalDtype,
121121
PeriodDtype,
122+
SparseDtype,
122123
)
123124
from pandas.core.dtypes.generic import (
124125
ABCDataFrame,
@@ -151,7 +152,9 @@
151152
ArrowExtensionArray,
152153
BaseMaskedArray,
153154
Categorical,
155+
DatetimeArray,
154156
ExtensionArray,
157+
TimedeltaArray,
155158
)
156159
from pandas.core.arrays.string_ import StringArray
157160
from pandas.core.base import (
@@ -199,7 +202,10 @@
199202
MultiIndex,
200203
Series,
201204
)
202-
from pandas.core.arrays import PeriodArray
205+
from pandas.core.arrays import (
206+
IntervalArray,
207+
PeriodArray,
208+
)
203209

204210
__all__ = ["Index"]
205211

@@ -1403,7 +1409,7 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str
14031409
result = trim_front(formatted)
14041410
return header + result
14051411

1406-
def _format_native_types(
1412+
def _get_values_for_csv(
14071413
self,
14081414
*,
14091415
na_rep: str_t = "",
@@ -1412,30 +1418,14 @@ def _format_native_types(
14121418
date_format=None,
14131419
quoting=None,
14141420
) -> npt.NDArray[np.object_]:
1415-
"""
1416-
Actually format specific types of the index.
1417-
"""
1418-
from pandas.io.formats.format import FloatArrayFormatter
1419-
1420-
if is_float_dtype(self.dtype) and not isinstance(self.dtype, ExtensionDtype):
1421-
formatter = FloatArrayFormatter(
1422-
self._values,
1423-
na_rep=na_rep,
1424-
float_format=float_format,
1425-
decimal=decimal,
1426-
quoting=quoting,
1427-
fixed_width=False,
1428-
)
1429-
return formatter.get_result_as_array()
1430-
1431-
mask = isna(self)
1432-
if self.dtype != object and not quoting:
1433-
values = np.asarray(self).astype(str)
1434-
else:
1435-
values = np.array(self, dtype=object, copy=True)
1436-
1437-
values[mask] = na_rep
1438-
return values
1421+
return get_values_for_csv(
1422+
self._values,
1423+
na_rep=na_rep,
1424+
decimal=decimal,
1425+
float_format=float_format,
1426+
date_format=date_format,
1427+
quoting=quoting,
1428+
)
14391429

14401430
def _summary(self, name=None) -> str_t:
14411431
"""
@@ -7629,3 +7619,113 @@ def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
76297619
stacklevel=find_stack_level(),
76307620
)
76317621
return result
7622+
7623+
7624+
def get_values_for_csv(
    values: ArrayLike,
    *,
    date_format,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
) -> npt.NDArray[np.object_]:
    """
    Build the array of cell values handed to ``csv.writer.writerows``.

    Parameters
    ----------
    values : ArrayLike
        Data to convert.
    date_format
        Forwarded to ``_format_native_types`` for datetime, timedelta and
        period data.
    na_rep : str, default "nan"
        Written in place of missing entries.
    quoting : optional
        When truthy, interval, float and plain ndarray data are kept as
        objects instead of being cast to ``str``; also forwarded to
        ``FloatArrayFormatter``.
    float_format : optional
        Forwarded to ``FloatArrayFormatter`` for float data.
    decimal : str, default "."
        Forwarded to ``FloatArrayFormatter`` for float data.

    Returns
    -------
    np.ndarray
        Converted values with ``na_rep`` substituted for missing entries.
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 categories of dtype kind "M"/"m": expand the codes into
        # the underlying category values before formatting
        category_values = values.categories._values
        codes = ensure_platform_int(values._codes)
        values = algos.take_nd(category_values, codes, fill_value=na_rep)

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim != 1:
            # GH#21734 format each slice along the first axis on its own,
            # they might have different formats
            rows = [
                values[i, :]
                ._format_native_types(na_rep=na_rep, date_format=date_format)
                .astype(object, copy=False)
                for i in range(len(values))
            ]
            return np.vstack(rows)

        text = values._format_native_types(na_rep=na_rep, date_format=date_format)
        return text.astype(object, copy=False)

    if isinstance(values.dtype, PeriodDtype):
        # TODO: tests that get here in column path
        periods = cast("PeriodArray", values)
        return periods._format_native_types(na_rep=na_rep, date_format=date_format)

    if isinstance(values.dtype, IntervalDtype):
        # TODO: tests that get here in column path
        intervals = cast("IntervalArray", values)
        missing = intervals.isna()
        if quoting:
            out = np.array(intervals, dtype=object, copy=True)
        else:
            out = np.asarray(intervals).astype(str)

        out[missing] = na_rep
        return out

    if values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
        if float_format is not None or decimal != ".":
            # Custom float rendering was requested: delegate to the formatter.
            from pandas.io.formats.format import FloatArrayFormatter

            formatted = FloatArrayFormatter(
                values,
                na_rep=na_rep,
                float_format=float_format,
                decimal=decimal,
                quoting=quoting,
                fixed_width=False,
            ).get_result_as_array()
            return formatted.astype(object, copy=False)

        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        missing = isna(values)
        if quoting:
            out = np.array(values, dtype="object")
        else:
            out = values.astype(str)

        out[missing] = na_rep
        return out.astype(object, copy=False)

    if isinstance(values, ExtensionArray):
        missing = isna(values)
        out = np.asarray(values.astype(object))
        out[missing] = na_rep
        return out

    # Fallback for every remaining array type.
    missing = isna(values)
    na_width = writers.word_len(na_rep)

    if values.dtype != _dtype_obj and not quoting and na_width:
        out = values.astype(str)
        if out.dtype.itemsize / np.dtype("U1").itemsize < na_width:
            # enlarge for the na_rep
            out = out.astype(f"<U{na_width}")
    else:
        out = np.array(values, dtype="object")

    out[missing] = na_rep
    return out.astype(object, copy=False)

pandas/core/indexes/datetimelike.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ def _format_with_header(
219219
# TODO: not reached in tests 2023-10-11
220220
# matches base class except for whitespace padding and date_format
221221
return header + list(
222-
self._format_native_types(na_rep=na_rep, date_format=date_format)
222+
self._get_values_for_csv(na_rep=na_rep, date_format=date_format)
223223
)
224224

225225
@property

pandas/core/indexes/datetimes.py

-1
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ def _new_DatetimeIndex(cls, d):
118118
"tzinfo",
119119
"dtype",
120120
"to_pydatetime",
121-
"_format_native_types",
122121
"date",
123122
"time",
124123
"timetz",

pandas/core/indexes/multi.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -1384,15 +1384,15 @@ def _formatter_func(self, tup):
13841384
formatter_funcs = [level._formatter_func for level in self.levels]
13851385
return tuple(func(val) for func, val in zip(formatter_funcs, tup))
13861386

1387-
def _format_native_types(
1387+
def _get_values_for_csv(
13881388
self, *, na_rep: str = "nan", **kwargs
13891389
) -> npt.NDArray[np.object_]:
13901390
new_levels = []
13911391
new_codes = []
13921392

13931393
# go through the levels and format them
13941394
for level, level_codes in zip(self.levels, self.codes):
1395-
level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
1395+
level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
13961396
# add nan values, if there are any
13971397
mask = level_codes == -1
13981398
if mask.any():
@@ -1408,7 +1408,7 @@ def _format_native_types(
14081408

14091409
if len(new_levels) == 1:
14101410
# a single-level multi-index
1411-
return Index(new_levels[0].take(new_codes[0]))._format_native_types()
1411+
return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
14121412
else:
14131413
# reconstruct the multi-index
14141414
mi = MultiIndex(

pandas/core/indexes/period.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ def _new_PeriodIndex(cls, **d):
8080
PeriodArray,
8181
wrap=True,
8282
)
83-
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
83+
@inherit_names(["is_leap_year"], PeriodArray)
8484
class PeriodIndex(DatetimeIndexOpsMixin):
8585
"""
8686
Immutable ndarray holding ordinal values indicating regular periods in time.

pandas/core/indexes/timedeltas.py

-1
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@
4848
"sum",
4949
"std",
5050
"median",
51-
"_format_native_types",
5251
],
5352
TimedeltaArray,
5453
)

pandas/core/internals/array_manager.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@
6868
Index,
6969
ensure_index,
7070
)
71+
from pandas.core.indexes.base import get_values_for_csv
7172
from pandas.core.internals.base import (
7273
DataManager,
7374
SingleDataManager,
@@ -79,7 +80,6 @@
7980
ensure_block_shape,
8081
external_values,
8182
extract_pandas_array,
82-
get_values_for_csv,
8383
maybe_coerce_values,
8484
new_block,
8585
)

0 commit comments

Comments
 (0)