30
30
algos as libalgos ,
31
31
index as libindex ,
32
32
lib ,
33
+ writers ,
33
34
)
34
35
from pandas ._libs .internals import BlockValuesRefs
35
36
import pandas ._libs .join as libjoin
97
98
is_bool_dtype ,
98
99
is_ea_or_datetimelike_dtype ,
99
100
is_float ,
100
- is_float_dtype ,
101
101
is_hashable ,
102
102
is_integer ,
103
103
is_iterator ,
119
119
ExtensionDtype ,
120
120
IntervalDtype ,
121
121
PeriodDtype ,
122
+ SparseDtype ,
122
123
)
123
124
from pandas .core .dtypes .generic import (
124
125
ABCDataFrame ,
151
152
ArrowExtensionArray ,
152
153
BaseMaskedArray ,
153
154
Categorical ,
155
+ DatetimeArray ,
154
156
ExtensionArray ,
157
+ TimedeltaArray ,
155
158
)
156
159
from pandas .core .arrays .string_ import StringArray
157
160
from pandas .core .base import (
199
202
MultiIndex ,
200
203
Series ,
201
204
)
202
- from pandas .core .arrays import PeriodArray
205
+ from pandas .core .arrays import (
206
+ IntervalArray ,
207
+ PeriodArray ,
208
+ )
203
209
204
210
__all__ = ["Index" ]
205
211
@@ -1403,7 +1409,7 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str
1403
1409
result = trim_front (formatted )
1404
1410
return header + result
1405
1411
1406
- def _format_native_types (
1412
+ def _get_values_for_csv (
1407
1413
self ,
1408
1414
* ,
1409
1415
na_rep : str_t = "" ,
@@ -1412,30 +1418,14 @@ def _format_native_types(
1412
1418
date_format = None ,
1413
1419
quoting = None ,
1414
1420
) -> npt .NDArray [np .object_ ]:
1415
- """
1416
- Actually format specific types of the index.
1417
- """
1418
- from pandas .io .formats .format import FloatArrayFormatter
1419
-
1420
- if is_float_dtype (self .dtype ) and not isinstance (self .dtype , ExtensionDtype ):
1421
- formatter = FloatArrayFormatter (
1422
- self ._values ,
1423
- na_rep = na_rep ,
1424
- float_format = float_format ,
1425
- decimal = decimal ,
1426
- quoting = quoting ,
1427
- fixed_width = False ,
1428
- )
1429
- return formatter .get_result_as_array ()
1430
-
1431
- mask = isna (self )
1432
- if self .dtype != object and not quoting :
1433
- values = np .asarray (self ).astype (str )
1434
- else :
1435
- values = np .array (self , dtype = object , copy = True )
1436
-
1437
- values [mask ] = na_rep
1438
- return values
1421
+ return get_values_for_csv (
1422
+ self ._values ,
1423
+ na_rep = na_rep ,
1424
+ decimal = decimal ,
1425
+ float_format = float_format ,
1426
+ date_format = date_format ,
1427
+ quoting = quoting ,
1428
+ )
1439
1429
1440
1430
def _summary (self , name = None ) -> str_t :
1441
1431
"""
@@ -7629,3 +7619,113 @@ def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
7629
7619
stacklevel = find_stack_level (),
7630
7620
)
7631
7621
return result
7622
+
7623
+
7624
def get_values_for_csv(
    values: ArrayLike,
    *,
    date_format,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
) -> npt.NDArray[np.object_]:
    """
    Convert to types which can be consumed by the standard library's
    csv.writer.writerows.

    Parameters
    ----------
    values : ArrayLike
        The array to convert.
    date_format
        Forwarded to ``_format_native_types`` for datetime, timedelta and
        period values.
    na_rep : str, default "nan"
        Text written in place of missing entries.
    quoting : optional
        When truthy, interval, float and generic values are kept as Python
        objects instead of being pre-converted to ``str``; also forwarded to
        ``FloatArrayFormatter``.
    float_format : optional
        Forwarded to ``FloatArrayFormatter``; when it is None and ``decimal``
        is ".", floats bypass the formatter entirely.
    decimal : str, default "."
        Forwarded to ``FloatArrayFormatter``.

    Returns
    -------
    np.ndarray
        Array holding the converted values with ``na_rep`` substituted at
        the missing positions.
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            result = values._format_native_types(na_rep=na_rep, date_format=date_format)
            return result.astype(object, copy=False)

        # GH#21734 Process every column separately, they might have different formats
        results_converted = [
            values[i, :]
            ._format_native_types(na_rep=na_rep, date_format=date_format)
            .astype(object, copy=False)
            for i in range(len(values))
        ]
        return np.vstack(results_converted)

    elif isinstance(values.dtype, PeriodDtype):
        # TODO: tests that get here in column path
        values = cast("PeriodArray", values)
        return values._format_native_types(na_rep=na_rep, date_format=date_format)

    elif isinstance(values.dtype, IntervalDtype):
        # TODO: tests that get here in column path
        values = cast("IntervalArray", values)
        mask = values.isna()
        if not quoting:
            # Go through str for the text form, then back to object so that
            # assigning na_rep below is not truncated to the fixed string
            # width NumPy picked for the interval representations.
            result = np.asarray(values).astype(str).astype(object)
        else:
            result = np.array(values, dtype=object, copy=True)

        result[mask] = na_rep
        return result

    elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
        # see GH#13418: no special formatting is desired at the
        #  output (important for appropriate 'quoting' behaviour),
        #  so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                result = values.astype(str)
            else:
                result = np.array(values, dtype="object")

            # Convert to object *before* writing na_rep so that a long
            # na_rep is not cut to the width of the float string dtype.
            result = result.astype(object, copy=False)
            result[mask] = na_rep
            return result

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        return res.astype(object, copy=False)

    elif isinstance(values, ExtensionArray):
        mask = isna(values)

        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    else:
        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge for the na_rep
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values
0 commit comments