Skip to content

Commit 493363e

Browse files
Update NA repr (pandas-dev#30821)
* Update NA repr Closes pandas-dev#30415
1 parent 2baf788 commit 493363e

File tree

15 files changed: +51 -38 lines changed

ci/code_checks.sh

+5-2
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,11 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
298298
-k"-from_arrays -from_breaks -from_intervals -from_tuples -set_closed -to_tuples -interval_range"
299299
RET=$(($RET + $?)) ; echo $MSG "DONE"
300300

301-
MSG='Doctests arrays/string_.py' ; echo $MSG
302-
pytest -q --doctest-modules pandas/core/arrays/string_.py
301+
MSG='Doctests arrays'; echo $MSG
302+
pytest -q --doctest-modules \
303+
pandas/core/arrays/string_.py \
304+
pandas/core/arrays/integer.py \
305+
pandas/core/arrays/boolean.py
303306
RET=$(($RET + $?)) ; echo $MSG "DONE"
304307

305308
MSG='Doctests arrays/boolean.py' ; echo $MSG

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,7 @@ To completely override the default values that are recognized as missing, specif
11531153
.. _io.navaluesconst:
11541154

11551155
The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A',
1156-
'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
1156+
'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
11571157

11581158
Let us consider some examples:
11591159

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ Other API changes
576576
Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
577577
- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
578578
- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`).
579+
- Added ``<NA>`` to the list of default NA values for :meth:`read_csv` (:issue:`30821`)
579580

580581

581582
.. _whatsnew_100.api.documentation:

pandas/_libs/missing.pyx

+1-4
Original file line numberDiff line numberDiff line change
@@ -354,10 +354,7 @@ class NAType(C_NAType):
354354
return NAType._instance
355355

356356
def __repr__(self) -> str:
357-
return "NA"
358-
359-
def __str__(self) -> str:
360-
return "NA"
357+
return "<NA>"
361358

362359
def __bool__(self):
363360
raise TypeError("boolean value of NA is ambiguous")

pandas/_libs/parsers.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -1369,6 +1369,7 @@ STR_NA_VALUES = {
13691369
"N/A",
13701370
"n/a",
13711371
"NA",
1372+
"<NA>",
13721373
"#NA",
13731374
"NULL",
13741375
"null",

pandas/core/arrays/boolean.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ class BooleanArray(BaseMaskedArray):
244244
245245
>>> pd.array([True, False, None], dtype="boolean")
246246
<BooleanArray>
247-
[True, False, NA]
247+
[True, False, <NA>]
248248
Length: 3, dtype: boolean
249249
"""
250250

@@ -527,7 +527,7 @@ def any(self, skipna: bool = True, **kwargs):
527527
>>> pd.array([True, False, pd.NA]).any(skipna=False)
528528
True
529529
>>> pd.array([False, False, pd.NA]).any(skipna=False)
530-
NA
530+
<NA>
531531
"""
532532
kwargs.pop("axis", None)
533533
nv.validate_any((), kwargs)
@@ -592,7 +592,7 @@ def all(self, skipna: bool = True, **kwargs):
592592
required (whether ``pd.NA`` is True or False influences the result):
593593
594594
>>> pd.array([True, True, pd.NA]).all(skipna=False)
595-
NA
595+
<NA>
596596
>>> pd.array([True, False, pd.NA]).all(skipna=False)
597597
False
598598
"""

pandas/core/arrays/integer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -301,19 +301,19 @@ class IntegerArray(BaseMaskedArray):
301301
>>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
302302
>>> int_array
303303
<IntegerArray>
304-
[1, NaN, 3]
304+
[1, <NA>, 3]
305305
Length: 3, dtype: Int32
306306
307307
String aliases for the dtypes are also available. They are capitalized.
308308
309309
>>> pd.array([1, None, 3], dtype='Int32')
310310
<IntegerArray>
311-
[1, NaN, 3]
311+
[1, <NA>, 3]
312312
Length: 3, dtype: Int32
313313
314314
>>> pd.array([1, None, 3], dtype='UInt16')
315315
<IntegerArray>
316-
[1, NaN, 3]
316+
[1, <NA>, 3]
317317
Length: 3, dtype: UInt16
318318
"""
319319

pandas/core/arrays/string_.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ class StringArray(PandasArray):
131131
--------
132132
>>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
133133
<StringArray>
134-
['This is', 'some text', NA, 'data.']
134+
['This is', 'some text', <NA>, 'data.']
135135
Length: 4, dtype: string
136136
137137
Unlike ``object`` dtype arrays, ``StringArray`` doesn't allow non-string
@@ -146,7 +146,7 @@ class StringArray(PandasArray):
146146
147147
>>> pd.array(["a", None, "c"], dtype="string") == "a"
148148
<BooleanArray>
149-
[True, NA, False]
149+
[True, <NA>, False]
150150
Length: 3, dtype: boolean
151151
"""
152152

pandas/core/internals/blocks.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1777,12 +1777,8 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
17771777
values = values[slicer]
17781778
mask = isna(values)
17791779

1780-
try:
1781-
values[mask] = na_rep
1782-
except Exception:
1783-
# eg SparseArray does not support setitem, needs to be converted to ndarray
1784-
return super().to_native_types(slicer, na_rep, quoting, **kwargs)
1785-
values = values.astype(str)
1780+
values = np.asarray(values.astype(object))
1781+
values[mask] = na_rep
17861782

17871783
# we are expected to return a 2-d ndarray
17881784
return values.reshape(1, len(values))

pandas/io/formats/format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1230,7 +1230,7 @@ def _format(x):
12301230
if x is None:
12311231
return "None"
12321232
elif x is NA:
1233-
return "NA"
1233+
return formatter(x)
12341234
elif x is NaT or np.isnat(x):
12351235
return "NaT"
12361236
except (TypeError, ValueError):

pandas/tests/arrays/string_/test_string.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,16 @@
99
import pandas._testing as tm
1010

1111

12-
def test_repr_with_NA():
13-
a = pd.array(["a", pd.NA, "b"], dtype="string")
14-
for obj in [a, pd.Series(a), pd.DataFrame({"a": a})]:
15-
assert "NA" in repr(obj) and "NaN" not in repr(obj)
16-
assert "NA" in str(obj) and "NaN" not in str(obj)
17-
if hasattr(obj, "_repr_html_"):
18-
html_repr = obj._repr_html_()
19-
assert "NA" in html_repr and "NaN" not in html_repr
12+
def test_repr():
13+
df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype="string")})
14+
expected = " A\n0 a\n1 <NA>\n2 b"
15+
assert repr(df) == expected
16+
17+
expected = "0 a\n1 <NA>\n2 b\nName: A, dtype: string"
18+
assert repr(df.A) == expected
19+
20+
expected = "<StringArray>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
21+
assert repr(df.A.array) == expected
2022

2123

2224
def test_none_to_nan():

pandas/tests/arrays/test_boolean.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,18 @@ def test_coerce_to_numpy_array():
251251
np.array(arr, dtype="bool")
252252

253253

254+
def test_repr():
255+
df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
256+
expected = " A\n0 True\n1 False\n2 <NA>"
257+
assert repr(df) == expected
258+
259+
expected = "0 True\n1 False\n2 <NA>\nName: A, dtype: boolean"
260+
assert repr(df.A) == expected
261+
262+
expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
263+
assert repr(df.A.array) == expected
264+
265+
254266
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
255267
def test_to_numpy(box):
256268
con = pd.Series if box else pd.array
@@ -335,7 +347,7 @@ def test_astype():
335347
tm.assert_numpy_array_equal(result, expected)
336348

337349
result = arr.astype("str")
338-
expected = np.array(["True", "False", "NA"], dtype="object")
350+
expected = np.array(["True", "False", "<NA>"], dtype="object")
339351
tm.assert_numpy_array_equal(result, expected)
340352

341353
# no missing values

pandas/tests/arrays/test_integer.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,17 @@ def test_repr_dtype(dtype, expected):
9090

9191
def test_repr_array():
9292
result = repr(integer_array([1, None, 3]))
93-
expected = "<IntegerArray>\n[1, NA, 3]\nLength: 3, dtype: Int64"
93+
expected = "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
9494
assert result == expected
9595

9696

9797
def test_repr_array_long():
9898
data = integer_array([1, 2, None] * 1000)
9999
expected = (
100100
"<IntegerArray>\n"
101-
"[ 1, 2, NA, 1, 2, NA, 1, 2, NA, 1,\n"
101+
"[ 1, 2, <NA>, 1, 2, <NA>, 1, 2, <NA>, 1,\n"
102102
" ...\n"
103-
" NA, 1, 2, NA, 1, 2, NA, 1, 2, NA]\n"
103+
" <NA>, 1, 2, <NA>, 1, 2, <NA>, 1, 2, <NA>]\n"
104104
"Length: 3000, dtype: Int64"
105105
)
106106
result = repr(data)
@@ -673,7 +673,7 @@ def test_to_numpy_na_raises(self, dtype):
673673

674674
def test_astype_str(self):
675675
a = pd.array([1, 2, None], dtype="Int64")
676-
expected = np.array(["1", "2", "NA"], dtype=object)
676+
expected = np.array(["1", "2", "<NA>"], dtype=object)
677677

678678
tm.assert_numpy_array_equal(a.astype(str), expected)
679679
tm.assert_numpy_array_equal(a.astype("str"), expected)
@@ -683,7 +683,7 @@ def test_frame_repr(data_missing):
683683

684684
df = pd.DataFrame({"A": data_missing})
685685
result = repr(df)
686-
expected = " A\n0 NA\n1 1"
686+
expected = " A\n0 <NA>\n1 1"
687687
assert result == expected
688688

689689

pandas/tests/io/parser/test_na_values.py

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def test_default_na_values(all_parsers):
8989
"N/A",
9090
"n/a",
9191
"NA",
92+
"<NA>",
9293
"#NA",
9394
"NULL",
9495
"null",

pandas/tests/scalar/test_na_scalar.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ def test_singleton():
1616

1717

1818
def test_repr():
19-
assert repr(NA) == "NA"
20-
assert str(NA) == "NA"
19+
assert repr(NA) == "<NA>"
20+
assert str(NA) == "<NA>"
2121

2222

2323
def test_truthiness():

0 commit comments

Comments
 (0)