Skip to content

BUG: bad display for complex series with nan #53764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 26 additions & 20 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1506,14 +1506,16 @@ def format_values_with(float_format):

# default formatter leaves a space to the left when formatting
# floats, must be consistent for left-justifying NaNs (GH #25061)
if self.justify == "left":
na_rep = " " + self.na_rep
else:
na_rep = self.na_rep
na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep

# separate the wheat from the chaff
# different formatting strategies for complex and non-complex data
# need to distinguish complex and float NaNs (GH #53762)
values = self.values
is_complex = is_complex_dtype(values)
if is_complex:
na_rep = f"{na_rep}+{0:.{self.digits}f}j"

# separate the wheat from the chaff
values = format_with_na_rep(values, formatter, na_rep)

if self.fixed_width:
Expand Down Expand Up @@ -1912,22 +1914,26 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s
Separates the real and imaginary parts from the complex number, and
executes the _trim_zeros_float method on each of those.
"""
trimmed = [
"".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))
for x in str_complexes
]

# pad strings to the length of the longest trimmed string for alignment
lengths = [len(s) for s in trimmed]
max_length = max(lengths)
real_part, imag_part = [], []
for x in str_complexes:
# Complex numbers are represented as "(-)xxx(+/-)xxxj"
# The split will give [maybe "-", "xxx", "+/-", "xxx", "j", ""]
# Therefore, the imaginary part is the 4th and 3rd last elements,
# and the real part is everything before the imaginary part
trimmed = re.split(r"([j+-])", x)
real_part.append("".join(trimmed[:-4]))
imag_part.append("".join(trimmed[-4:-2]))

# We want to align the lengths of the real and imaginary parts of each complex
# number, as well as the lengths of the real (resp. imaginary) parts of all numbers
# in the array
n = len(str_complexes)
padded_parts = _trim_zeros_float(real_part + imag_part, decimal)
padded = [
s[: -((k - 1) // 2 + 1)] # real part
+ (max_length - k) // 2 * "0"
+ s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / -
+ s[-((k - 1) // 2) : -1] # imaginary part
+ (max_length - k) // 2 * "0"
+ s[-1]
for s, k in zip(trimmed, lengths)
padded_parts[i] # real part (including - or space, possibly "NaN")
+ padded_parts[i + n] # imaginary part (including + or -)
+ "j"
for i in range(n)
]
return padded

Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/io/formats/test_printing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import string

import numpy as np
import pytest

import pandas._config.config as cf

Expand Down Expand Up @@ -207,3 +208,27 @@ def test_multiindex_long_element():
"cccccccccccccccccccccc',)],\n )"
)
assert str(data) == expected


@pytest.mark.parametrize(
    "data,output",
    [
        # Within each case every expected string has the same width: the
        # formatter pads the real and imaginary parts so the column lines up,
        # which is why the NaN entries carry extra leading spaces.
        ([2, complex("nan"), 1], [" 2.0+0.0j", " NaN+0.0j", " 1.0+0.0j"]),
        ([2, complex("nan"), -1], [" 2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]),
        ([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]),
        ([-1.23j, complex("nan"), -1], ["-0.00-1.23j", "  NaN+0.00j", "-1.00+0.00j"]),
        ([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", "  NaN+0.00j", " 1.23+0.00j"]),
    ],
)
@pytest.mark.parametrize("as_frame", [True, False])
def test_ser_df_with_complex_nans(data, output, as_frame):
    """Check the repr of complex data containing NaN, as Series and DataFrame.

    Parameters
    ----------
    data : list
        Values used to build the Series; each case includes ``complex("nan")``.
    output : list of str
        Expected formatted value for each row, including the leading sign
        slot (``" "`` or ``"-"``) and any alignment padding.
    as_frame : bool
        If True, the Series is converted to a one-column DataFrame named
        ``"val"`` before taking its string form.
    """
    # GH#53762
    obj = pd.Series(data)
    if as_frame:
        obj = obj.to_frame(name="val")
        # DataFrame rows: index label, one separating space, then the value.
        reprs = [f"{i} {val}" for i, val in enumerate(output)]
        # The header is the column name right-aligned to the row width.
        expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs)
    else:
        # Series rows use a wider gutter (three spaces) between the index
        # label and the value, followed by the dtype footer.
        reprs = [f"{i}   {val}" for i, val in enumerate(output)]
        expected = "\n".join(reprs) + "\ndtype: complex128"
    assert str(obj) == expected