Skip to content

Commit 5d81894

Browse files
saucoide authored and TLouf committed
BUG: Check for null values when inferring excel in read_clipboard (pandas-dev#41109)
1 parent 50604dc commit 5d81894

File tree

3 files changed

+55
-2
lines changed

3 files changed

+55
-2
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,7 @@ I/O
935935
- Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`)
936936
- Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
937937
- Bug in :func:`read_excel` would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
938-
-
938+
- Bug in :func:`read_clipboard` where copying from an Excel file shifted values into the wrong column if there were null values in the first column (:issue:`41108`)
939939

940940
Period
941941
^^^^^^

pandas/io/clipboards.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,14 @@ def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover
5858
# 0 1 2
5959
# 1 3 4
6060

61-
counts = {x.lstrip().count("\t") for x in lines}
61+
counts = {x.lstrip(" ").count("\t") for x in lines}
6262
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
6363
sep = "\t"
64+
# check the number of leading tabs in the first line
65+
# to account for index columns
66+
index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
67+
if index_length != 0:
68+
kwargs.setdefault("index_col", list(range(index_length)))
6469

6570
# Edge case where sep is specified to be None, return to default
6671
if sep is None and kwargs.get("delim_whitespace") is None:

pandas/tests/io/test_clipboard.py

+48
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,54 @@ def test_read_clipboard_infer_excel(self, request, mock_clipboard):
243243

244244
tm.assert_frame_equal(res, exp)
245245

246+
def test_infer_excel_with_nulls(self, request, mock_clipboard):
247+
# GH41108
248+
text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen"
249+
250+
mock_clipboard[request.node.name] = text
251+
df = read_clipboard()
252+
df_expected = DataFrame(
253+
data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
254+
)
255+
256+
# excel data is parsed correctly
257+
tm.assert_frame_equal(df, df_expected)
258+
259+
@pytest.mark.parametrize(
260+
"multiindex",
261+
[
262+
( # Can't use `dedent` here as it will remove the leading `\t`
263+
"\n".join(
264+
[
265+
"\t\t\tcol1\tcol2",
266+
"A\t0\tTrue\t1\tred",
267+
"A\t1\tTrue\t\tblue",
268+
"B\t0\tFalse\t2\tgreen",
269+
]
270+
),
271+
[["A", "A", "B"], [0, 1, 0], [True, True, False]],
272+
),
273+
(
274+
"\n".join(
275+
["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"]
276+
),
277+
[["A", "A", "B"], [0, 1, 0]],
278+
),
279+
],
280+
)
281+
def test_infer_excel_with_multiindex(self, request, mock_clipboard, multiindex):
282+
# GH41108
283+
284+
mock_clipboard[request.node.name] = multiindex[0]
285+
df = read_clipboard()
286+
df_expected = DataFrame(
287+
data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
288+
index=multiindex[1],
289+
)
290+
291+
# excel data is parsed correctly
292+
tm.assert_frame_equal(df, df_expected)
293+
246294
def test_invalid_encoding(self, df):
247295
msg = "clipboard only supports utf-8 encoding"
248296
# test case for testing invalid encoding

0 commit comments

Comments
 (0)