Skip to content

Commit 4a1a330

Browse files
Ajay Saxenajreback
Ajay Saxena
authored and committed
BUG in clipboard (linux, python2) with unicode and separator (GH13747)
vendored updated version of Pyperclip closes #13747 closes #14362 closes #12807 closes #12529 Author: Ajay Saxena <[email protected]> Author: Ajay Saxena <[email protected]> Closes #14599 from aileronajay/master and squashes the following commits: 2aafb66 [Ajay Saxena] moved comment inside test and added github issue labels to test b74fbc1 [Ajay Saxena] ignore lint test for pyperclip files 9db42d8 [Ajay Saxena] whatsnew conflict 1dca292 [Ajay Saxena] conflict resolution 98b61e8 [Ajay Saxena] merge conflict cedb690 [Ajay Saxena] merge conflict in whats new file 7af95da [Ajay Saxena] merging latest changes ac8ae60 [Ajay Saxena] skip clipboard test if clipboard primitives are absent b03ed56 [Ajay Saxena] changed whatsnew file c0aafd7 [Ajay Saxena] Merge branch 'test_branch' 9946fb7 [Ajay Saxena] Merge branch 'master' of https://github.com/pandas-dev/pandas into test_branch ed1375f [Ajay Saxena] Merge branch 'test_branch' 0665fd4 [Ajay Saxena] fixed linting and test case as per code review d202fd0 [Ajay Saxena] added test for valid encoding, modified setup.py so that pandas/util/clipboard can be found dd57ae3 [Ajay Saxena] code review changes and read clipboard invalid encoding test 71d58d0 [Ajay Saxena] testing encoding in kwargs to to_clipboard and test case for the same 02f87b0 [Ajay Saxena] removed duplicate files 825bbe2 [Ajay Saxena] all files related to pyperclip are under pandas.util.clipboard c5a87d8 [Ajay Saxena] Merge branch 'test_branch' of https://github.com/aileronajay/pandas into test_branch f708c2e [Ajay Saxena] Merge branch 'master' of https://github.com/aileronajay/pandas d565b1f [Ajay Saxena] updated pyperclip to the latest version 14d94a0 [Ajay Saxena] changed the pandas util clipboard file to return unicode if the python version is 2, else str 66d8ebf [Ajay Saxena] removed the disabled tag for clipboard test so that we can check if they pass after this change edb8553 [Ajay Saxena] refactored the new unicode test to be in sync with the rest of the file c83d000 [Ajay Saxena] added test case for unicode round trip fb922d6 [Ajay Saxena] changes for GH 13747
1 parent c045e1d commit 4a1a330

File tree

9 files changed

+469
-279
lines changed

9 files changed

+469
-279
lines changed

doc/source/whatsnew/v0.19.2.txt

+10
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,14 @@ Bug Fixes
5353

5454

5555

56+
57+
58+
- Bug in clipboard functions on Linux with Python 2 when using unicode and separators (:issue:`13747`)
59+
- Bug in clipboard functions on Windows 10 and Python 3 (:issue:`14362`, :issue:`12807`)
60+
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
61+
62+
63+
64+
65+
5666
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)

pandas/io/clipboard.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
""" io on the clipboard """
22
from pandas import compat, get_option, option_context, DataFrame
3-
from pandas.compat import StringIO
3+
from pandas.compat import StringIO, PY2
44

55

66
def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
@@ -18,6 +18,14 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
1818
-------
1919
parsed : DataFrame
2020
"""
21+
encoding = kwargs.pop('encoding', 'utf-8')
22+
23+
# only utf-8 is valid for passed value because that's what clipboard
24+
# supports
25+
if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
26+
raise NotImplementedError(
27+
'reading from clipboard only supports utf-8 encoding')
28+
2129
from pandas.util.clipboard import clipboard_get
2230
from pandas.io.parsers import read_table
2331
text = clipboard_get()
@@ -78,6 +86,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
7886
- Windows:
7987
- OS X:
8088
"""
89+
encoding = kwargs.pop('encoding', 'utf-8')
90+
91+
# testing if an invalid encoding is passed to clipboard
92+
if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
93+
raise ValueError('clipboard only supports utf-8 encoding')
94+
8195
from pandas.util.clipboard import clipboard_set
8296
if excel is None:
8397
excel = True
@@ -87,8 +101,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
87101
if sep is None:
88102
sep = '\t'
89103
buf = StringIO()
90-
obj.to_csv(buf, sep=sep, **kwargs)
91-
clipboard_set(buf.getvalue())
104+
# clipboard_set (pyperclip) expects unicode
105+
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
106+
text = buf.getvalue()
107+
if PY2:
108+
text = text.decode('utf-8')
109+
clipboard_set(text)
92110
return
93111
except:
94112
pass

pandas/io/tests/test_clipboard.py

+26-9
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99
from pandas import read_clipboard
1010
from pandas import get_option
1111
from pandas.util import testing as tm
12-
from pandas.util.testing import makeCustomDataframe as mkdf, disabled
12+
from pandas.util.testing import makeCustomDataframe as mkdf
13+
from pandas.util.clipboard.exceptions import PyperclipException
1314

1415

1516
try:
16-
import pandas.util.clipboard # noqa
17-
except OSError:
18-
raise nose.SkipTest("no clipboard found")
17+
DataFrame({'A': [1, 2]}).to_clipboard()
18+
except PyperclipException:
19+
raise nose.SkipTest("clipboard primitives not installed")
1920

2021

21-
@disabled
2222
class TestClipboard(tm.TestCase):
2323

2424
@classmethod
@@ -52,20 +52,24 @@ def setUpClass(cls):
5252
# Test for non-ascii text: GH9263
5353
cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
5454
'es': 'en español'.split()})
55+
# unicode round trip test for GH 13747, GH 12529
56+
cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
57+
'b': ['øπ∆˚¬', 'œ∑´®']})
5558
cls.data_types = list(cls.data.keys())
5659

5760
@classmethod
5861
def tearDownClass(cls):
5962
super(TestClipboard, cls).tearDownClass()
6063
del cls.data_types, cls.data
6164

62-
def check_round_trip_frame(self, data_type, excel=None, sep=None):
65+
def check_round_trip_frame(self, data_type, excel=None, sep=None,
66+
encoding=None):
6367
data = self.data[data_type]
64-
data.to_clipboard(excel=excel, sep=sep)
68+
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
6569
if sep is not None:
66-
result = read_clipboard(sep=sep, index_col=0)
70+
result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
6771
else:
68-
result = read_clipboard()
72+
result = read_clipboard(encoding=encoding)
6973
tm.assert_frame_equal(data, result, check_dtype=False)
7074

7175
def test_round_trip_frame_sep(self):
@@ -115,3 +119,16 @@ def test_read_clipboard_infer_excel(self):
115119
exp = pd.read_clipboard()
116120

117121
tm.assert_frame_equal(res, exp)
122+
123+
def test_invalid_encoding(self):
124+
# test case for testing invalid encoding
125+
data = self.data['string']
126+
with tm.assertRaises(ValueError):
127+
data.to_clipboard(encoding='ascii')
128+
with tm.assertRaises(NotImplementedError):
129+
pd.read_clipboard(encoding='ascii')
130+
131+
def test_round_trip_valid_encodings(self):
132+
for enc in ['UTF-8', 'utf-8', 'utf8']:
133+
for dt in self.data_types:
134+
self.check_round_trip_frame(dt, encoding=enc)

0 commit comments

Comments
 (0)