Skip to content

Commit 6f55ab9

Browse files
Ajay Saxena, jorisvandenbossche
Ajay Saxena
authored and jorisvandenbossche committed
BUG in clipboard (linux, python2) with unicode and separator (GH13747)
vendored updated version of Pyperclip closes pandas-dev#13747 closes pandas-dev#14362 closes pandas-dev#12807 closes pandas-dev#12529 Author: Ajay Saxena <[email protected]> Author: Ajay Saxena <[email protected]> Closes pandas-dev#14599 from aileronajay/master and squashes the following commits: 2aafb66 [Ajay Saxena] moved comment inside test and added github issue labels to test b74fbc1 [Ajay Saxena] ignore lint test for pyperclip files 9db42d8 [Ajay Saxena] whatsnew conflict 1dca292 [Ajay Saxena] conflict resolution 98b61e8 [Ajay Saxena] merge conflict cedb690 [Ajay Saxena] merge conflict in whats new file 7af95da [Ajay Saxena] merging latest changes ac8ae60 [Ajay Saxena] skip clipboard test if clipboard primitives are absent b03ed56 [Ajay Saxena] changed whatsnew file c0aafd7 [Ajay Saxena] Merge branch 'test_branch' 9946fb7 [Ajay Saxena] Merge branch 'master' of https://github.com/pandas-dev/pandas into test_branch ed1375f [Ajay Saxena] Merge branch 'test_branch' 0665fd4 [Ajay Saxena] fixed linting and test case as per code review d202fd0 [Ajay Saxena] added test for valid encoding, modified setup.py so that pandas/util/clipboard can be found dd57ae3 [Ajay Saxena] code review changes and read clipboard invalid encoding test 71d58d0 [Ajay Saxena] testing encoding in kwargs to to_clipboard and test case for the same 02f87b0 [Ajay Saxena] removed duplicate files 825bbe2 [Ajay Saxena] all files related to pyperclip are under pandas.util.clipboard c5a87d8 [Ajay Saxena] Merge branch 'test_branch' of https://github.com/aileronajay/pandas into test_branch f708c2e [Ajay Saxena] Merge branch 'master' of https://github.com/aileronajay/pandas d565b1f [Ajay Saxena] updated pyperclip to the latest version 14d94a0 [Ajay Saxena] changed the pandas util clipboard file to return unicode if the python version is 2, else str 66d8ebf [Ajay Saxena] removed the disabled tag for clipboard test so that we can check if they pass after this change edb8553 [Ajay Saxena] refactored the 
new unicode test to be in sync with the rest of the file c83d000 [Ajay Saxena] added test case for unicode round trip fb922d6 [Ajay Saxena] changes for GH 13747 (cherry picked from commit 4a1a330)
1 parent e5a965b commit 6f55ab9

File tree

9 files changed

+469
-279
lines changed

9 files changed

+469
-279
lines changed

doc/source/whatsnew/v0.19.2.txt

+10
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,14 @@ Bug Fixes
5353

5454

5555

56+
57+
58+
- Bug in clipboard functions on Linux with Python 2 with unicode and separators (:issue:`13747`)
59+
- Bug in clipboard functions on Windows 10 and Python 3 (:issue:`14362`, :issue:`12807`)
60+
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
61+
62+
63+
64+
65+
5666
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)

pandas/io/clipboard.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
""" io on the clipboard """
22
from pandas import compat, get_option, option_context, DataFrame
3-
from pandas.compat import StringIO
3+
from pandas.compat import StringIO, PY2
44

55

66
def read_clipboard(**kwargs): # pragma: no cover
@@ -14,6 +14,14 @@ def read_clipboard(**kwargs): # pragma: no cover
1414
-------
1515
parsed : DataFrame
1616
"""
17+
encoding = kwargs.pop('encoding', 'utf-8')
18+
19+
# only utf-8 is valid for passed value because that's what clipboard
20+
# supports
21+
if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
22+
raise NotImplementedError(
23+
'reading from clipboard only supports utf-8 encoding')
24+
1725
from pandas.util.clipboard import clipboard_get
1826
from pandas.io.parsers import read_table
1927
text = clipboard_get()
@@ -74,6 +82,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
7482
- Windows:
7583
- OS X:
7684
"""
85+
encoding = kwargs.pop('encoding', 'utf-8')
86+
87+
# testing if an invalid encoding is passed to clipboard
88+
if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
89+
raise ValueError('clipboard only supports utf-8 encoding')
90+
7791
from pandas.util.clipboard import clipboard_set
7892
if excel is None:
7993
excel = True
@@ -83,8 +97,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
8397
if sep is None:
8498
sep = '\t'
8599
buf = StringIO()
86-
obj.to_csv(buf, sep=sep, **kwargs)
87-
clipboard_set(buf.getvalue())
100+
# clipboard_set (pyperclip) expects unicode
101+
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
102+
text = buf.getvalue()
103+
if PY2:
104+
text = text.decode('utf-8')
105+
clipboard_set(text)
88106
return
89107
except:
90108
pass

pandas/io/tests/test_clipboard.py

+26-9
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99
from pandas import read_clipboard
1010
from pandas import get_option
1111
from pandas.util import testing as tm
12-
from pandas.util.testing import makeCustomDataframe as mkdf, disabled
12+
from pandas.util.testing import makeCustomDataframe as mkdf
13+
from pandas.util.clipboard.exceptions import PyperclipException
1314

1415

1516
try:
16-
import pandas.util.clipboard # noqa
17-
except OSError:
18-
raise nose.SkipTest("no clipboard found")
17+
DataFrame({'A': [1, 2]}).to_clipboard()
18+
except PyperclipException:
19+
raise nose.SkipTest("clipboard primitives not installed")
1920

2021

21-
@disabled
2222
class TestClipboard(tm.TestCase):
2323

2424
@classmethod
@@ -52,20 +52,24 @@ def setUpClass(cls):
5252
# Test for non-ascii text: GH9263
5353
cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
5454
'es': 'en español'.split()})
55+
# unicode round trip test for GH 13747, GH 12529
56+
cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
57+
'b': ['øπ∆˚¬', 'œ∑´®']})
5558
cls.data_types = list(cls.data.keys())
5659

5760
@classmethod
5861
def tearDownClass(cls):
5962
super(TestClipboard, cls).tearDownClass()
6063
del cls.data_types, cls.data
6164

62-
def check_round_trip_frame(self, data_type, excel=None, sep=None):
65+
def check_round_trip_frame(self, data_type, excel=None, sep=None,
66+
encoding=None):
6367
data = self.data[data_type]
64-
data.to_clipboard(excel=excel, sep=sep)
68+
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
6569
if sep is not None:
66-
result = read_clipboard(sep=sep, index_col=0)
70+
result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
6771
else:
68-
result = read_clipboard()
72+
result = read_clipboard(encoding=encoding)
6973
tm.assert_frame_equal(data, result, check_dtype=False)
7074

7175
def test_round_trip_frame_sep(self):
@@ -113,3 +117,16 @@ def test_read_clipboard_infer_excel(self):
113117
exp = pd.read_clipboard()
114118

115119
tm.assert_frame_equal(res, exp)
120+
121+
def test_invalid_encoding(self):
122+
# test case for testing invalid encoding
123+
data = self.data['string']
124+
with tm.assertRaises(ValueError):
125+
data.to_clipboard(encoding='ascii')
126+
with tm.assertRaises(NotImplementedError):
127+
pd.read_clipboard(encoding='ascii')
128+
129+
def test_round_trip_valid_encodings(self):
130+
for enc in ['UTF-8', 'utf-8', 'utf8']:
131+
for dt in self.data_types:
132+
self.check_round_trip_frame(dt, encoding=enc)

0 commit comments

Comments
 (0)