pandas-dev · aileronajay · Nov 7, 2016 · Nov 8, 2016 · Nov 8, 2016 · Nov 8, 2016
diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
@@ -25,3 +25,4 @@ Bug Fixes
 
 - compat with ``dateutil==2.6.0`` for testing (:issue:`14621`)
 - allow ``nanoseconds`` in ``Timestamp.replace`` kwargs (:issue:`14621`)
+- BUG in clipboard (linux, python2) with unicode and separator (:issue:`13747`)
diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py
@@ -1,6 +1,6 @@
 """ io on the clipboard """
 from pandas import compat, get_option, option_context, DataFrame
-from pandas.compat import StringIO
+from pandas.compat import StringIO, PY2
 
 
 def read_clipboard(sep='\s+', **kwargs):  # pragma: no cover
@@ -18,6 +18,14 @@ def read_clipboard(sep='\s+', **kwargs):  # pragma: no cover
     -------
     parsed : DataFrame
     """
+    encoding = kwargs.pop('encoding', 'utf-8')
+
+    # only utf-8 is valid for passed value because that's what clipboard
+    # supports
+    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
+        raise NotImplementedError(
+            'reading from clipboard only supports utf-8 encoding')
+
     from pandas.util.clipboard import clipboard_get
     from pandas.io.parsers import read_table
     text = clipboard_get()
@@ -78,6 +86,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
       - Windows:
       - OS X:
     """
+    encoding = kwargs.pop('encoding', 'utf-8')
+
+    # testing if an invalid encoding is passed to clipboard
+    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
+        raise ValueError('clipboard only supports utf-8 encoding')
+
     from pandas.util.clipboard import clipboard_set
     if excel is None:
         excel = True
@@ -87,8 +101,12 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
             if sep is None:
                 sep = '\t'
             buf = StringIO()
-            obj.to_csv(buf, sep=sep, **kwargs)
-            clipboard_set(buf.getvalue())
+            # clipboard_set (pyperclip) expects unicode
+            obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
+            text = buf.getvalue()
+            if PY2:
+                text = text.decode('utf-8')
+            clipboard_set(text)
             return
         except:
             pass

diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py
@@ -9,7 +9,7 @@
 from pandas import read_clipboard
 from pandas import get_option
 from pandas.util import testing as tm
-from pandas.util.testing import makeCustomDataframe as mkdf, disabled
+from pandas.util.testing import makeCustomDataframe as mkdf
 
 
 try:
@@ -18,7 +18,6 @@
     raise nose.SkipTest("no clipboard found")
 
 
-@disabled
 class TestClipboard(tm.TestCase):
 
     @classmethod
@@ -52,20 +51,24 @@ def setUpClass(cls):
         # Test for non-ascii text: GH9263
         cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
                                              'es': 'en español'.split()})
+        # unicode round trip test for GH 13747
+        cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
+                                        'b': ['øπ∆˚¬', 'œ∑´®']})
         cls.data_types = list(cls.data.keys())
 
     @classmethod
     def tearDownClass(cls):
         super(TestClipboard, cls).tearDownClass()
         del cls.data_types, cls.data
 
-    def check_round_trip_frame(self, data_type, excel=None, sep=None):
+    def check_round_trip_frame(self, data_type, excel=None, sep=None,
+                               encoding=None):
         data = self.data[data_type]
-        data.to_clipboard(excel=excel, sep=sep)
+        data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
         if sep is not None:
-            result = read_clipboard(sep=sep, index_col=0)
+            result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
         else:
-            result = read_clipboard()
+            result = read_clipboard(encoding=encoding)
         tm.assert_frame_equal(data, result, check_dtype=False)
 
     def test_round_trip_frame_sep(self):
@@ -115,3 +118,16 @@ def test_read_clipboard_infer_excel(self):
         exp = pd.read_clipboard()
 
         tm.assert_frame_equal(res, exp)
+
+    # test case for testing invalid encoding
+    def test_invalid_encoding(self):
+        data = self.data['string']
+        with tm.assertRaises(ValueError):
+            data.to_clipboard(encoding='ascii')
+        with tm.assertRaises(NotImplementedError):
+            pd.read_clipboard(encoding='ascii')
+
+    def test_round_trip_valid_encodings(self):
+        for enc in ['UTF-8', 'utf-8', 'utf8']:
+            for dt in self.data_types:
+                self.check_round_trip_frame(dt, encoding=enc)
diff --git a/pandas/util/clipboard.py b/pandas/util/clipboard.py
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,3 +25,4 @@ Bug Fixes

		- compat with ``dateutil==2.6.0`` for testing (:issue:`14621`)
		- allow ``nanoseconds`` in ``Timestamp.replace`` kwargs (:issue:`14621`)
		- BUG in clipboard (linux, python2) with unicode and separator (:issue:`13747`)
Review comment by jreback (Contributor), Nov 17, 2016: list all of the issues this is expected to close.
Reply by aileronajay (Contributor, Author), Nov 17, 2016: implemented this change now.