pandas-dev · jreback · Jun 26, 2018 · May 22, 2018 · May 22, 2018 · May 22, 2018
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
@@ -9,10 +9,11 @@
 from pandas import DataFrame
 from pandas import read_clipboard
 from pandas import get_option
+from pandas.compat import PY2
 from pandas.util import testing as tm
 from pandas.util.testing import makeCustomDataframe as mkdf
 from pandas.io.clipboard.exceptions import PyperclipException
-from pandas.io.clipboard import clipboard_set
+from pandas.io.clipboard import clipboard_set, clipboard_get
 
 
 try:
@@ -22,73 +23,134 @@
     _DEPS_INSTALLED = 0
 
 
+def build_kwargs(sep, excel):  # kwargs for to_clipboard; 'default' = omit
+    kwargs = {}
+    if excel != 'default':
+        kwargs['excel'] = excel
+    if sep != 'default':  # None is still passed through explicitly
+        kwargs['sep'] = sep
+    return kwargs
+
+
+@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
+                        'colwidth', 'mixed', 'float', 'int'])
+def df(request):  # parametrized: returns one frame per name in params
+    data_type = request.param
+
+    if data_type == 'delims':  # quotes, tab, comma and pipe inside cells
+        return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
+                             'b': ['hi\'j', 'k\'\'lm']})
+    elif data_type == 'utf8':  # unicode round trip: GH 13747, GH 12529
+        return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
+                             'b': ['øπ∆˚¬', 'œ∑´®']})
+    elif data_type == 'string':  # no data_gen_f: mkdf default cell values
+        return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
+                    c_idx_names=[None], r_idx_names=[None])
+    elif data_type == 'long':  # one row more than display.max_rows: GH-5346
+        max_rows = get_option('display.max_rows')
+        return mkdf(max_rows + 1, 3,
+                    data_gen_f=lambda *args: randint(2),
+                    c_idx_type='s', r_idx_type='i',
+                    c_idx_names=[None], r_idx_names=[None])
+    elif data_type == 'nonascii':  # non-ascii text: GH9263
+        return pd.DataFrame({'en': 'in English'.split(),
+                             'es': 'en español'.split()})
+    elif data_type == 'colwidth':  # cells wider than max_colwidth: GH8305
+        _cw = get_option('display.max_colwidth') + 1
+        return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
+                    c_idx_type='s', r_idx_type='i',
+                    c_idx_names=[None], r_idx_names=[None])
+    elif data_type == 'mixed':  # float, int and string columns together
+        return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
+                          'b': np.arange(1, 6),
+                          'c': list('abcde')})
+    elif data_type == 'float':
+        return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
+                    c_idx_type='s', r_idx_type='i',
+                    c_idx_names=[None], r_idx_names=[None])
+    elif data_type == 'int':
+        return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
+                    c_idx_type='s', r_idx_type='i',
+                    c_idx_names=[None], r_idx_names=[None])
+    else:  # guards against a params entry with no branch above
+        raise ValueError
+
+
 @pytest.mark.single
 @pytest.mark.skipif(not _DEPS_INSTALLED,
                     reason="clipboard primitives not installed")
 class TestClipboard(object):
-
-    @classmethod
-    def setup_class(cls):
-        cls.data = {}
-        cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i',
-                                  c_idx_names=[None], r_idx_names=[None])
-        cls.data['int'] = mkdf(5, 3, data_gen_f=lambda *args: randint(2),
-                               c_idx_type='s', r_idx_type='i',
-                               c_idx_names=[None], r_idx_names=[None])
-        cls.data['float'] = mkdf(5, 3,
-                                 data_gen_f=lambda r, c: float(r) + 0.01,
-                                 c_idx_type='s', r_idx_type='i',
-                                 c_idx_names=[None], r_idx_names=[None])
-        cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
-                                       'b': np.arange(1, 6),
-                                       'c': list('abcde')})
-
-        # Test columns exceeding "max_colwidth" (GH8305)
-        _cw = get_option('display.max_colwidth') + 1
-        cls.data['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
-                                    c_idx_type='s', r_idx_type='i',
-                                    c_idx_names=[None], r_idx_names=[None])
-        # Test GH-5346
-        max_rows = get_option('display.max_rows')
-        cls.data['longdf'] = mkdf(max_rows + 1, 3,
-                                  data_gen_f=lambda *args: randint(2),
-                                  c_idx_type='s', r_idx_type='i',
-                                  c_idx_names=[None], r_idx_names=[None])
-        # Test for non-ascii text: GH9263
-        cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
-                                             'es': 'en español'.split()})
-        # unicode round trip test for GH 13747, GH 12529
-        cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
-                                         'b': ['øπ∆˚¬', 'œ∑´®']})
-        cls.data_types = list(cls.data.keys())
-
-    @classmethod
-    def teardown_class(cls):
-        del cls.data_types, cls.data
-
-    def check_round_trip_frame(self, data_type, excel=None, sep=None,
+    def check_round_trip_frame(self, data, excel=None, sep=None,
                                encoding=None):
-        data = self.data[data_type]
         data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
-        if sep is not None:
-            result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
-        else:
-            result = read_clipboard(encoding=encoding)
+        result = read_clipboard(sep=sep or '\t', index_col=0,
+                                encoding=encoding)  # tab when sep is falsy
         tm.assert_frame_equal(data, result, check_dtype=False)
 
-    def test_round_trip_frame_sep(self):
-        for dt in self.data_types:
-            self.check_round_trip_frame(dt, sep=',')
-            self.check_round_trip_frame(dt, sep=r'\s+')
-            self.check_round_trip_frame(dt, sep='|')
-
-    def test_round_trip_frame_string(self):
-        for dt in self.data_types:
-            self.check_round_trip_frame(dt, excel=False)
-
-    def test_round_trip_frame(self):
-        for dt in self.data_types:
-            self.check_round_trip_frame(dt)
+    # Round trip with default arguments, which should copy as tab delimited
+    @pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
+                       'Issue in #21104, Fixed in #21111')
+    def test_round_trip_frame(self, df):
+        self.check_round_trip_frame(df)
+
+    # Round trip with an explicit one-character delimiter (tab, comma, pipe)
+    @pytest.mark.parametrize('sep', ['\t', ',', '|'])
+    def test_round_trip_frame_sep(self, df, sep):
+        self.check_round_trip_frame(df, sep=sep)
+
+    # White space separated copy (excel=False): compare to_string() and shape
+    @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
+                       "aren't handled correctly in default c engine. Fixed "
+                       "in #21111 by defaulting to python engine for "
+                       "whitespace separator")
+    def test_round_trip_frame_string(self, df):
+        df.to_clipboard(excel=False, sep=None)
+        result = read_clipboard()  # no sep given: relies on the reader default
+        assert df.to_string() == result.to_string()
+        assert df.shape == result.shape
+
+    # Multi-character separators are not supported by to_clipboard;
+    # test that passing one warns instead of being silently accepted
+    @pytest.mark.xfail(reason="Not yet implemented.  Fixed in #21111")
+    def test_excel_sep_warning(self, df):
+        with tm.assert_produces_warning():
+            df.to_clipboard(excel=True, sep=r'\t')  # raw: backslash + 't'
+
+    # sep is ignored when excel=False, so passing one should warn
+    @pytest.mark.xfail(reason="Not yet implemented.  Fixed in #21111")
+    def test_copy_delim_warning(self, df):
+        with tm.assert_produces_warning():
+            df.to_clipboard(excel=False, sep='\t')
+
+    # Tests that to_clipboard defaults to excel=True with a tab delimiter,
+    # i.e. None, omitted and explicit values all match to_csv(sep='\t')
+    @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in "
+                       "#21104, Fixed in #21111")
+    @pytest.mark.parametrize('sep', ['\t', None, 'default'])
+    @pytest.mark.parametrize('excel', [True, None, 'default'])
+    def test_clipboard_copy_tabs_default(self, sep, excel, df):
+        kwargs = build_kwargs(sep, excel)
+        df.to_clipboard(**kwargs)
+        if PY2:
+            # On Python 2 the clipboard holds unicode while to_csv returns
+            # bytes, so encode before comparing. This is expected behavior
+            assert clipboard_get().encode('utf-8') == df.to_csv(sep='\t')
+        else:
+            assert clipboard_get() == df.to_csv(sep='\t')
+
+    # Tests reading back white space separated tables (excel=False copies)
+    @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
+                       "aren't handled correctly in default c engine. Fixed "
+                       "in #21111 by defaulting to python engine for "
+                       "whitespace separator")
+    @pytest.mark.parametrize('sep', [None, 'default'])
+    @pytest.mark.parametrize('excel', [False])
+    def test_clipboard_copy_strings(self, sep, excel, df):
+        kwargs = build_kwargs(sep, excel)  # 'default' sep is left out
+        df.to_clipboard(**kwargs)
+        result = read_clipboard(sep=r'\s+')
+        assert result.to_string() == df.to_string()
+        assert df.shape == result.shape
 
     def test_read_clipboard_infer_excel(self):
         # gh-19010: avoid warnings
@@ -124,15 +186,15 @@ def test_read_clipboard_infer_excel(self):
 
         tm.assert_frame_equal(res, exp)
 
-    def test_invalid_encoding(self):
+    def test_invalid_encoding(self, df):  # runs once per df fixture param
         # test case for testing invalid encoding
-        data = self.data['string']
         with pytest.raises(ValueError):
-            data.to_clipboard(encoding='ascii')
+            df.to_clipboard(encoding='ascii')
         with pytest.raises(NotImplementedError):
             pd.read_clipboard(encoding='ascii')
 
-    def test_round_trip_valid_encodings(self):
-        for enc in ['UTF-8', 'utf-8', 'utf8']:
-            for dt in self.data_types:
-                self.check_round_trip_frame(dt, encoding=enc)
+    @pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
+                       'Issue in #21104, Fixed in #21111')
+    @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
+    def test_round_trip_valid_encodings(self, enc, df):
+        self.check_round_trip_frame(df, encoding=enc)  # default sep/excel