|
9 | 9 | from pandas import DataFrame
|
10 | 10 | from pandas import read_clipboard
|
11 | 11 | from pandas import get_option
|
| 12 | +from pandas.compat import PY2 |
12 | 13 | from pandas.util import testing as tm
|
13 | 14 | from pandas.util.testing import makeCustomDataframe as mkdf
|
14 | 15 | from pandas.io.clipboard.exceptions import PyperclipException
|
15 |
| -from pandas.io.clipboard import clipboard_set |
| 16 | +from pandas.io.clipboard import clipboard_set, clipboard_get |
16 | 17 |
|
17 | 18 |
|
18 | 19 | try:
|
|
22 | 23 | _DEPS_INSTALLED = 0
|
23 | 24 |
|
24 | 25 |
|
def build_kwargs(sep, excel):
    """Collect the keyword arguments that were explicitly requested.

    The string ``'default'`` acts as a sentinel meaning "omit this
    argument"; any other value (including ``None``) is forwarded under
    its own name.

    Parameters
    ----------
    sep : object
        Value for the ``sep`` keyword, or ``'default'`` to leave it out.
    excel : object
        Value for the ``excel`` keyword, or ``'default'`` to leave it out.

    Returns
    -------
    dict
        Mapping with at most the keys ``'excel'`` and ``'sep'``.
    """
    candidates = (('excel', excel), ('sep', sep))
    return {name: value for name, value in candidates
            if value != 'default'}
| 33 | + |
| 34 | + |
@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
                        'colwidth', 'mixed', 'float', 'int'])
def df(request):
    """Build the DataFrame selected by ``request.param``.

    Each fixture parameter names one frame:

    * ``'delims'``   -- strings containing quotes, commas, tabs and pipes
    * ``'utf8'``     -- unicode round trip data (GH 13747, GH 12529)
    * ``'string'``   -- 5x3 ``mkdf`` frame with no ``data_gen_f`` given
    * ``'long'``     -- one row more than ``display.max_rows`` (GH-5346)
    * ``'nonascii'`` -- non-ascii text (GH9263)
    * ``'colwidth'`` -- cells one character longer than
      ``display.max_colwidth`` (GH8305)
    * ``'mixed'``    -- one float, one int and one string column
    * ``'float'``    -- cells generated as ``float(r) + 0.01``
    * ``'int'``      -- cells generated with ``randint(2)``

    Raises
    ------
    ValueError
        If ``request.param`` is none of the names above.
    """
    kind = request.param

    # Index/column layout shared by every ``mkdf`` call below.
    axes = dict(c_idx_type='s', r_idx_type='i',
                c_idx_names=[None], r_idx_names=[None])

    def random_int(*args):
        return randint(2)

    def long_frame():
        n_rows = get_option('display.max_rows') + 1
        return mkdf(n_rows, 3, data_gen_f=random_int, **axes)

    def wide_cell_frame():
        width = get_option('display.max_colwidth') + 1
        return mkdf(5, 3, data_gen_f=lambda *args: 'x' * width, **axes)

    # Builders are called lazily so only the requested frame is created
    # (and only the options it needs are read).
    builders = {
        'delims': lambda: pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                                        'b': ['hi\'j', 'k\'\'lm']}),
        'utf8': lambda: pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                                      'b': ['øπ∆˚¬', 'œ∑´®']}),
        'string': lambda: mkdf(5, 3, **axes),
        'long': long_frame,
        'nonascii': lambda: pd.DataFrame({'en': 'in English'.split(),
                                          'es': 'en español'.split()}),
        'colwidth': wide_cell_frame,
        'mixed': lambda: DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                                    'b': np.arange(1, 6),
                                    'c': list('abcde')}),
        'float': lambda: mkdf(5, 3,
                              data_gen_f=lambda r, c: float(r) + 0.01,
                              **axes),
        'int': lambda: mkdf(5, 3, data_gen_f=random_int, **axes),
    }
    if kind not in builders:
        raise ValueError
    return builders[kind]()
| 77 | + |
| 78 | + |
25 | 79 | @pytest.mark.single
|
26 | 80 | @pytest.mark.skipif(not _DEPS_INSTALLED,
|
27 | 81 | reason="clipboard primitives not installed")
|
28 | 82 | class TestClipboard(object):
|
29 |
| - |
30 |
| - @classmethod |
31 |
| - def setup_class(cls): |
32 |
| - cls.data = {} |
33 |
| - cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i', |
34 |
| - c_idx_names=[None], r_idx_names=[None]) |
35 |
| - cls.data['int'] = mkdf(5, 3, data_gen_f=lambda *args: randint(2), |
36 |
| - c_idx_type='s', r_idx_type='i', |
37 |
| - c_idx_names=[None], r_idx_names=[None]) |
38 |
| - cls.data['float'] = mkdf(5, 3, |
39 |
| - data_gen_f=lambda r, c: float(r) + 0.01, |
40 |
| - c_idx_type='s', r_idx_type='i', |
41 |
| - c_idx_names=[None], r_idx_names=[None]) |
42 |
| - cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, |
43 |
| - 'b': np.arange(1, 6), |
44 |
| - 'c': list('abcde')}) |
45 |
| - |
46 |
| - # Test columns exceeding "max_colwidth" (GH8305) |
47 |
| - _cw = get_option('display.max_colwidth') + 1 |
48 |
| - cls.data['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, |
49 |
| - c_idx_type='s', r_idx_type='i', |
50 |
| - c_idx_names=[None], r_idx_names=[None]) |
51 |
| - # Test GH-5346 |
52 |
| - max_rows = get_option('display.max_rows') |
53 |
| - cls.data['longdf'] = mkdf(max_rows + 1, 3, |
54 |
| - data_gen_f=lambda *args: randint(2), |
55 |
| - c_idx_type='s', r_idx_type='i', |
56 |
| - c_idx_names=[None], r_idx_names=[None]) |
57 |
| - # Test for non-ascii text: GH9263 |
58 |
| - cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(), |
59 |
| - 'es': 'en español'.split()}) |
60 |
| - # unicode round trip test for GH 13747, GH 12529 |
61 |
| - cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], |
62 |
| - 'b': ['øπ∆˚¬', 'œ∑´®']}) |
63 |
| - cls.data_types = list(cls.data.keys()) |
64 |
| - |
65 |
| - @classmethod |
66 |
| - def teardown_class(cls): |
67 |
| - del cls.data_types, cls.data |
68 |
| - |
69 |
def check_round_trip_frame(self, data, excel=None, sep=None,
                           encoding=None):
    """Copy ``data`` to the clipboard, read it back and compare.

    ``excel``, ``sep`` and ``encoding`` are forwarded unchanged to
    ``data.to_clipboard``. The frame is read back with
    ``read_clipboard`` using ``index_col=0`` and the same ``encoding``,
    then compared to ``data`` with ``check_dtype=False``.
    """
    data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
    # Reading falls back to a tab whenever no (truthy) separator was given.
    read_sep = sep if sep else '\t'
    round_tripped = read_clipboard(sep=read_sep, index_col=0,
                                   encoding=encoding)
    tm.assert_frame_equal(data, round_tripped, check_dtype=False)
|
78 | 89 |
|
79 |
| - def test_round_trip_frame_sep(self): |
80 |
| - for dt in self.data_types: |
81 |
| - self.check_round_trip_frame(dt, sep=',') |
82 |
| - self.check_round_trip_frame(dt, sep=r'\s+') |
83 |
| - self.check_round_trip_frame(dt, sep='|') |
84 |
| - |
85 |
| - def test_round_trip_frame_string(self): |
86 |
| - for dt in self.data_types: |
87 |
| - self.check_round_trip_frame(dt, excel=False) |
88 |
| - |
89 |
| - def test_round_trip_frame(self): |
90 |
| - for dt in self.data_types: |
91 |
| - self.check_round_trip_frame(dt) |
@pytest.mark.xfail(
    reason='to_clipboard defaults to space delim. Issue in #21104, '
           'Fixed in #21111')
def test_round_trip_frame(self, df):
    """Default arguments should copy the frame as tab delimited text."""
    self.check_round_trip_frame(data=df)
| 95 | + |
@pytest.mark.parametrize('sep', ['\t', ',', '|'])
def test_round_trip_frame_sep(self, df, sep):
    """An explicitly passed delimiter is respected on the round trip."""
    self.check_round_trip_frame(data=df, sep=sep)
| 100 | + |
@pytest.mark.xfail(
    reason="Fails on 'delims' df because quote escapes aren't handled "
           "correctly in default c engine. Fixed in #21111 by "
           "defaulting to python engine for whitespace separator")
def test_round_trip_frame_string(self, df):
    """Round trip through the white space separated (non-excel) format.

    The frame is copied with ``excel=False`` and ``sep=None``, read back
    with ``read_clipboard()`` defaults, and compared by string
    representation and by shape.
    """
    df.to_clipboard(excel=False, sep=None)
    pasted = read_clipboard()
    assert df.to_string() == pasted.to_string()
    assert df.shape == pasted.shape
| 111 | + |
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_excel_sep_warning(self, df):
    """Multi-character separators must not be passed through silently.

    A two character separator is not supported by ``to_clipboard``, so
    copying with ``excel=True`` and such a separator should warn.
    """
    # Raw string on purpose: a backslash followed by "t", not a tab.
    two_char_sep = r'\t'
    with tm.assert_produces_warning():
        df.to_clipboard(excel=True, sep=two_char_sep)
| 118 | + |
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_copy_delim_warning(self, df):
    """The separator is ignored when ``excel=False``; expect a warning."""
    ignored_sep = '\t'
    with tm.assert_produces_warning():
        df.to_clipboard(excel=False, sep=ignored_sep)
| 124 | + |
@pytest.mark.xfail(
    reason="to_clipboard defaults to space delim. Issue in #21104, "
           "Fixed in #21111")
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
    """``to_clipboard`` should default to tab delimited, excel-style output.

    ``'default'`` for ``sep`` or ``excel`` means the keyword is not
    passed at all (see ``build_kwargs``). The clipboard text is compared
    against ``df.to_csv(sep='\\t')``.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    copied = clipboard_get()
    if PY2:
        # to_clipboard copies unicode, to_csv produces bytes. This is
        # expected behavior
        copied = copied.encode('utf-8')
    assert copied == df.to_csv(sep='\t')
| 140 | + |
@pytest.mark.xfail(
    reason="Fails on 'delims' df because quote escapes aren't handled "
           "correctly. in default c engine. Fixed in #21111 by "
           "defaulting to python engine for whitespace separator")
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
    """Read back a white space separated table copied to the clipboard.

    ``'default'`` for ``sep`` means the keyword is not passed to
    ``to_clipboard`` (see ``build_kwargs``). The clipboard is read with
    the regex separator for runs of white space and compared to the
    source frame by string representation and by shape.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    pasted = read_clipboard(sep=r'\s+')
    assert pasted.to_string() == df.to_string()
    assert df.shape == pasted.shape
92 | 154 |
|
93 | 155 | def test_read_clipboard_infer_excel(self):
|
94 | 156 | # gh-19010: avoid warnings
|
@@ -124,15 +186,15 @@ def test_read_clipboard_infer_excel(self):
|
124 | 186 |
|
125 | 187 | tm.assert_frame_equal(res, exp)
|
126 | 188 |
|
127 |
def test_invalid_encoding(self, df):
    """Non UTF-8 encodings are rejected when writing and when reading.

    ``to_clipboard`` is expected to raise ``ValueError`` and
    ``read_clipboard`` to raise ``NotImplementedError``.
    """
    bad_encoding = 'ascii'
    with pytest.raises(ValueError):
        df.to_clipboard(encoding=bad_encoding)
    with pytest.raises(NotImplementedError):
        pd.read_clipboard(encoding=bad_encoding)
|
134 | 195 |
|
135 |
| - def test_round_trip_valid_encodings(self): |
136 |
| - for enc in ['UTF-8', 'utf-8', 'utf8']: |
137 |
| - for dt in self.data_types: |
138 |
| - self.check_round_trip_frame(dt, encoding=enc) |
@pytest.mark.xfail(
    reason='to_clipboard defaults to space delim. Issue in #21104, '
           'Fixed in #21111')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
    """Every accepted spelling of UTF-8 should round trip each frame."""
    self.check_round_trip_frame(data=df, encoding=enc)
0 commit comments