Skip to content

Commit c46fc3c

Browse files
Cleanup clipboard tests (pandas-dev#21163)
(cherry picked from commit 9d38e0e)
1 parent 14d65cd commit c46fc3c

File tree

1 file changed

+129
-67
lines changed

1 file changed

+129
-67
lines changed

pandas/tests/io/test_clipboard.py

+129-67
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99
from pandas import DataFrame
1010
from pandas import read_clipboard
1111
from pandas import get_option
12+
from pandas.compat import PY2
1213
from pandas.util import testing as tm
1314
from pandas.util.testing import makeCustomDataframe as mkdf
1415
from pandas.io.clipboard.exceptions import PyperclipException
15-
from pandas.io.clipboard import clipboard_set
16+
from pandas.io.clipboard import clipboard_set, clipboard_get
1617

1718

1819
try:
@@ -22,73 +23,134 @@
2223
_DEPS_INSTALLED = 0
2324

2425

26+
def build_kwargs(sep, excel):
    """Build the keyword arguments passed on to ``to_clipboard``.

    The string ``'default'`` is a sentinel meaning "leave this argument
    out entirely". Any other value, ``None`` included, is forwarded
    unchanged.

    Parameters
    ----------
    sep : object
        Value for the ``sep`` keyword, or ``'default'`` to omit it.
    excel : object
        Value for the ``excel`` keyword, or ``'default'`` to omit it.

    Returns
    -------
    dict
        Holds only the keys whose value was not ``'default'``.
    """
    kwargs = {}
    if excel != 'default':
        kwargs['excel'] = excel
    if sep != 'default':
        kwargs['sep'] = sep
    return kwargs
33+
34+
35+
@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
                        'colwidth', 'mixed', 'float', 'int'])
def df(request):
    """Return the test DataFrame selected by ``request.param``.

    The fixture is parametrized, so a test that requests ``df`` runs once
    for every name in ``params``.

    Raises
    ------
    ValueError
        If ``request.param`` is not one of the known data type names.
    """
    data_type = request.param

    if data_type == 'delims':
        # Cell values that embed quotes, commas, tabs and pipes
        return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                             'b': ['hi\'j', 'k\'\'lm']})
    elif data_type == 'utf8':
        # Unicode round trip: GH 13747, GH 12529
        return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                             'b': ['øπ∆˚¬', 'œ∑´®']})
    elif data_type == 'string':
        return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'long':
        # One row more than display.max_rows: GH-5346
        max_rows = get_option('display.max_rows')
        return mkdf(max_rows + 1, 3,
                    data_gen_f=lambda *args: randint(2),
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'nonascii':
        # Non-ascii text: GH9263
        return pd.DataFrame({'en': 'in English'.split(),
                             'es': 'en español'.split()})
    elif data_type == 'colwidth':
        # Cell values one character wider than display.max_colwidth: GH8305
        _cw = get_option('display.max_colwidth') + 1
        return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'mixed':
        return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                          'b': np.arange(1, 6),
                          'c': list('abcde')})
    elif data_type == 'float':
        return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'int':
        return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    else:
        # Name the offending parameter so a typo in ``params`` is easy to spot
        raise ValueError(
            'unrecognized data_type: {!r}'.format(data_type))
77+
78+
2579
@pytest.mark.single
2680
@pytest.mark.skipif(not _DEPS_INSTALLED,
2781
reason="clipboard primitives not installed")
2882
class TestClipboard(object):
29-
30-
@classmethod
31-
def setup_class(cls):
32-
cls.data = {}
33-
cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i',
34-
c_idx_names=[None], r_idx_names=[None])
35-
cls.data['int'] = mkdf(5, 3, data_gen_f=lambda *args: randint(2),
36-
c_idx_type='s', r_idx_type='i',
37-
c_idx_names=[None], r_idx_names=[None])
38-
cls.data['float'] = mkdf(5, 3,
39-
data_gen_f=lambda r, c: float(r) + 0.01,
40-
c_idx_type='s', r_idx_type='i',
41-
c_idx_names=[None], r_idx_names=[None])
42-
cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
43-
'b': np.arange(1, 6),
44-
'c': list('abcde')})
45-
46-
# Test columns exceeding "max_colwidth" (GH8305)
47-
_cw = get_option('display.max_colwidth') + 1
48-
cls.data['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
49-
c_idx_type='s', r_idx_type='i',
50-
c_idx_names=[None], r_idx_names=[None])
51-
# Test GH-5346
52-
max_rows = get_option('display.max_rows')
53-
cls.data['longdf'] = mkdf(max_rows + 1, 3,
54-
data_gen_f=lambda *args: randint(2),
55-
c_idx_type='s', r_idx_type='i',
56-
c_idx_names=[None], r_idx_names=[None])
57-
# Test for non-ascii text: GH9263
58-
cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
59-
'es': 'en español'.split()})
60-
# unicode round trip test for GH 13747, GH 12529
61-
cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
62-
'b': ['øπ∆˚¬', 'œ∑´®']})
63-
cls.data_types = list(cls.data.keys())
64-
65-
@classmethod
66-
def teardown_class(cls):
67-
del cls.data_types, cls.data
68-
69-
def check_round_trip_frame(self, data_type, excel=None, sep=None,
83+
def check_round_trip_frame(self, data, excel=None, sep=None,
7084
encoding=None):
71-
data = self.data[data_type]
7285
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
73-
if sep is not None:
74-
result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
75-
else:
76-
result = read_clipboard(encoding=encoding)
86+
result = read_clipboard(sep=sep or '\t', index_col=0,
87+
encoding=encoding)
7788
tm.assert_frame_equal(data, result, check_dtype=False)
7889

79-
def test_round_trip_frame_sep(self):
80-
for dt in self.data_types:
81-
self.check_round_trip_frame(dt, sep=',')
82-
self.check_round_trip_frame(dt, sep=r'\s+')
83-
self.check_round_trip_frame(dt, sep='|')
84-
85-
def test_round_trip_frame_string(self):
86-
for dt in self.data_types:
87-
self.check_round_trip_frame(dt, excel=False)
88-
89-
def test_round_trip_frame(self):
90-
for dt in self.data_types:
91-
self.check_round_trip_frame(dt)
90+
# Default arguments are expected to copy as tab-delimited text
@pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
                          'Issue in #21104, Fixed in #21111')
def test_round_trip_frame(self, df):
    """Round-trip ``df`` through the clipboard with default arguments."""
    self.check_round_trip_frame(df)
95+
96+
# An explicitly requested delimiter must be respected
@pytest.mark.parametrize('sep', ['\t', ',', '|'])
def test_round_trip_frame_sep(self, df, sep):
    """Round-trip ``df`` through the clipboard using delimiter ``sep``."""
    self.check_round_trip_frame(df, sep=sep)
100+
101+
# White space separator
@pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
                          "aren't handled correctly in default c engine. Fixed "
                          "in #21111 by defaulting to python engine for "
                          "whitespace separator")
def test_round_trip_frame_string(self, df):
    """Copy ``df`` with ``excel=False`` and read it back with defaults.

    The frames are compared by their ``to_string()`` rendering and by
    shape.
    """
    df.to_clipboard(excel=False, sep=None)
    round_tripped = read_clipboard()
    assert df.to_string() == round_tripped.to_string()
    assert df.shape == round_tripped.shape
111+
112+
# to_clipboard does not support a two-character separator; passing one
# must produce a warning instead of being silently accepted
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_excel_sep_warning(self, df):
    """Expect a warning for a multi-character ``sep`` with ``excel=True``."""
    # Raw string: a backslash followed by "t", i.e. two characters
    two_char_sep = r'\t'
    with tm.assert_produces_warning():
        df.to_clipboard(excel=True, sep=two_char_sep)
118+
119+
# With excel=False the separator is ignored, so passing one should warn
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_copy_delim_warning(self, df):
    """Expect a warning when ``sep`` is given together with ``excel=False``."""
    ignored_sep = '\t'
    with tm.assert_produces_warning():
        df.to_clipboard(excel=False, sep=ignored_sep)
124+
125+
# The default behavior of to_clipboard is expected to be tab delimited
# with excel=True
@pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in "
                          "#21104, Fixed in #21111")
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
    """Compare the clipboard text with ``df.to_csv(sep='\\t')``.

    ``'default'`` for ``sep`` or ``excel`` means the keyword is not
    passed to ``to_clipboard`` at all.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    copied = clipboard_get()
    if PY2:
        # to_clipboard copies unicode, to_csv produces bytes. This is
        # expected behavior
        copied = copied.encode('utf-8')
    assert copied == df.to_csv(sep='\t')
140+
141+
# Reading of white space separated tables
@pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
                          "aren't handled correctly in default c engine. Fixed "
                          "in #21111 by defaulting to python engine for "
                          "whitespace separator")
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
    """Copy ``df`` with ``excel=False`` and read it back with ``sep='\\s+'``.

    ``'default'`` for ``sep`` means the keyword is not passed to
    ``to_clipboard``. The frames are compared by their ``to_string()``
    rendering and by shape.
    """
    kwargs = build_kwargs(sep, excel)
    df.to_clipboard(**kwargs)
    result = read_clipboard(sep=r'\s+')
    assert result.to_string() == df.to_string()
    assert df.shape == result.shape
92154

93155
def test_read_clipboard_infer_excel(self):
94156
# gh-19010: avoid warnings
@@ -124,15 +186,15 @@ def test_read_clipboard_infer_excel(self):
124186

125187
tm.assert_frame_equal(res, exp)
126188

127-
def test_invalid_encoding(self):
189+
def test_invalid_encoding(self, df):
128190
# test case for testing invalid encoding
129-
data = self.data['string']
130191
with pytest.raises(ValueError):
131-
data.to_clipboard(encoding='ascii')
192+
df.to_clipboard(encoding='ascii')
132193
with pytest.raises(NotImplementedError):
133194
pd.read_clipboard(encoding='ascii')
134195

135-
def test_round_trip_valid_encodings(self):
136-
for enc in ['UTF-8', 'utf-8', 'utf8']:
137-
for dt in self.data_types:
138-
self.check_round_trip_frame(dt, encoding=enc)
196+
@pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
                          'Issue in #21104, Fixed in #21111')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
    """Round-trip ``df`` through the clipboard using encoding ``enc``."""
    self.check_round_trip_frame(df, encoding=enc)

0 commit comments

Comments
 (0)