Skip to content

Cleanup clipboard tests #21163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 26, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 129 additions & 67 deletions pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from pandas import DataFrame
from pandas import read_clipboard
from pandas import get_option
from pandas.compat import PY2
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf
from pandas.io.clipboard.exceptions import PyperclipException
from pandas.io.clipboard import clipboard_set
from pandas.io.clipboard import clipboard_set, clipboard_get


try:
Expand All @@ -22,73 +23,134 @@
_DEPS_INSTALLED = 0


def build_kwargs(sep, excel):
    """Assemble the keyword arguments for a ``to_clipboard`` call.

    The string ``'default'`` is a sentinel meaning "omit this argument";
    any other value, including ``None``, is forwarded under its own name.
    ``excel`` is inserted before ``sep``.
    """
    candidates = (('excel', excel), ('sep', sep))
    return {name: value for name, value in candidates if value != 'default'}


@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
                        'colwidth', 'mixed', 'float', 'int'])
def df(request):
    """Build the sample DataFrame named by ``request.param``.

    Only the requested frame is constructed. An unrecognised name raises
    ``ValueError``.
    """

    def indexed(nrows, **extra):
        # Every mkdf-based sample has three columns and shares these index
        # settings; fresh name lists are created on each call.
        return mkdf(nrows, 3, c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None], **extra)

    def long_frame():
        # One row more than the 'display.max_rows' option.
        row_count = get_option('display.max_rows') + 1
        return indexed(row_count, data_gen_f=lambda *args: randint(2))

    def wide_cells():
        # Cell text one character longer than 'display.max_colwidth'.
        width = get_option('display.max_colwidth') + 1
        return indexed(5, data_gen_f=lambda *args: 'x' * width)

    # Builders are callables so that nothing is created until selected.
    builders = {
        'delims': lambda: pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                                        'b': ['hi\'j', 'k\'\'lm']}),
        'utf8': lambda: pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                                      'b': ['øπ∆˚¬', 'œ∑´®']}),
        'string': lambda: indexed(5),
        'long': long_frame,
        'nonascii': lambda: pd.DataFrame({'en': 'in English'.split(),
                                          'es': 'en español'.split()}),
        'colwidth': wide_cells,
        'mixed': lambda: DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                                    'b': np.arange(1, 6),
                                    'c': list('abcde')}),
        'float': lambda: indexed(
            5, data_gen_f=lambda r, c: float(r) + 0.01),
        'int': lambda: indexed(5, data_gen_f=lambda *args: randint(2)),
    }

    build = builders.get(request.param)
    if build is None:
        raise ValueError
    return build()


@pytest.mark.single
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
class TestClipboard(object):

@classmethod
def setup_class(cls):
    """Fill ``cls.data`` with the sample frames and record their names.

    ``cls.data_types`` ends up as the list of keys of ``cls.data``.
    """

    def indexed(nrows, **extra):
        # Shared shape/index arguments of every mkdf-based sample.
        return mkdf(nrows, 3, c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None], **extra)

    # ``frames`` and ``cls.data`` are the same dict object.
    frames = cls.data = {}
    frames['string'] = indexed(5)
    frames['int'] = indexed(5, data_gen_f=lambda *args: randint(2))
    frames['float'] = indexed(5, data_gen_f=lambda r, c: float(r) + 0.01)
    frames['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                                 'b': np.arange(1, 6),
                                 'c': list('abcde')})

    # Columns exceeding "max_colwidth" (GH8305)
    width = get_option('display.max_colwidth') + 1
    frames['colwidth'] = indexed(5, data_gen_f=lambda *args: 'x' * width)

    # GH-5346: one row more than "display.max_rows"
    row_limit = get_option('display.max_rows')
    frames['longdf'] = indexed(row_limit + 1,
                               data_gen_f=lambda *args: randint(2))

    # Non-ascii text: GH9263
    frames['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
                                       'es': 'en español'.split()})

    # Unicode round trip for GH 13747, GH 12529
    frames['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                                   'b': ['øπ∆˚¬', 'œ∑´®']})

    cls.data_types = list(frames)

@classmethod
def teardown_class(cls):
    """Drop the ``data_types`` and ``data`` class attributes, in that order."""
    for attribute in ('data_types', 'data'):
        delattr(cls, attribute)

# NOTE(review): two revisions of this helper appear interleaved below (two
# `def` headers; both a `sep is not None` branch and a `sep or '\t'` read) --
# presumably a flattened diff; confirm which lines are current before use.
# Common to both revisions: copy a frame to the clipboard, read it back, and
# assert the result equals the input while ignoring dtypes.
def check_round_trip_frame(self, data_type, excel=None, sep=None,
def check_round_trip_frame(self, data, excel=None, sep=None,
encoding=None):
# This revision looks the frame up by name in the class-level `data` dict.
data = self.data[data_type]
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
# This revision only passes `sep`/`index_col` when a separator was given.
if sep is not None:
result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
else:
result = read_clipboard(encoding=encoding)
# This revision always reads with a separator, using a tab when `sep` is falsy.
result = read_clipboard(sep=sep or '\t', index_col=0,
encoding=encoding)
tm.assert_frame_equal(data, result, check_dtype=False)

def test_round_trip_frame_sep(self):
    """Round-trip every stored frame once per explicit separator."""
    separators = (',', r'\s+', '|')
    for dt in self.data_types:
        for delimiter in separators:
            self.check_round_trip_frame(dt, sep=delimiter)

def test_round_trip_frame_string(self):
    """Round-trip every stored frame with ``excel=False``."""
    options = {'excel': False}
    for frame_name in self.data_types:
        self.check_round_trip_frame(frame_name, **options)

def test_round_trip_frame(self):
    """Round-trip every stored frame using the helper's default arguments."""
    for frame_name in self.data_types:
        self.check_round_trip_frame(frame_name)
@pytest.mark.xfail(
    reason='to_clipboard defaults to space delim. Issue in #21104, '
           'Fixed in #21111')
def test_round_trip_frame(self, df):
    """Default arguments should copy the frame as tab delimited."""
    self.check_round_trip_frame(df)

@pytest.mark.parametrize('sep', ['\t', ',', '|'])
def test_round_trip_frame_sep(self, df, sep):
    """Explicit delimiters must be respected by the round trip."""
    self.check_round_trip_frame(df, sep=sep)

@pytest.mark.xfail(
    reason="Fails on 'delims' df because quote escapes aren't handled "
           "correctly in default c engine. Fixed in #21111 by defaulting "
           "to python engine for whitespace separator")
def test_round_trip_frame_string(self, df):
    """Round-trip a frame through the white-space separated format.

    The frame is copied with ``excel=False`` and no separator, read back
    with default arguments, and compared by string rendering and shape.
    """
    df.to_clipboard(excel=False, sep=None)
    pasted = read_clipboard()
    assert df.to_string() == pasted.to_string()
    assert df.shape == pasted.shape

@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_excel_sep_warning(self, df):
    """A multi-character separator must warn rather than pass silently.

    Two character separators are not supported in ``to_clipboard``.
    """
    two_char_sep = r'\t'  # raw string: backslash followed by 't'
    with tm.assert_produces_warning():
        df.to_clipboard(excel=True, sep=two_char_sep)

@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_copy_delim_warning(self, df):
    """Passing ``sep`` together with ``excel=False`` should warn.

    The separator is ignored when ``excel=False``.
    """
    ignored_sep = '\t'
    with tm.assert_produces_warning():
        df.to_clipboard(excel=False, sep=ignored_sep)

@pytest.mark.xfail(
    reason="to_clipboard defaults to space delim. Issue in #21104, "
           "Fixed in #21111")
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
    """``to_clipboard`` should default to tab delimited, excel-style output.

    The clipboard text is compared with ``df.to_csv(sep='\\t')``.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    copied = clipboard_get()
    if PY2:
        # to_clipboard copies unicode, to_csv produces bytes. This is
        # expected behavior, so encode before comparing.
        copied = copied.encode('utf-8')
    assert copied == df.to_csv(sep='\t')

@pytest.mark.xfail(
    reason="Fails on 'delims' df because quote escapes aren't handled "
           "correctly. in default c engine. Fixed in #21111 by defaulting "
           "to python engine for whitespace separator")
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
    """Read back a white-space separated table copied with ``excel=False``.

    The clipboard is parsed with ``sep=r'\\s+'`` and compared with the
    original frame by string rendering and shape.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    parsed = read_clipboard(sep=r'\s+')
    assert parsed.to_string() == df.to_string()
    assert df.shape == parsed.shape

def test_read_clipboard_infer_excel(self):
# gh-19010: avoid warnings
Expand Down Expand Up @@ -124,15 +186,15 @@ def test_read_clipboard_infer_excel(self):

tm.assert_frame_equal(res, exp)

# NOTE(review): two revisions of this test appear interleaved below (two `def`
# headers; both `data.to_clipboard` and `df.to_clipboard`) -- presumably a
# flattened diff; confirm which lines are current before use.
def test_invalid_encoding(self):
def test_invalid_encoding(self, df):
# An 'ascii' encoding is expected to be rejected in both directions:
# ValueError when writing to the clipboard, NotImplementedError when reading.
data = self.data['string']
with pytest.raises(ValueError):
data.to_clipboard(encoding='ascii')
df.to_clipboard(encoding='ascii')
with pytest.raises(NotImplementedError):
pd.read_clipboard(encoding='ascii')

def test_round_trip_valid_encodings(self):
    """Round-trip every stored frame under each listed UTF-8 spelling."""
    encodings = ('UTF-8', 'utf-8', 'utf8')
    for enc in encodings:
        for frame_name in self.data_types:
            self.check_round_trip_frame(frame_name, encoding=enc)
@pytest.mark.xfail(
    reason='to_clipboard defaults to space delim. Issue in #21104, '
           'Fixed in #21111')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
    """Round-trip the frame with an explicit UTF-8 encoding name."""
    self.check_round_trip_frame(df, encoding=enc)