Skip to content

Cleanup clipboard tests #21163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 26, 2018
Merged
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 130 additions & 65 deletions pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from pandas import DataFrame
from pandas import read_clipboard
from pandas import get_option
from pandas.compat import PY2
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf
from pandas.io.clipboard.exceptions import PyperclipException
from pandas.io.clipboard import clipboard_set
from pandas.io.clipboard import clipboard_set, clipboard_get


try:
Expand All @@ -22,73 +23,136 @@
_DEPS_INSTALLED = 0


def build_kwargs(sep, excel):
    """Collect the keyword arguments that should be passed explicitly.

    The string ``'default'`` acts as a sentinel meaning "leave this argument
    out"; any other value (``None`` included) is forwarded unchanged.
    ``excel`` is inserted before ``sep``.
    """
    candidates = (('excel', excel), ('sep', sep))
    return {name: value for name, value in candidates if value != 'default'}


def gen_df(data_type):
    """Build the sample DataFrame identified by ``data_type``.

    Parameters
    ----------
    data_type : str
        One of 'delims', 'utf8', 'string', 'long', 'nonascii', 'colwidth',
        'mixed', 'float' or 'int'.

    Returns
    -------
    DataFrame
        A freshly constructed frame for the requested kind.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the names listed above. Without this
        check an unknown name would silently return ``None`` and fail later
        at the first attribute access.
    """
    # Layout shared by every frame generated through ``mkdf``. It is rebuilt
    # on each call so that no list object is shared between frames.
    mkdf_layout = dict(c_idx_type='s', r_idx_type='i',
                       c_idx_names=[None], r_idx_names=[None])

    if data_type == 'delims':
        # Cells containing quotes, commas, tabs and pipes.
        return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                             'b': ['hi\'j', 'k\'\'lm']})
    elif data_type == 'utf8':
        return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                             'b': ['øπ∆˚¬', 'œ∑´®']})
    elif data_type == 'string':
        return mkdf(5, 3, **mkdf_layout)
    elif data_type == 'long':
        # One row more than the 'display.max_rows' option.
        max_rows = get_option('display.max_rows')
        return mkdf(max_rows + 1, 3,
                    data_gen_f=lambda *args: randint(2),
                    **mkdf_layout)
    elif data_type == 'nonascii':
        return pd.DataFrame({'en': 'in English'.split(),
                             'es': 'en español'.split()})
    elif data_type == 'colwidth':
        # Every cell is one character wider than 'display.max_colwidth'.
        _cw = get_option('display.max_colwidth') + 1
        return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
                    **mkdf_layout)
    elif data_type == 'mixed':
        return pd.DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                             'b': np.arange(1, 6),
                             'c': list('abcde')})
    elif data_type == 'float':
        return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
                    **mkdf_layout)
    elif data_type == 'int':
        return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
                    **mkdf_layout)

    raise ValueError("unknown data_type: {!r}".format(data_type))


@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
                        'colwidth', 'mixed', 'float', 'int'])
def df(request):
    """Parametrized fixture: return the frame ``gen_df`` builds per name."""
    data_type = request.param
    return gen_df(data_type)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would just move gen_df inside the fixture itself, I think makes it more clear.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gen_df is called directly from some of the tests. The fixture is called using a pytest object and gen_df needs to be called with a string argument. Is there a better workaround than separating the two functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can change it to something like this, but I think separating the two out is still preferable.

@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
                        'colwidth', 'mixed', 'float', 'int'])
def df(request):
    if type(request) is str:
        data_type = request
    else:
        data_type = request.param

    if data_type == 'delims':
        return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                             'b': ['hi\'j', 'k\'\'lm']})
    elif data_type == 'utf8':
        return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                             'b': ['øπ∆˚¬', 'œ∑´®']})

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh didn't see that, though then this begs the question of why you are not simply passing the constructed df in?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point, I'll update



@pytest.mark.single
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
class TestClipboard(object):

@classmethod
def setup_class(cls):
    """Fill ``cls.data`` with the sample frames and record their names.

    ``cls.data`` maps a data-type name to a frame; ``cls.data_types`` is the
    list of those names.
    """

    def make_frame(nrows, **extra):
        # Every generated frame has three columns and the same index layout;
        # fresh ``[None]`` lists are created on each call.
        return mkdf(nrows, 3, c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None], **extra)

    frames = {}
    cls.data = frames

    frames['string'] = make_frame(5)
    frames['int'] = make_frame(5, data_gen_f=lambda *args: randint(2))
    frames['float'] = make_frame(
        5, data_gen_f=lambda r, c: float(r) + 0.01)
    frames['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                                 'b': np.arange(1, 6),
                                 'c': list('abcde')})

    # Test columns exceeding "max_colwidth" (GH8305)
    width = get_option('display.max_colwidth') + 1
    frames['colwidth'] = make_frame(
        5, data_gen_f=lambda *args: 'x' * width)

    # Test GH-5346
    row_limit = get_option('display.max_rows')
    frames['longdf'] = make_frame(
        row_limit + 1, data_gen_f=lambda *args: randint(2))

    # Test for non-ascii text: GH9263
    frames['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
                                       'es': 'en español'.split()})

    # unicode round trip test for GH 13747, GH 12529
    frames['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                                   'b': ['øπ∆˚¬', 'œ∑´®']})

    cls.data_types = list(cls.data.keys())

@classmethod
def teardown_class(cls):
    """Remove the ``data_types`` and ``data`` attributes from the class."""
    for attr_name in ('data_types', 'data'):
        delattr(cls, attr_name)

def check_round_trip_frame(self, data_type, excel=None, sep=None,
def check_round_trip_frame(self, data, excel=None, sep=None,
encoding=None):
data = self.data[data_type]
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
if sep is not None:
result = read_clipboard(sep=sep, index_col=0, encoding=encoding)
else:
result = read_clipboard(encoding=encoding)
result = read_clipboard(sep=sep or '\t', index_col=0,
encoding=encoding)
tm.assert_frame_equal(data, result, check_dtype=False)

def test_round_trip_frame_sep(self):
for dt in self.data_types:
self.check_round_trip_frame(dt, sep=',')
self.check_round_trip_frame(dt, sep=r'\s+')
self.check_round_trip_frame(dt, sep='|')

def test_round_trip_frame_string(self):
for dt in self.data_types:
self.check_round_trip_frame(dt, excel=False)

def test_round_trip_frame(self):
for dt in self.data_types:
self.check_round_trip_frame(dt)
@pytest.mark.xfail(
    reason='to_clipboard defaults to space delim. '
           'Issue in #21104, Fixed in #21111')
def test_round_trip_frame(self, df):
    """Test that default arguments copy as tab delimited."""
    self.check_round_trip_frame(df)

@pytest.mark.parametrize('sep', ['\t', ',', '|'])
def test_round_trip_frame_sep(self, df, sep):
    """Test that explicit delimiters are respected."""
    self.check_round_trip_frame(df, sep=sep)

@pytest.mark.xfail(
    reason="Fails on 'delims' df because quote escapes "
           "aren't handled correctly in default c engine. Fixed "
           "in #21111 by defaulting to python engine for "
           "whitespace separator")
def test_round_trip_frame_string(self, df):
    """Test the white space separator.

    The frame is copied with ``excel=False`` and no separator, read back
    with default arguments, and compared by string rendering and shape.
    """
    df.to_clipboard(excel=False, sep=None)
    round_tripped = read_clipboard()
    assert df.to_string() == round_tripped.to_string()
    assert df.shape == round_tripped.shape

# Two character separator is not supported in to_clipboard
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_excel_sep_warning(self):
    """Test that multi-character separators are not silently passed."""
    with tm.assert_produces_warning():
        # Built inside the context so any warning it emits is still counted.
        frame = gen_df('string')
        frame.to_clipboard(excel=True, sep=r'\t')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So instead of calling gen_df inside a test, just create another fixture; it's simpler that way, I think,
e.g.

@pytest.fixture
def df_string():
    return gen_df('string')

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, that makes a lot of sense and I'll keep that in mind for future cases. I went a different way and just ran that test function on the entire df fixture, which is a little cleaner.


# Separator is ignored when excel=False and should produce a warning
# Fails, Fixed in #21111
@pytest.mark.xfail
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for tests that xfail, pls provide a reason

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I add comments in the code with the reason?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes if they are xfailing, reference the issue number and a brief description. (when the bug is fixed will remove the xfails, but that's in a subsequent PR).

def test_copy_delim_warning(self):
    """Passing ``sep`` together with ``excel=False`` should warn."""
    with tm.assert_produces_warning():
        # Built inside the context so any warning it emits is still counted.
        frame = gen_df('string')
        frame.to_clipboard(excel=False, sep='\t')

@pytest.mark.xfail(
    reason="to_clipboard defaults to space delim. Issue in "
           "#21104, Fixed in #21111")
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
    """Test that to_clipboard defaults to tab delimited, excel=True output.

    The clipboard text is compared with ``df.to_csv(sep='\\t')``.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    copied = clipboard_get()
    if PY2:
        # to_clipboard copies unicode, to_csv produces bytes. This is
        # expected behavior
        copied = copied.encode('utf-8')
    assert copied == df.to_csv(sep='\t')

@pytest.mark.xfail(
    reason="Fails on 'delims' df because quote escapes "
           "aren't handled correctly. in default c engine. Fixed "
           "in #21111 by defaulting to python engine for "
           "whitespace separator")
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
    """Test reading of white space separated tables.

    The frame is copied with ``excel=False`` and read back using the
    ``\\s+`` separator, then compared by string rendering and shape.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    parsed = read_clipboard(sep=r'\s+')
    assert parsed.to_string() == df.to_string()
    assert df.shape == parsed.shape

def test_read_clipboard_infer_excel(self):
# gh-19010: avoid warnings
Expand Down Expand Up @@ -126,13 +190,14 @@ def test_read_clipboard_infer_excel(self):

def test_invalid_encoding(self):
# test case for testing invalid encoding
data = self.data['string']
df = gen_df('string')
with pytest.raises(ValueError):
data.to_clipboard(encoding='ascii')
df.to_clipboard(encoding='ascii')
with pytest.raises(NotImplementedError):
pd.read_clipboard(encoding='ascii')

def test_round_trip_valid_encodings(self):
for enc in ['UTF-8', 'utf-8', 'utf8']:
for dt in self.data_types:
self.check_round_trip_frame(dt, encoding=enc)
@pytest.mark.xfail(
    reason='to_clipboard defaults to space delim. '
           'Issue in #21104, Fixed in #21111')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
    """Round-trip each sample frame under every accepted UTF-8 spelling."""
    self.check_round_trip_frame(df, encoding=enc)