-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Cleanup clipboard tests #21163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cleanup clipboard tests #21163
Changes from 6 commits
2c670bd
2470638
ad3c198
bfba15c
7888f83
5f4baa3
2613a06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,10 +9,11 @@ | |
from pandas import DataFrame | ||
from pandas import read_clipboard | ||
from pandas import get_option | ||
from pandas.compat import PY2 | ||
from pandas.util import testing as tm | ||
from pandas.util.testing import makeCustomDataframe as mkdf | ||
from pandas.io.clipboard.exceptions import PyperclipException | ||
from pandas.io.clipboard import clipboard_set | ||
from pandas.io.clipboard import clipboard_set, clipboard_get | ||
|
||
|
||
try: | ||
|
@@ -22,73 +23,136 @@ | |
_DEPS_INSTALLED = 0 | ||
|
||
|
||
def build_kwargs(sep, excel):
    """Collect the ``to_clipboard`` keyword arguments that were given.

    The string ``'default'`` acts as a "not supplied" marker: an argument
    equal to it is left out of the result so the callee's own default is
    used.  Any other value, including ``None``, is forwarded unchanged.

    Parameters
    ----------
    sep : object
        Separator to forward, or ``'default'`` to omit it.
    excel : object
        Excel flag to forward, or ``'default'`` to omit it.

    Returns
    -------
    dict
        Holds the ``'excel'`` and/or ``'sep'`` keys (in that order) for the
        arguments that were not ``'default'``.
    """
    supplied = (('excel', excel), ('sep', sep))
    return {name: value for name, value in supplied if value != 'default'}
|
||
|
||
def gen_df(data_type):
    """Build the sample frame used by the clipboard tests.

    Parameters
    ----------
    data_type : str
        One of ``'delims'``, ``'utf8'``, ``'string'``, ``'long'``,
        ``'nonascii'``, ``'colwidth'``, ``'mixed'``, ``'float'`` or
        ``'int'``.

    Returns
    -------
    DataFrame
        A freshly constructed frame of the requested flavour.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the names listed above.  Without
        this the function would fall through and return ``None``.
    """
    if data_type == 'delims':
        # Values containing quotes, tabs, commas and pipes
        return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                             'b': ['hi\'j', 'k\'\'lm']})
    elif data_type == 'utf8':
        # unicode round trip test for GH 13747, GH 12529
        return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                             'b': ['øπ∆˚¬', 'œ∑´®']})
    elif data_type == 'string':
        return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'long':
        # Test GH-5346: one row more than "display.max_rows"
        max_rows = get_option('display.max_rows')
        return mkdf(max_rows + 1, 3,
                    data_gen_f=lambda *args: randint(2),
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'nonascii':
        # Test for non-ascii text: GH9263
        return pd.DataFrame({'en': 'in English'.split(),
                             'es': 'en español'.split()})
    elif data_type == 'colwidth':
        # Test columns exceeding "max_colwidth" (GH8305)
        _cw = get_option('display.max_colwidth') + 1
        return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'mixed':
        return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                          'b': np.arange(1, 6),
                          'c': list('abcde')})
    elif data_type == 'float':
        return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    elif data_type == 'int':
        return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
                    c_idx_type='s', r_idx_type='i',
                    c_idx_names=[None], r_idx_names=[None])
    # Fail loudly on a typo instead of handing ``None`` to the caller.
    raise ValueError('unknown data_type: {!r}'.format(data_type))
|
||
|
||
@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
                        'colwidth', 'mixed', 'float', 'int'])
def df(request):
    """Parametrized fixture: one ``gen_df`` frame per listed data type."""
    kind = request.param
    return gen_df(kind)
|
||
|
||
@pytest.mark.single | ||
@pytest.mark.skipif(not _DEPS_INSTALLED, | ||
reason="clipboard primitives not installed") | ||
class TestClipboard(object): | ||
|
||
@classmethod | ||
def setup_class(cls): | ||
cls.data = {} | ||
cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i', | ||
c_idx_names=[None], r_idx_names=[None]) | ||
cls.data['int'] = mkdf(5, 3, data_gen_f=lambda *args: randint(2), | ||
c_idx_type='s', r_idx_type='i', | ||
c_idx_names=[None], r_idx_names=[None]) | ||
cls.data['float'] = mkdf(5, 3, | ||
data_gen_f=lambda r, c: float(r) + 0.01, | ||
c_idx_type='s', r_idx_type='i', | ||
c_idx_names=[None], r_idx_names=[None]) | ||
cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, | ||
'b': np.arange(1, 6), | ||
'c': list('abcde')}) | ||
|
||
# Test columns exceeding "max_colwidth" (GH8305) | ||
_cw = get_option('display.max_colwidth') + 1 | ||
cls.data['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, | ||
c_idx_type='s', r_idx_type='i', | ||
c_idx_names=[None], r_idx_names=[None]) | ||
# Test GH-5346 | ||
max_rows = get_option('display.max_rows') | ||
cls.data['longdf'] = mkdf(max_rows + 1, 3, | ||
data_gen_f=lambda *args: randint(2), | ||
c_idx_type='s', r_idx_type='i', | ||
c_idx_names=[None], r_idx_names=[None]) | ||
# Test for non-ascii text: GH9263 | ||
cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(), | ||
'es': 'en español'.split()}) | ||
# unicode round trip test for GH 13747, GH 12529 | ||
cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], | ||
'b': ['øπ∆˚¬', 'œ∑´®']}) | ||
cls.data_types = list(cls.data.keys()) | ||
|
||
@classmethod | ||
def teardown_class(cls): | ||
del cls.data_types, cls.data | ||
|
||
def check_round_trip_frame(self, data_type, excel=None, sep=None, | ||
def check_round_trip_frame(self, data, excel=None, sep=None, | ||
encoding=None): | ||
data = self.data[data_type] | ||
data.to_clipboard(excel=excel, sep=sep, encoding=encoding) | ||
if sep is not None: | ||
result = read_clipboard(sep=sep, index_col=0, encoding=encoding) | ||
else: | ||
result = read_clipboard(encoding=encoding) | ||
result = read_clipboard(sep=sep or '\t', index_col=0, | ||
encoding=encoding) | ||
tm.assert_frame_equal(data, result, check_dtype=False) | ||
|
||
def test_round_trip_frame_sep(self): | ||
for dt in self.data_types: | ||
self.check_round_trip_frame(dt, sep=',') | ||
self.check_round_trip_frame(dt, sep=r'\s+') | ||
self.check_round_trip_frame(dt, sep='|') | ||
|
||
def test_round_trip_frame_string(self): | ||
for dt in self.data_types: | ||
self.check_round_trip_frame(dt, excel=False) | ||
|
||
def test_round_trip_frame(self): | ||
for dt in self.data_types: | ||
self.check_round_trip_frame(dt) | ||
# Test that default arguments copy as tab delimited | ||
@pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' | ||
'Issue in #21104, Fixed in #21111') | ||
def test_round_trip_frame(self, df): | ||
self.check_round_trip_frame(df) | ||
|
||
# Test that explicit delimiters are respected | ||
@pytest.mark.parametrize('sep', ['\t', ',', '|']) | ||
def test_round_trip_frame_sep(self, df, sep): | ||
self.check_round_trip_frame(df, sep=sep) | ||
|
||
# Round trip through the whitespace-separated (excel=False) text form
@pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
                   "aren't handled correctly in default c engine. Fixed "
                   "in #21111 by defaulting to python engine for "
                   "whitespace separator")
def test_round_trip_frame_string(self, df):
    """Copy ``df`` as plain text and compare the re-read frame's rendering."""
    df.to_clipboard(excel=False, sep=None)
    round_tripped = read_clipboard()
    # Compare string renderings rather than values
    assert df.to_string() == round_tripped.to_string()
    assert df.shape == round_tripped.shape
|
||
# Two character separator is not supported in to_clipboard | ||
# Test that multi-character separators are not silently passed | ||
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") | ||
def test_excel_sep_warning(self): | ||
with tm.assert_produces_warning(): | ||
gen_df('string').to_clipboard(excel=True, sep=r'\t') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so instead of calling gen_df inside a test, just create another fixture, simpler that way I think
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, that makes a lot of sense and I'll keep that in mind for future cases. I went a different way and just ran that test function on the entire |
||
|
||
# Separator is ignored when excel=False and should produce a warning | ||
# Fails, Fixed in #21111 | ||
@pytest.mark.xfail | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for tests that xfail, please provide a reason There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should I add comments in the code with the reason? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes if they are xfailing, reference the issue number and a brief description. (when the bug is fixed we will remove the xfails, but that's in a subsequent PR). |
||
def test_copy_delim_warning(self): | ||
with tm.assert_produces_warning(): | ||
gen_df('string').to_clipboard(excel=False, sep='\t') | ||
|
||
# The default to_clipboard output should be tab delimited
# (i.e. behave as excel=True)
@pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in "
                   "#21104, Fixed in #21111")
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
    """Clipboard text must equal ``df.to_csv(sep='\\t')`` for each combo."""
    df.to_clipboard(**build_kwargs(sep, excel))
    copied = clipboard_get()
    if PY2:
        # to_clipboard copies unicode, to_csv produces bytes. This is
        # expected behavior, so encode before comparing.
        copied = copied.encode('utf-8')
    assert copied == df.to_csv(sep='\t')
|
||
# Whitespace separated tables copied with excel=False must read back
@pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
                   "aren't handled correctly. in default c engine. Fixed "
                   "in #21111 by defaulting to python engine for "
                   "whitespace separator")
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
    """Copy with ``excel=False`` and re-read using a ``\\s+`` separator."""
    df.to_clipboard(**build_kwargs(sep, excel))
    reread = read_clipboard(sep=r'\s+')
    # Compare string renderings rather than values
    assert reread.to_string() == df.to_string()
    assert df.shape == reread.shape
|
||
def test_read_clipboard_infer_excel(self): | ||
# gh-19010: avoid warnings | ||
|
@@ -126,13 +190,14 @@ def test_read_clipboard_infer_excel(self): | |
|
||
def test_invalid_encoding(self): | ||
# test case for testing invalid encoding | ||
data = self.data['string'] | ||
df = gen_df('string') | ||
with pytest.raises(ValueError): | ||
data.to_clipboard(encoding='ascii') | ||
df.to_clipboard(encoding='ascii') | ||
with pytest.raises(NotImplementedError): | ||
pd.read_clipboard(encoding='ascii') | ||
|
||
def test_round_trip_valid_encodings(self): | ||
for enc in ['UTF-8', 'utf-8', 'utf8']: | ||
for dt in self.data_types: | ||
self.check_round_trip_frame(dt, encoding=enc) | ||
@pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' | ||
'Issue in #21104, Fixed in #21111') | ||
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) | ||
def test_round_trip_valid_encodings(self, enc, df): | ||
self.check_round_trip_frame(df, encoding=enc) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would just move gen_df inside the fixture itself, I think makes it more clear.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
gen_df is called directly from some of the tests. The fixture is called using a pytest object and gen_df needs to be called with a string argument. Is there a better workaround than separating the two functions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can change it to something like this, but I think separating the two out is still preferable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh didn't see that, though then this begs the question of why you are not simply passing the constructed df in?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good point, I'll update