diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 987f171878d0b..1ca693755b3c6 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -57,6 +57,7 @@ Fixed Regressions - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) - Fixed regression in unary negative operations with object dtype (:issue:`21380`) - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) +- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes space-delimited instead of tab-delimited (:issue:`21104`) .. _whatsnew_0232.performance: diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index dcc221ce978b3..b3f40b3a2429c 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -1,6 +1,7 @@ """ io on the clipboard """ from pandas import compat, get_option, option_context, DataFrame -from pandas.compat import StringIO, PY2 +from pandas.compat import StringIO, PY2, PY3 +import warnings def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover @@ -32,7 +33,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover # try to decode (if needed on PY3) # Strange. linux py33 doesn't complain, win py33 does - if compat.PY3: + if PY3: try: text = compat.bytes_to_str( text, encoding=(kwargs.get('encoding') or @@ -55,11 +56,27 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover counts = {x.lstrip().count('\t') for x in lines} if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: - sep = r'\t' + sep = '\t' + # Edge case where sep is specified to be None, return to default if sep is None and kwargs.get('delim_whitespace') is None: sep = r'\s+' + # Regex separator currently only works with python engine. 
+ # Default to python if separator is multi-character (regex) + if len(sep) > 1 and kwargs.get('engine') is None: + kwargs['engine'] = 'python' + elif len(sep) > 1 and kwargs.get('engine') == 'c': + warnings.warn('read_clipboard with regex separator does not work' + ' properly with c engine') + + # In PY2, the c table reader first encodes text with UTF-8 but Python + # table reader uses the format of the passed string. For consistency, + # encode strings for python engine so that output from python and c + # engines produce consistent results + if kwargs.get('engine') == 'python' and PY2: + text = text.encode('utf-8') + return read_table(StringIO(text), sep=sep, **kwargs) @@ -99,7 +116,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover if excel: try: if sep is None: - sep = r'\t' + sep = '\t' buf = StringIO() # clipboard_set (pyperclip) expects unicode obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) @@ -108,8 +125,11 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover text = text.decode('utf-8') clipboard_set(text) return - except: - pass + except TypeError: + warnings.warn('to_clipboard in excel mode requires a single ' + 'character separator.') + elif sep is not None: + warnings.warn('to_clipboard with excel=False ignores the sep argument') if isinstance(obj, DataFrame): # str(df) has various unhelpful defaults, like truncation diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 80fddd50fc9a8..a6b331685e72a 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -88,8 +88,6 @@ def check_round_trip_frame(self, data, excel=None, sep=None, tm.assert_frame_equal(data, result, check_dtype=False) # Test that default arguments copy as tab delimited - @pytest.mark.xfail(reason='to_clipboard defaults to space delim. 
' - 'Issue in #21104, Fixed in #21111') def test_round_trip_frame(self, df): self.check_round_trip_frame(df) @@ -99,10 +97,6 @@ def test_round_trip_frame_sep(self, df, sep): self.check_round_trip_frame(df, sep=sep) # Test white space separator - @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " - "aren't handled correctly in default c engine. Fixed " - "in #21111 by defaulting to python engine for " - "whitespace separator") def test_round_trip_frame_string(self, df): df.to_clipboard(excel=False, sep=None) result = read_clipboard() @@ -111,21 +105,17 @@ def test_round_trip_frame_string(self, df): # Two character separator is not supported in to_clipboard # Test that multi-character separators are not silently passed - @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") def test_excel_sep_warning(self, df): with tm.assert_produces_warning(): df.to_clipboard(excel=True, sep=r'\t') # Separator is ignored when excel=False and should produce a warning - @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") def test_copy_delim_warning(self, df): with tm.assert_produces_warning(): df.to_clipboard(excel=False, sep='\t') # Tests that the default behavior of to_clipboard is tab # delimited and excel="True" - @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in " - "#21104, Fixed in #21111") @pytest.mark.parametrize('sep', ['\t', None, 'default']) @pytest.mark.parametrize('excel', [True, None, 'default']) def test_clipboard_copy_tabs_default(self, sep, excel, df): @@ -139,10 +129,6 @@ def test_clipboard_copy_tabs_default(self, sep, excel, df): assert clipboard_get() == df.to_csv(sep='\t') # Tests reading of white space separated tables - @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " - "aren't handled correctly. in default c engine. 
Fixed " - "in #21111 by defaulting to python engine for " - "whitespace separator") @pytest.mark.parametrize('sep', [None, 'default']) @pytest.mark.parametrize('excel', [False]) def test_clipboard_copy_strings(self, sep, excel, df): @@ -193,8 +179,6 @@ def test_invalid_encoding(self, df): with pytest.raises(NotImplementedError): pd.read_clipboard(encoding='ascii') - @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' - 'Issue in #21104, Fixed in #21111') @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc)