From 742aa3b3ed73f39aec530d5e1e3db82155c2bbc2 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 <36486871+david-liu-brattle-1@users.noreply.github.com> Date: Thu, 17 May 2018 19:06:25 -0400 Subject: [PATCH 01/21] Fixed copy table to excel Reverted a change in e1d5a2738235fec22f3cfad4814e09e3e3786f8c --- pandas/io/clipboards.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index dcc221ce978b3..c7485fccbfbaf 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -55,7 +55,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover counts = {x.lstrip().count('\t') for x in lines} if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: - sep = r'\t' + sep = '\t' if sep is None and kwargs.get('delim_whitespace') is None: sep = r'\s+' @@ -99,7 +99,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover if excel: try: if sep is None: - sep = r'\t' + sep = '\t' buf = StringIO() # clipboard_set (pyperclip) expects unicode obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) From a8c098d596c39df7f4599bf1da516d00be7de226 Mon Sep 17 00:00:00 2001 From: david liu Date: Thu, 17 May 2018 23:51:02 -0400 Subject: [PATCH 02/21] Unit Test and whatsnew --- doc/source/whatsnew/v0.23.1.txt | 91 +++++++++++++++++++++++++++++++ pandas/tests/io/test_clipboard.py | 11 +++- 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 doc/source/whatsnew/v0.23.1.txt diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt new file mode 100644 index 0000000000000..5884902c55924 --- /dev/null +++ b/doc/source/whatsnew/v0.23.1.txt @@ -0,0 +1,91 @@ +.. _whatsnew_0231: + +v0.23.1 +------- + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.23.1 + :local: + :backlinks: none + +.. _whatsnew_0231.enhancements: + +New features +~~~~~~~~~~~~ + + +.. _whatsnew_0231.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- +- + +.. _whatsnew_0231.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`) +- +- + +Documentation Changes +~~~~~~~~~~~~~~~~~~~~~ + +- +- + +.. _whatsnew_0231.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) + +Strings +^^^^^^^ + +- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue: `21078`) +- + +Conversion +^^^^^^^^^^ + +- +- + +Indexing +^^^^^^^^ + +- +- + +I/O +^^^ + +-Bug in :func:`DataFrame.to_clipboard` where data sent to clipboard was not properly tab-delimited even when ``excel=True`` (:issue:`21104`) +- + +Plotting +^^^^^^^^ + +- +- + +Reshaping +^^^^^^^^^ + +- +- + +Categorical +^^^^^^^^^^^ + +- diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 98c0effabec84..3b4a1fb9d4e3b 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -12,7 +12,7 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set +from pandas.io.clipboard import clipboard_set, clipboard_get try: @@ -81,6 +81,7 @@ def test_round_trip_frame_sep(self): self.check_round_trip_frame(dt, sep=',') self.check_round_trip_frame(dt, sep=r'\s+') self.check_round_trip_frame(dt, sep='|') + self.check_round_trip_frame(dt, sep='\t') def test_round_trip_frame_string(self): for dt in self.data_types: @@ -123,6 +124,14 @@ def test_read_clipboard_infer_excel(self): exp = pd.read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) + def test_excel_clipboard_format(self): + for dt in self.data_types: + for sep in ['\t',None]: + data = self.data[dt] + data.to_clipboard(excel=True, sep=sep) + result = read_clipboard(sep='\t', index_col=0) + tm.assert_frame_equal(data, result, check_dtype=False) + assert clipboard_get().count('\t') > 0 def test_invalid_encoding(self): # test case for testing invalid encoding From 1fee38f5b6fa81a4030027c5ed4013462995d2a4 Mon Sep 17 00:00:00 2001 From: david liu Date: Thu, 17 May 2018 23:52:37 -0400 Subject: [PATCH 03/21] Revert "Unit Test and whatsnew" This reverts commit a8c098d596c39df7f4599bf1da516d00be7de226. --- doc/source/whatsnew/v0.23.1.txt | 91 ------------------------------- pandas/tests/io/test_clipboard.py | 11 +--- 2 files changed, 1 insertion(+), 101 deletions(-) delete mode 100644 doc/source/whatsnew/v0.23.1.txt diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt deleted file mode 100644 index 5884902c55924..0000000000000 --- a/doc/source/whatsnew/v0.23.1.txt +++ /dev/null @@ -1,91 +0,0 @@ -.. _whatsnew_0231: - -v0.23.1 -------- - -This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes -and bug fixes. We recommend that all users upgrade to this version. - -.. contents:: What's new in v0.23.1 - :local: - :backlinks: none - -.. _whatsnew_0231.enhancements: - -New features -~~~~~~~~~~~~ - - -.. _whatsnew_0231.deprecations: - -Deprecations -~~~~~~~~~~~~ - -- -- - -.. _whatsnew_0231.performance: - -Performance Improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - -- Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`) -- -- - -Documentation Changes -~~~~~~~~~~~~~~~~~~~~~ - -- -- - -.. _whatsnew_0231.bug_fixes: - -Bug Fixes -~~~~~~~~~ - -Groupby/Resample/Rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - -- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) - -Strings -^^^^^^^ - -- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue: `21078`) -- - -Conversion -^^^^^^^^^^ - -- -- - -Indexing -^^^^^^^^ - -- -- - -I/O -^^^ - --Bug in :func:`DataFrame.to_clipboard` where data sent to clipboard was not properly tab-delimited even when ``excel=True`` (:issue:`21104`) -- - -Plotting -^^^^^^^^ - -- -- - -Reshaping -^^^^^^^^^ - -- -- - -Categorical -^^^^^^^^^^^ - -- diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3b4a1fb9d4e3b..98c0effabec84 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -12,7 +12,7 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set, clipboard_get +from pandas.io.clipboard import clipboard_set try: @@ -81,7 +81,6 @@ def test_round_trip_frame_sep(self): self.check_round_trip_frame(dt, sep=',') self.check_round_trip_frame(dt, sep=r'\s+') self.check_round_trip_frame(dt, sep='|') - self.check_round_trip_frame(dt, sep='\t') def test_round_trip_frame_string(self): for dt in self.data_types: @@ -124,14 +123,6 @@ def test_read_clipboard_infer_excel(self): exp = pd.read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) - def test_excel_clipboard_format(self): - for dt in self.data_types: - for sep in ['\t',None]: - data = self.data[dt] - data.to_clipboard(excel=True, sep=sep) - result = read_clipboard(sep='\t', index_col=0) - tm.assert_frame_equal(data, result, check_dtype=False) - assert clipboard_get().count('\t') > 0 def test_invalid_encoding(self): # test case for testing invalid encoding From fd1d3dd22451d792647d4f68a36dab0692c8c47a Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 18 May 2018 00:03:15 -0400 Subject: [PATCH 04/21] Unit test for excel clipboard IO and updated whatsnew --- doc/source/whatsnew/v0.23.1.txt | 2 +- pandas/tests/io/test_clipboard.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 9c19d4d6bbaad..5884902c55924 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -70,7 +70,7 @@ Indexing I/O ^^^ -- +-Bug in :func:`DataFrame.to_clipboard` where data sent to clipboard was not properly tab-delimited even when ``excel=True`` (:issue:`21104`) - Plotting diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 98c0effabec84..3b4a1fb9d4e3b 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -12,7 +12,7 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set +from pandas.io.clipboard import clipboard_set, clipboard_get try: @@ -81,6 +81,7 @@ def test_round_trip_frame_sep(self): self.check_round_trip_frame(dt, sep=',') self.check_round_trip_frame(dt, sep=r'\s+') self.check_round_trip_frame(dt, sep='|') + self.check_round_trip_frame(dt, sep='\t') def test_round_trip_frame_string(self): for dt in self.data_types: @@ -123,6 +124,14 @@ def test_read_clipboard_infer_excel(self): exp = pd.read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) + def test_excel_clipboard_format(self): + for dt in self.data_types: + for sep in ['\t',None]: + data = self.data[dt] + data.to_clipboard(excel=True, sep=sep) + result = read_clipboard(sep='\t', index_col=0) + tm.assert_frame_equal(data, result, check_dtype=False) + assert clipboard_get().count('\t') > 0 def test_invalid_encoding(self): # test case for testing invalid encoding From 8439dfe5b81487f316a977b3bc41be712e032198 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 <36486871+david-liu-brattle-1@users.noreply.github.com> Date: Fri, 18 May 2018 00:20:23 -0400 Subject: [PATCH 05/21] PEP8 --- pandas/tests/io/test_clipboard.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3b4a1fb9d4e3b..dcb72ecfacb52 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -124,9 +124,10 @@ def test_read_clipboard_infer_excel(self): exp = pd.read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) + def test_excel_clipboard_format(self): for dt in self.data_types: - for sep in ['\t',None]: + for sep in ['\t', None]: data = self.data[dt] data.to_clipboard(excel=True, sep=sep) result = read_clipboard(sep='\t', index_col=0) From 753e2396bfa59ac6f348085ea1676a45f5cdb6ea Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 18 May 2018 10:37:32 -0400 Subject: [PATCH 06/21] Test for function default values --- pandas/tests/io/test_clipboard.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index dcb72ecfacb52..f5c231af0c03d 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -124,15 +124,22 @@ def test_read_clipboard_infer_excel(self): exp = pd.read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) - + def test_excel_clipboard_format(self): for dt in self.data_types: - for sep in ['\t', None]: - data = self.data[dt] - data.to_clipboard(excel=True, sep=sep) - result = read_clipboard(sep='\t', index_col=0) - tm.assert_frame_equal(data, result, check_dtype=False) - assert clipboard_get().count('\t') > 0 + for sep in ['\t', None, 'default']: + for excel in [True, None, 'default']: + #Function default should be to to produce tab delimited + kwargs = {} + if excel != 'default': + kwargs['excel'] = excel + if sep != 'default': + kwargs['sep'] = sep + data = self.data[dt] + data.to_clipboard(**kwargs) + result = read_clipboard(sep='\t', index_col=0) + tm.assert_frame_equal(data, result, check_dtype=False) + assert clipboard_get().count('\t') > 0 def test_invalid_encoding(self): # test case for testing invalid encoding From ba4bc36f58844963f0ada735850106bd637a7789 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 18 May 2018 12:31:53 -0400 Subject: [PATCH 07/21] More robust clipboard tests Added additional test df containing common delimiter symbols and quotes. Added warning when attempting to copy excel format but an error is caught Default engine to "python" when reading clipboard with regex delimiter --- pandas/io/clipboards.py | 10 ++++-- pandas/tests/io/test_clipboard.py | 52 ++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index c7485fccbfbaf..08099c9765d96 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -1,7 +1,7 @@ """ io on the clipboard """ from pandas import compat, get_option, option_context, DataFrame from pandas.compat import StringIO, PY2 - +import warnings def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover r""" @@ -60,6 +60,9 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover if sep is None and kwargs.get('delim_whitespace') is None: sep = r'\s+' + if sep == r'\s+' and kwargs.get('engine') is None: + kwargs['engine'] = 'python' + return read_table(StringIO(text), sep=sep, **kwargs) @@ -108,8 +111,9 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover text = text.decode('utf-8') clipboard_set(text) return - except: - pass + except TypeError: + warnings.warn('to_clipboard in excel mode requires a single \ + character separator. Set "excel=false" or change the separator') if isinstance(obj, DataFrame): # str(df) has various unhelpful defaults, like truncation diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index f5c231af0c03d..0b5cf7d10f415 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -60,6 +60,9 @@ def setup_class(cls): # unicode round trip test for GH 13747, GH 12529 cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], 'b': ['øπ∆˚¬', 'œ∑´®']}) + # Test for quotes and common delimiters in text + cls.data['delim_symbols'] = pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'], + 'b': ['hi\'j', 'k\'\'lm']}) cls.data_types = list(cls.data.keys()) @classmethod @@ -69,12 +72,26 @@ def teardown_class(cls): def check_round_trip_frame(self, data_type, excel=None, sep=None, encoding=None): data = self.data[data_type] - data.to_clipboard(excel=excel, sep=sep, encoding=encoding) - if sep is not None: - result = read_clipboard(sep=sep, index_col=0, encoding=encoding) + if excel in [None, True] and sep is not None and len(sep) > 1: + with tm.assert_produces_warning(): + data.to_clipboard(excel=excel, sep=sep, encoding=encoding) else: - result = read_clipboard(encoding=encoding) - tm.assert_frame_equal(data, result, check_dtype=False) + data.to_clipboard(excel=excel, sep=sep, encoding=encoding) + + if excel in [None, True] and sep is not None: + # Expect Excel + result = read_clipboard(sep=sep, index_col=0, encoding=encoding) + elif excel in [None, True] and sep is None: + # Expect Excel with tabs + result = read_clipboard(sep='\t', index_col=0, encoding=encoding) + else: + # Expect df.__repr__ format + result = read_clipboard(encoding=encoding) + + if excel in [None, True]: + tm.assert_frame_equal(data, result, check_dtype=False) + else: + assert data.to_string() == result.to_string() def test_round_trip_frame_sep(self): for dt in self.data_types: @@ -125,11 +142,11 @@ def test_read_clipboard_infer_excel(self): tm.assert_frame_equal(res, exp) - def test_excel_clipboard_format(self): + def test_excel_clipboard_tabs(self): for dt in self.data_types: - for sep in ['\t', None, 'default']: - for excel in [True, None, 'default']: - #Function default should be to to produce tab delimited + for sep in ['\t', None, 'default', ',', '|']: + for excel in [True, None, 'default', False]: + # Function default should be to to produce tab delimited kwargs = {} if excel != 'default': kwargs['excel'] = excel @@ -137,9 +154,20 @@ def test_excel_clipboard_format(self): kwargs['sep'] = sep data = self.data[dt] data.to_clipboard(**kwargs) - result = read_clipboard(sep='\t', index_col=0) - tm.assert_frame_equal(data, result, check_dtype=False) - assert clipboard_get().count('\t') > 0 + if sep in ['\t', None, 'default'] and excel is not False: + # Expect tab delimited + result = read_clipboard(sep='\t', index_col=0) + tm.assert_frame_equal(data, result, check_dtype=False) + assert clipboard_get().count('\t') > 0 + elif excel is False: + # Expect spaces (ie. df.__repr__() default) + result = read_clipboard(sep=r'\s+') + assert result.to_string() == data.to_string() + else: + # Expect other delimited ',' and '|' + result = read_clipboard(sep=sep, index_col=0) + tm.assert_frame_equal(data, result, check_dtype=False) + assert clipboard_get().count(sep) > 0 def test_invalid_encoding(self): # test case for testing invalid encoding From ef8bf54aa83c0caa5005c09b76841e406bab5475 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 18 May 2018 12:36:53 -0400 Subject: [PATCH 08/21] Test for correct shape when results aren't expected to exactly match --- pandas/tests/io/test_clipboard.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 0b5cf7d10f415..7c7a34fd95288 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -92,6 +92,7 @@ def check_round_trip_frame(self, data_type, excel=None, sep=None, tm.assert_frame_equal(data, result, check_dtype=False) else: assert data.to_string() == result.to_string() + assert data.shape == result.shape def test_round_trip_frame_sep(self): for dt in self.data_types: @@ -163,6 +164,7 @@ def test_excel_clipboard_tabs(self): # Expect spaces (ie. df.__repr__() default) result = read_clipboard(sep=r'\s+') assert result.to_string() == data.to_string() + assert data.shape == result.shape else: # Expect other delimited ',' and '|' result = read_clipboard(sep=sep, index_col=0) From 4d8a1aa5878cf703863a2ff3e8ad34a1f4f74788 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 18 May 2018 14:51:31 -0400 Subject: [PATCH 09/21] PEP8 --- pandas/tests/io/test_clipboard.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 7c7a34fd95288..4beda695e8b1e 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -80,10 +80,12 @@ def check_round_trip_frame(self, data_type, excel=None, sep=None, if excel in [None, True] and sep is not None: # Expect Excel - result = read_clipboard(sep=sep, index_col=0, encoding=encoding) + result = read_clipboard(sep=sep, index_col=0, + encoding=encoding) elif excel in [None, True] and sep is None: # Expect Excel with tabs - result = read_clipboard(sep='\t', index_col=0, encoding=encoding) + result = read_clipboard(sep='\t', index_col=0, + encoding=encoding) else: # Expect df.__repr__ format result = read_clipboard(encoding=encoding) From ce02a40d887f3c5944c3c701be6d43454829d68a Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 18 May 2018 16:02:34 -0400 Subject: [PATCH 10/21] Formatting --- pandas/io/clipboards.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 08099c9765d96..6f21c3a8d3c8c 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -3,6 +3,7 @@ from pandas.compat import StringIO, PY2 import warnings + def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover r""" Read text from clipboard and pass to read_table. See read_table for the From f698ed6bfcf6518570b070bef17c66cfc77047fa Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Tue, 5 Jun 2018 22:26:22 -0400 Subject: [PATCH 11/21] Rebase --- doc/source/whatsnew/v0.23.1.txt | 1 - pandas/tests/io/test_clipboard.py | 61 +++---------------------------- 2 files changed, 6 insertions(+), 56 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 08d5079d203db..b1158eee16d96 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -93,7 +93,6 @@ Indexing I/O ^^^ -======= - Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`) - Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`) - Bug in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 4beda695e8b1e..98c0effabec84 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -12,7 +12,7 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set, clipboard_get +from pandas.io.clipboard import clipboard_set try: @@ -60,9 +60,6 @@ def setup_class(cls): # unicode round trip test for GH 13747, GH 12529 cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], 'b': ['øπ∆˚¬', 'œ∑´®']}) - # Test for quotes and common delimiters in text - cls.data['delim_symbols'] = pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'], - 'b': ['hi\'j', 'k\'\'lm']}) cls.data_types = list(cls.data.keys()) @classmethod @@ -72,36 +69,18 @@ def teardown_class(cls): def check_round_trip_frame(self, data_type, excel=None, sep=None, encoding=None): data = self.data[data_type] - if excel in [None, True] and sep is not None and len(sep) > 1: - with tm.assert_produces_warning(): - data.to_clipboard(excel=excel, sep=sep, encoding=encoding) + data.to_clipboard(excel=excel, sep=sep, encoding=encoding) + if sep is not None: + result = read_clipboard(sep=sep, index_col=0, encoding=encoding) else: - data.to_clipboard(excel=excel, sep=sep, encoding=encoding) - - if excel in [None, True] and sep is not None: - # Expect Excel - result = read_clipboard(sep=sep, index_col=0, - encoding=encoding) - elif excel in [None, True] and sep is None: - # Expect Excel with tabs - result = read_clipboard(sep='\t', index_col=0, - encoding=encoding) - else: - # Expect df.__repr__ format - result = read_clipboard(encoding=encoding) - - if excel in [None, True]: - tm.assert_frame_equal(data, result, check_dtype=False) - else: - assert data.to_string() == result.to_string() - assert data.shape == result.shape + result = read_clipboard(encoding=encoding) + tm.assert_frame_equal(data, result, check_dtype=False) def test_round_trip_frame_sep(self): for dt in self.data_types: self.check_round_trip_frame(dt, sep=',') self.check_round_trip_frame(dt, sep=r'\s+') self.check_round_trip_frame(dt, sep='|') - self.check_round_trip_frame(dt, sep='\t') def test_round_trip_frame_string(self): for dt in self.data_types: @@ -145,34 +124,6 @@ def test_read_clipboard_infer_excel(self): tm.assert_frame_equal(res, exp) - def test_excel_clipboard_tabs(self): - for dt in self.data_types: - for sep in ['\t', None, 'default', ',', '|']: - for excel in [True, None, 'default', False]: - # Function default should be to to produce tab delimited - kwargs = {} - if excel != 'default': - kwargs['excel'] = excel - if sep != 'default': - kwargs['sep'] = sep - data = self.data[dt] - data.to_clipboard(**kwargs) - if sep in ['\t', None, 'default'] and excel is not False: - # Expect tab delimited - result = read_clipboard(sep='\t', index_col=0) - tm.assert_frame_equal(data, result, check_dtype=False) - assert clipboard_get().count('\t') > 0 - elif excel is False: - # Expect spaces (ie. df.__repr__() default) - result = read_clipboard(sep=r'\s+') - assert result.to_string() == data.to_string() - assert data.shape == result.shape - else: - # Expect other delimited ',' and '|' - result = read_clipboard(sep=sep, index_col=0) - tm.assert_frame_equal(data, result, check_dtype=False) - assert clipboard_get().count(sep) > 0 - def test_invalid_encoding(self): # test case for testing invalid encoding data = self.data['string'] From 2b7b8917ee4784067e22754819f0261707916c38 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Tue, 5 Jun 2018 22:27:40 -0400 Subject: [PATCH 12/21] Typo --- pandas/io/clipboards.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 6f21c3a8d3c8c..d2b58b3ec313f 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -114,7 +114,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover return except TypeError: warnings.warn('to_clipboard in excel mode requires a single \ - character separator. Set "excel=false" or change the separator') + character separator. Set "excel=False" or change the separator') if isinstance(obj, DataFrame): # str(df) has various unhelpful defaults, like truncation From 009e3e938e6eae7deef4e92e1cbbbe2511dc3004 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 6 Jun 2018 11:05:16 -0400 Subject: [PATCH 13/21] Fixed python 27 compatibility --- pandas/io/clipboards.py | 3 + test (2).py | 752 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 755 insertions(+) create mode 100644 test (2).py diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index d2b58b3ec313f..59689a509d485 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -64,6 +64,9 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover if sep == r'\s+' and kwargs.get('engine') is None: kwargs['engine'] = 'python' + if kwargs.get('engine') == 'python' and compat.PY2: + text = text.encode('utf-8') + return read_table(StringIO(text), sep=sep, **kwargs) diff --git a/test (2).py b/test (2).py new file mode 100644 index 0000000000000..6febe674fb2a1 --- /dev/null +++ b/test (2).py @@ -0,0 +1,752 @@ +#!/usr/bin/env python + +""" +Parts of this file were taken from the pyzmq project +(https://github.com/zeromq/pyzmq) which have been permitted for use under the +BSD license. Parts are from lxml (https://github.com/lxml/lxml) +""" + +import os +from os.path import join as pjoin + +import pkg_resources +import sys +import shutil +from distutils.version import LooseVersion +from setuptools import setup, Command, find_packages + +# versioning +import versioneer +cmdclass = versioneer.get_cmdclass() + + +def is_platform_windows(): + return sys.platform == 'win32' or sys.platform == 'cygwin' + + +def is_platform_linux(): + return sys.platform == 'linux2' + + +def is_platform_mac(): + return sys.platform == 'darwin' + + +min_cython_ver = '0.24' +try: + import Cython + ver = Cython.__version__ + _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) +except ImportError: + _CYTHON_INSTALLED = False + + +min_numpy_ver = '1.9.0' +setuptools_kwargs = { + 'install_requires': [ + 'python-dateutil >= 2.5.0', + 'pytz >= 2011k', + 'numpy >= {numpy_ver}'.format(numpy_ver=min_numpy_ver), + ], + 'setup_requires': ['numpy >= {numpy_ver}'.format(numpy_ver=min_numpy_ver)], + 'zip_safe': False, +} + + +from distutils.extension import Extension # noqa:E402 +from distutils.command.build import build # noqa:E402 +from distutils.command.build_ext import build_ext as _build_ext # noqa:E402 + +try: + if not _CYTHON_INSTALLED: + raise ImportError('No supported version of Cython installed.') + try: + from Cython.Distutils.old_build_ext import old_build_ext as _build_ext # noqa:F811,E501 + except ImportError: + # Pre 0.25 + from Cython.Distutils import build_ext as _build_ext + cython = True +except ImportError: + cython = False + + +if cython: + try: + try: + from Cython import Tempita as tempita + except ImportError: + import tempita + except ImportError: + raise ImportError('Building pandas requires Tempita: ' + 'pip install Tempita') + + +_pxi_dep_template = { + 'algos': ['_libs/algos_common_helper.pxi.in', + '_libs/algos_take_helper.pxi.in', + '_libs/algos_rank_helper.pxi.in'], + 'groupby': ['_libs/groupby_helper.pxi.in'], + 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], + 'reshape': ['_libs/reshape_helper.pxi.in'], + 'hashtable': ['_libs/hashtable_class_helper.pxi.in', + '_libs/hashtable_func_helper.pxi.in'], + 'index': ['_libs/index_class_helper.pxi.in'], + 'sparse': ['_libs/sparse_op_helper.pxi.in'], + 'interval': ['_libs/intervaltree.pxi.in']} + +_pxifiles = [] +_pxi_dep = {} +for module, files in _pxi_dep_template.items(): + pxi_files = [pjoin('pandas', x) for x in files] + _pxifiles.extend(pxi_files) + _pxi_dep[module] = pxi_files + + +class build_ext(_build_ext): + def build_extensions(self): + + # if builing from c files, don't need to + # generate template output + if cython: + for pxifile in _pxifiles: + # build pxifiles first, template extension must be .pxi.in + assert pxifile.endswith('.pxi.in') + outfile = pxifile[:-3] + + if (os.path.exists(outfile) and + os.stat(pxifile).st_mtime < os.stat(outfile).st_mtime): + # if .pxi.in is not updated, no need to output .pxi + continue + + with open(pxifile, "r") as f: + tmpl = f.read() + pyxcontent = tempita.sub(tmpl) + + with open(outfile, "w") as f: + f.write(pyxcontent) + + numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') + + for ext in self.extensions: + if (hasattr(ext, 'include_dirs') and + numpy_incl not in ext.include_dirs): + ext.include_dirs.append(numpy_incl) + _build_ext.build_extensions(self) + + +DESCRIPTION = ("Powerful data structures for data analysis, time series, " + "and statistics") +LONG_DESCRIPTION = """ +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with structured (tabular, multidimensional, +potentially heterogeneous) and time series data both easy and intuitive. It +aims to be the fundamental high-level building block for doing practical, +**real world** data analysis in Python. Additionally, it has the broader goal +of becoming **the most powerful and flexible open source data analysis / +manipulation tool available in any language**. It is already well on its way +toward this goal. + +pandas is well suited for many different kinds of data: + + - Tabular data with heterogeneously-typed columns, as in an SQL table or + Excel spreadsheet + - Ordered and unordered (not necessarily fixed-frequency) time series data. + - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and + column labels + - Any other form of observational / statistical data sets. The data actually + need not be labeled at all to be placed into a pandas data structure + +The two primary data structures of pandas, Series (1-dimensional) and DataFrame +(2-dimensional), handle the vast majority of typical use cases in finance, +statistics, social science, and many areas of engineering. For R users, +DataFrame provides everything that R's ``data.frame`` provides and much +more. pandas is built on top of `NumPy `__ and is +intended to integrate well within a scientific computing environment with many +other 3rd party libraries. + +Here are just a few of the things that pandas does well: + + - Easy handling of **missing data** (represented as NaN) in floating point as + well as non-floating point data + - Size mutability: columns can be **inserted and deleted** from DataFrame and + higher dimensional objects + - Automatic and explicit **data alignment**: objects can be explicitly + aligned to a set of labels, or the user can simply ignore the labels and + let `Series`, `DataFrame`, etc. automatically align the data for you in + computations + - Powerful, flexible **group by** functionality to perform + split-apply-combine operations on data sets, for both aggregating and + transforming data + - Make it **easy to convert** ragged, differently-indexed data in other + Python and NumPy data structures into DataFrame objects + - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets + - Intuitive **merging** and **joining** data sets + - Flexible **reshaping** and pivoting of data sets + - **Hierarchical** labeling of axes (possible to have multiple labels per + tick) + - Robust IO tools for loading data from **flat files** (CSV and delimited), + Excel files, databases, and saving / loading data from the ultrafast **HDF5 + format** + - **Time series**-specific functionality: date range generation and frequency + conversion, moving window statistics, moving window linear regressions, + date shifting and lagging, etc. + +Many of these principles are here to address the shortcomings frequently +experienced using other languages / scientific research environments. For data +scientists, working with data is typically divided into multiple stages: +munging and cleaning data, analyzing / modeling it, then organizing the results +of the analysis into a form suitable for plotting or tabular display. pandas is +the ideal tool for all of these tasks. +""" + +DISTNAME = 'pandas' +LICENSE = 'BSD' +AUTHOR = "The PyData Development Team" +EMAIL = "pydata@googlegroups.com" +URL = "http://pandas.pydata.org" +DOWNLOAD_URL = '' +CLASSIFIERS = [ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'Operating System :: OS Independent', + 'Intended Audience :: Science/Research', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Cython', + 'Topic :: Scientific/Engineering'] + + +class CleanCommand(Command): + """Custom distutils command to clean the .so and .pyc files.""" + + user_options = [("all", "a", "")] + + def initialize_options(self): + self.all = True + self._clean_me = [] + self._clean_trees = [] + + base = pjoin('pandas', '_libs', 'src') + dt = pjoin(base, 'datetime') + src = base + util = pjoin('pandas', 'util') + parser = pjoin(base, 'parser') + ujson_python = pjoin(base, 'ujson', 'python') + ujson_lib = pjoin(base, 'ujson', 'lib') + self._clean_exclude = [pjoin(dt, 'np_datetime.c'), + pjoin(dt, 'np_datetime_strings.c'), + pjoin(src, 'period_helper.c'), + pjoin(parser, 'tokenizer.c'), + pjoin(parser, 'io.c'), + pjoin(ujson_python, 'ujson.c'), + pjoin(ujson_python, 'objToJSON.c'), + pjoin(ujson_python, 'JSONtoObj.c'), + pjoin(ujson_lib, 'ultrajsonenc.c'), + pjoin(ujson_lib, 'ultrajsondec.c'), + pjoin(util, 'move.c'), + ] + + for root, dirs, files in os.walk('pandas'): + for f in files: + filepath = pjoin(root, f) + if filepath in self._clean_exclude: + continue + + if os.path.splitext(f)[-1] in ('.pyc', '.so', '.o', + '.pyo', + '.pyd', '.c', '.orig'): + self._clean_me.append(filepath) + for d in dirs: + if d == '__pycache__': + self._clean_trees.append(pjoin(root, d)) + + # clean the generated pxi files + for pxifile in _pxifiles: + pxifile = pxifile.replace(".pxi.in", ".pxi") + self._clean_me.append(pxifile) + + for d in ('build', 'dist'): + if os.path.exists(d): + self._clean_trees.append(d) + + def finalize_options(self): + pass + + def run(self): + for clean_me in self._clean_me: + try: + os.unlink(clean_me) + except Exception: + pass + for clean_tree in self._clean_trees: + try: + shutil.rmtree(clean_tree) + except Exception: + pass + + +# we need to inherit from the versioneer +# class as it encodes the version info +sdist_class = cmdclass['sdist'] + + +class CheckSDist(sdist_class): + """Custom sdist that ensures Cython has compiled all pyx files to c.""" + + _pyxfiles = ['pandas/_libs/lib.pyx', + 'pandas/_libs/hashtable.pyx', + 'pandas/_libs/tslib.pyx', + 'pandas/_libs/index.pyx', + 'pandas/_libs/internals.pyx', + 'pandas/_libs/algos.pyx', + 'pandas/_libs/join.pyx', + 'pandas/_libs/indexing.pyx', + 'pandas/_libs/interval.pyx', + 'pandas/_libs/hashing.pyx', + 'pandas/_libs/missing.pyx', + 'pandas/_libs/reduction.pyx', + 'pandas/_libs/testing.pyx', + 'pandas/_libs/skiplist.pyx', + 'pandas/_libs/sparse.pyx', + 'pandas/_libs/ops.pyx', + 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/ccalendar.pyx', + 'pandas/_libs/tslibs/period.pyx', + 'pandas/_libs/tslibs/strptime.pyx', + 'pandas/_libs/tslibs/np_datetime.pyx', + 'pandas/_libs/tslibs/timedeltas.pyx', + 'pandas/_libs/tslibs/timestamps.pyx', + 'pandas/_libs/tslibs/timezones.pyx', + 'pandas/_libs/tslibs/conversion.pyx', + 'pandas/_libs/tslibs/fields.pyx', + 'pandas/_libs/tslibs/offsets.pyx', + 'pandas/_libs/tslibs/frequencies.pyx', + 'pandas/_libs/tslibs/resolution.pyx', + 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/writers.pyx', + 'pandas/io/sas/sas.pyx'] + + _cpp_pyxfiles = ['pandas/_libs/window.pyx', + 'pandas/io/msgpack/_packer.pyx', + 'pandas/io/msgpack/_unpacker.pyx'] + + def initialize_options(self): + sdist_class.initialize_options(self) + + def run(self): + if 'cython' in cmdclass: + self.run_command('cython') + else: + # If we are not running cython then + # compile the extensions correctly + pyx_files = [(self._pyxfiles, 'c'), (self._cpp_pyxfiles, 'cpp')] + + for pyxfiles, extension in pyx_files: + for pyxfile in pyxfiles: + sourcefile = pyxfile[:-3] + extension + msg = ("{extension}-source file '{source}' not found.\n" + "Run 'setup.py cython' before sdist.".format( + source=sourcefile, extension=extension)) + assert os.path.isfile(sourcefile), msg + sdist_class.run(self) + + +class CheckingBuildExt(build_ext): + """ + Subclass build_ext to get clearer report if Cython is necessary. + + """ + + def check_cython_extensions(self, extensions): + for ext in extensions: + for src in ext.sources: + if not os.path.exists(src): + print("{}: -> [{}]".format(ext.name, ext.sources)) + raise Exception("""Cython-generated file '{src}' not found. + Cython is required to compile pandas from a development branch. + Please install Cython or download a release package of pandas. + """.format(src=src)) + + def build_extensions(self): + self.check_cython_extensions(self.extensions) + build_ext.build_extensions(self) + + +class CythonCommand(build_ext): + """Custom distutils command subclassed from Cython.Distutils.build_ext + to compile pyx->c, and stop there. All this does is override the + C-compile method build_extension() with a no-op.""" + def build_extension(self, ext): + pass + + +class DummyBuildSrc(Command): + """ numpy's build_src command interferes with Cython's build_ext. + """ + user_options = [] + + def initialize_options(self): + self.py_modules_dict = {} + + def finalize_options(self): + pass + + def run(self): + pass + + +cmdclass.update({'clean': CleanCommand, + 'build': build}) + +try: + from wheel.bdist_wheel import bdist_wheel + + class BdistWheel(bdist_wheel): + def get_tag(self): + tag = bdist_wheel.get_tag(self) + repl = 'macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64' + if tag[2] == 'macosx_10_6_intel': + tag = (tag[0], tag[1], repl) + return tag + cmdclass['bdist_wheel'] = BdistWheel +except ImportError: + pass + +if cython: + suffix = '.pyx' + cmdclass['build_ext'] = CheckingBuildExt + cmdclass['cython'] = CythonCommand +else: + suffix = '.c' + cmdclass['build_src'] = DummyBuildSrc + cmdclass['build_ext'] = CheckingBuildExt + +if sys.byteorder == 'big': + endian_macro = [('__BIG_ENDIAN__', '1')] +else: + endian_macro = [('__LITTLE_ENDIAN__', '1')] + +lib_depends = ['inference'] + + +def srcpath(name=None, suffix='.pyx', subdir='src'): + return pjoin('pandas', subdir, name + suffix) + + +if suffix == '.pyx': + lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src') + for f in lib_depends] + lib_depends.append('pandas/_libs/src/util.pxd') +else: + lib_depends = [] + plib_depends = [] + +common_include = ['pandas/_libs/src/klib', 'pandas/_libs/src'] + + +def pxd(name): + return pjoin('pandas', name + '.pxd') + + +# args to ignore warnings +if is_platform_windows(): + extra_compile_args = [] +else: + extra_compile_args = ['-Wno-unused-function'] + +lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', + 'pandas/_libs/src/parse_helper.h', + 'pandas/_libs/src/compat_helper.h'] + +np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h', + 'pandas/_libs/src/datetime/np_datetime_strings.h'] +np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'] + +tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd'] + +# some linux distros require it +libraries = ['m'] if not is_platform_windows() else [] + +ext_data = { + '_libs.algos': { + 'pyxfile': '_libs/algos', + 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], + 'depends': _pxi_dep['algos']}, + '_libs.groupby': { + 'pyxfile': '_libs/groupby', + 'pxdfiles': ['_libs/src/util', '_libs/algos'], + 'depends': _pxi_dep['groupby']}, + '_libs.hashing': { + 'pyxfile': '_libs/hashing'}, + '_libs.hashtable': { + 'pyxfile': '_libs/hashtable', + 'pxdfiles': ['_libs/hashtable', '_libs/missing', '_libs/khash'], + 'depends': (['pandas/_libs/src/klib/khash_python.h'] + + _pxi_dep['hashtable'])}, + '_libs.index': { + 'pyxfile': '_libs/index', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['index'], + 'sources': np_datetime_sources}, + '_libs.indexing': { + 'pyxfile': '_libs/indexing'}, + '_libs.internals': { + 'pyxfile': '_libs/internals'}, + '_libs.interval': { + 'pyxfile': '_libs/interval', + 'pxdfiles': ['_libs/hashtable'], + 'depends': _pxi_dep['interval']}, + '_libs.join': { + 'pyxfile': '_libs/join', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['join']}, + '_libs.lib': { + 'pyxfile': '_libs/lib', + 'pxdfiles': ['_libs/src/util', + '_libs/missing', + '_libs/tslibs/conversion'], + 'depends': lib_depends + tseries_depends}, + '_libs.missing': { + 'pyxfile': '_libs/missing', + 'pxdfiles': ['_libs/src/util'], + 'depends': tseries_depends}, + '_libs.parsers': { + 'pyxfile': '_libs/parsers', + 'depends': ['pandas/_libs/src/parser/tokenizer.h', + 'pandas/_libs/src/parser/io.h', + 'pandas/_libs/src/numpy_helper.h'], + 'sources': ['pandas/_libs/src/parser/tokenizer.c', + 'pandas/_libs/src/parser/io.c']}, + '_libs.reduction': { + 'pyxfile': '_libs/reduction', + 'pxdfiles': ['_libs/src/util']}, + '_libs.ops': { + 'pyxfile': '_libs/ops', + 'pxdfiles': ['_libs/src/util', + '_libs/missing']}, + '_libs.tslibs.period': { + 'pyxfile': '_libs/tslibs/period', + 'pxdfiles': ['_libs/src/util', + '_libs/missing', + '_libs/tslibs/ccalendar', + '_libs/tslibs/timedeltas', + '_libs/tslibs/timezones', + '_libs/tslibs/nattype'], + 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], + 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, + '_libs.properties': { + 'pyxfile': '_libs/properties', + 'include': []}, + '_libs.reshape': { + 'pyxfile': '_libs/reshape', + 'depends': _pxi_dep['reshape']}, + '_libs.skiplist': { + 'pyxfile': '_libs/skiplist', + 'depends': ['pandas/_libs/src/skiplist.h']}, + '_libs.sparse': { + 'pyxfile': '_libs/sparse', + 'depends': _pxi_dep['sparse']}, + '_libs.tslib': { + 'pyxfile': '_libs/tslib', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/conversion', + '_libs/tslibs/timedeltas', + '_libs/tslibs/timestamps', + '_libs/tslibs/timezones', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.ccalendar': { + 'pyxfile': '_libs/tslibs/ccalendar'}, + '_libs.tslibs.conversion': { + 'pyxfile': '_libs/tslibs/conversion', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype', + '_libs/tslibs/timezones', + '_libs/tslibs/timedeltas'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.fields': { + 'pyxfile': '_libs/tslibs/fields', + 'pxdfiles': ['_libs/tslibs/ccalendar', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.frequencies': { + 'pyxfile': '_libs/tslibs/frequencies', + 'pxdfiles': ['_libs/src/util']}, + '_libs.tslibs.nattype': { + 'pyxfile': '_libs/tslibs/nattype', + 'pxdfiles': ['_libs/src/util']}, + '_libs.tslibs.np_datetime': { + 'pyxfile': '_libs/tslibs/np_datetime', + 'depends': np_datetime_headers, + 'sources': np_datetime_sources}, + '_libs.tslibs.offsets': { + 'pyxfile': '_libs/tslibs/offsets', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/conversion', + '_libs/tslibs/frequencies', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.parsing': { + 'pyxfile': '_libs/tslibs/parsing', + 'pxdfiles': ['_libs/src/util']}, + '_libs.tslibs.resolution': { + 'pyxfile': '_libs/tslibs/resolution', + 'pxdfiles': ['_libs/src/util', + '_libs/khash', + '_libs/tslibs/frequencies', + '_libs/tslibs/timezones'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.strptime': { + 'pyxfile': '_libs/tslibs/strptime', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.timedeltas': { + 'pyxfile': '_libs/tslibs/timedeltas', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype'], + 'depends': np_datetime_headers, + 'sources': np_datetime_sources}, + '_libs.tslibs.timestamps': { + 'pyxfile': '_libs/tslibs/timestamps', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/ccalendar', + '_libs/tslibs/conversion', + '_libs/tslibs/nattype', + '_libs/tslibs/timedeltas', + '_libs/tslibs/timezones'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.timezones': { + 'pyxfile': '_libs/tslibs/timezones', + 'pxdfiles': ['_libs/src/util']}, + '_libs.testing': { + 'pyxfile': '_libs/testing'}, + '_libs.window': { + 'pyxfile': '_libs/window', + 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], + 'language': 'c++', + 'suffix': '.cpp'}, + '_libs.writers': { + 'pyxfile': '_libs/writers', + 'pxdfiles': ['_libs/src/util']}, + 'io.sas._sas': { + 'pyxfile': 'io/sas/sas'}, + 'io.msgpack._packer': { + 'macros': endian_macro, + 'depends': ['pandas/_libs/src/msgpack/pack.h', + 'pandas/_libs/src/msgpack/pack_template.h'], + 'include': ['pandas/_libs/src/msgpack'] + common_include, + 'language': 'c++', + 'suffix': '.cpp', + 'pyxfile': 'io/msgpack/_packer', + 'subdir': 'io/msgpack'}, + 'io.msgpack._unpacker': { + 'depends': ['pandas/_libs/src/msgpack/unpack.h', + 'pandas/_libs/src/msgpack/unpack_define.h', + 'pandas/_libs/src/msgpack/unpack_template.h'], + 'macros': endian_macro, + 'include': ['pandas/_libs/src/msgpack'] + common_include, + 'language': 'c++', + 'suffix': '.cpp', + 'pyxfile': 'io/msgpack/_unpacker', + 'subdir': 'io/msgpack' + } +} + +extensions = [] + +for name, data in ext_data.items(): + source_suffix = suffix if suffix == '.pyx' else data.get('suffix', '.c') + + sources = [srcpath(data['pyxfile'], suffix=source_suffix, subdir='')] + + pxds = [pxd(x) for x in data.get('pxdfiles', [])] + if suffix == '.pyx' and pxds: + sources.extend(pxds) + + sources.extend(data.get('sources', [])) + + include = data.get('include', common_include) + + obj = Extension('pandas.{name}'.format(name=name), + sources=sources, + depends=data.get('depends', []), + include_dirs=include, + language=data.get('language', 'c'), + define_macros=data.get('macros', []), + extra_compile_args=extra_compile_args) + + extensions.append(obj) + +# ---------------------------------------------------------------------- +# ujson + +if suffix == '.pyx': + # undo dumb setuptools bug clobbering .pyx sources back to .c + for ext in extensions: + if ext.sources[0].endswith(('.c', '.cpp')): + root, _ = os.path.splitext(ext.sources[0]) + ext.sources[0] = root + suffix + +ujson_ext = Extension('pandas._libs.json', + depends=['pandas/_libs/src/ujson/lib/ultrajson.h'], + sources=(['pandas/_libs/src/ujson/python/ujson.c', + 'pandas/_libs/src/ujson/python/objToJSON.c', + 'pandas/_libs/src/ujson/python/JSONtoObj.c', + 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', + 'pandas/_libs/src/ujson/lib/ultrajsondec.c'] + + np_datetime_sources), + include_dirs=['pandas/_libs/src/ujson/python', + 'pandas/_libs/src/ujson/lib', + 'pandas/_libs/src/datetime'], + extra_compile_args=(['-D_GNU_SOURCE'] + + extra_compile_args)) + + +extensions.append(ujson_ext) + +# ---------------------------------------------------------------------- +# util +# extension for pseudo-safely moving bytes into mutable buffers +_move_ext = Extension('pandas.util._move', + depends=[], + sources=['pandas/util/move.c']) +extensions.append(_move_ext) + +# The build cache system does string matching below this point. +# if you change something, be careful. + +setup(name=DISTNAME, + maintainer=AUTHOR, + version=versioneer.get_version(), + packages=find_packages(include=['pandas', 'pandas.*']), + package_data={'': ['data/*', 'templates/*'], + 'pandas.tests.io': ['data/legacy_hdf/*.h5', + 'data/legacy_pickle/*/*.pickle', + 'data/legacy_msgpack/*/*.msgpack', + 'data/html_encoding/*.html']}, + ext_modules=extensions, + maintainer_email=EMAIL, + description=DESCRIPTION, + license=LICENSE, + cmdclass=cmdclass, + url=URL, + download_url=DOWNLOAD_URL, + long_description=LONG_DESCRIPTION, + classifiers=CLASSIFIERS, + platforms='any', + python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*', + **setuptools_kwargs) From cd4be4b84fb037607862a8d0fc37f2554216ec0c Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 6 Jun 2018 11:09:56 -0400 Subject: [PATCH 14/21] Style fix Added comments to changes Undo wrong file Trying to fix mistakes Attempt 2 Performance boost to printing DataFrames PEP8 Changes Revert "Merge remote-tracking branch 'refs/remotes/origin/fix-excel-clipboard'" This reverts commit 8c7b0d00206db1125b8d83b2a2d4f028f8a13b18, reversing changes made to 52cf5bcb389c3e486210709d7e31998bcfc81562. Revert "Merge remote-tracking branch 'refs/remotes/origin/master' into fix-excel-clipboard" This reverts commit cbb3999b95af2cab61b086b59972dedbe4aa5c42, reversing changes made to a1c7422f67221363f63eac52f40868d5d2549020. --- doc/source/whatsnew/v0.23.1.txt | 1 - pandas/io/clipboards.py | 19 +- pandas/tests/io/test_clipboard.py | 11 + setup.py | 0 test (2).py | 752 ------------------------------ 5 files changed, 26 insertions(+), 757 deletions(-) mode change 100755 => 100644 setup.py delete mode 100644 test (2).py diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 7608d75db7264..9c29c34adb7dd 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -96,7 +96,6 @@ I/O - Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`) - Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`) - Bug in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) -- Bug in :func:`DataFrame.to_clipboard` where data sent to clipboard was not properly tab-delimited even when ``excel=True`` (:issue:`21104`) - Bug in :meth:`read_stata` and :class:`StataReader` which did not correctly decode utf-8 strings on Python 3 from Stata 14 files (dta version 118) (:issue:`21244`) - diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 59689a509d485..49cc03ad38103 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -58,12 +58,22 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: sep = '\t' + # Edge case where sep is specified to be None if sep is None and kwargs.get('delim_whitespace') is None: sep = r'\s+' - if sep == r'\s+' and kwargs.get('engine') is None: + # Regex separator currently only works with python engine. + # Default to python if separator is multi-character (regex) + if len(sep) > 1 and kwargs.get('engine') is None: kwargs['engine'] = 'python' - + elif len(sep) > 1 and kwargs.get('engine') == 'c': + warnings.warn('from_clipboard with regex separator does not work' + ' properly with c engine') + + # In PY2, the c table reader first encodes text with UTF-8 but Python + # table reader uses the format of the passed string. + # For PY2, encode strings first so that output from python and c + # engines produce consistent results if kwargs.get('engine') == 'python' and compat.PY2: text = text.encode('utf-8') @@ -116,8 +126,9 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover clipboard_set(text) return except TypeError: - warnings.warn('to_clipboard in excel mode requires a single \ - character separator. Set "excel=False" or change the separator') + warnings.warn('to_clipboard in excel mode requires a single ' + 'character separator. Set "excel=False" or change ' + 'the separator') if isinstance(obj, DataFrame): # str(df) has various unhelpful defaults, like truncation diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 98c0effabec84..9350fd39bb865 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -136,3 +136,14 @@ def test_round_trip_valid_encodings(self): for enc in ['UTF-8', 'utf-8', 'utf8']: for dt in self.data_types: self.check_round_trip_frame(dt, encoding=enc) + + def test_clipboard_read_delim_warning(self): + for dt in self.data_types: + with tm.assert_produces_warning(): + self.data[dt].to_clipboard() + res = pd.read_clipboard(sep=r'\s+', engine='c') + + def test_excel_write_delim_warning(self): + for dt in self.data_types: + with tm.assert_produces_warning(): + self.data[dt].to_clipboard(excel=True, sep=r'\t') diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 diff --git a/test (2).py b/test (2).py deleted file mode 100644 index 6febe674fb2a1..0000000000000 --- a/test (2).py +++ /dev/null @@ -1,752 +0,0 @@ -#!/usr/bin/env python - -""" -Parts of this file were taken from the pyzmq project -(https://github.com/zeromq/pyzmq) which have been permitted for use under the -BSD license. Parts are from lxml (https://github.com/lxml/lxml) -""" - -import os -from os.path import join as pjoin - -import pkg_resources -import sys -import shutil -from distutils.version import LooseVersion -from setuptools import setup, Command, find_packages - -# versioning -import versioneer -cmdclass = versioneer.get_cmdclass() - - -def is_platform_windows(): - return sys.platform == 'win32' or sys.platform == 'cygwin' - - -def is_platform_linux(): - return sys.platform == 'linux2' - - -def is_platform_mac(): - return sys.platform == 'darwin' - - -min_cython_ver = '0.24' -try: - import Cython - ver = Cython.__version__ - _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) -except ImportError: - _CYTHON_INSTALLED = False - - -min_numpy_ver = '1.9.0' -setuptools_kwargs = { - 'install_requires': [ - 'python-dateutil >= 2.5.0', - 'pytz >= 2011k', - 'numpy >= {numpy_ver}'.format(numpy_ver=min_numpy_ver), - ], - 'setup_requires': ['numpy >= {numpy_ver}'.format(numpy_ver=min_numpy_ver)], - 'zip_safe': False, -} - - -from distutils.extension import Extension # noqa:E402 -from distutils.command.build import build # noqa:E402 -from distutils.command.build_ext import build_ext as _build_ext # noqa:E402 - -try: - if not _CYTHON_INSTALLED: - raise ImportError('No supported version of Cython installed.') - try: - from Cython.Distutils.old_build_ext import old_build_ext as _build_ext # noqa:F811,E501 - except ImportError: - # Pre 0.25 - from Cython.Distutils import build_ext as _build_ext - cython = True -except ImportError: - cython = False - - -if cython: - try: - try: - from Cython import Tempita as tempita - except ImportError: - import tempita - except ImportError: - raise ImportError('Building pandas requires Tempita: ' - 'pip install Tempita') - - -_pxi_dep_template = { - 'algos': ['_libs/algos_common_helper.pxi.in', - '_libs/algos_take_helper.pxi.in', - '_libs/algos_rank_helper.pxi.in'], - 'groupby': ['_libs/groupby_helper.pxi.in'], - 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], - 'reshape': ['_libs/reshape_helper.pxi.in'], - 'hashtable': ['_libs/hashtable_class_helper.pxi.in', - '_libs/hashtable_func_helper.pxi.in'], - 'index': ['_libs/index_class_helper.pxi.in'], - 'sparse': ['_libs/sparse_op_helper.pxi.in'], - 'interval': ['_libs/intervaltree.pxi.in']} - -_pxifiles = [] -_pxi_dep = {} -for module, files in _pxi_dep_template.items(): - pxi_files = [pjoin('pandas', x) for x in files] - _pxifiles.extend(pxi_files) - _pxi_dep[module] = pxi_files - - -class build_ext(_build_ext): - def build_extensions(self): - - # if builing from c files, don't need to - # generate template output - if cython: - for pxifile in _pxifiles: - # build pxifiles first, template extension must be .pxi.in - assert pxifile.endswith('.pxi.in') - outfile = pxifile[:-3] - - if (os.path.exists(outfile) and - os.stat(pxifile).st_mtime < os.stat(outfile).st_mtime): - # if .pxi.in is not updated, no need to output .pxi - continue - - with open(pxifile, "r") as f: - tmpl = f.read() - pyxcontent = tempita.sub(tmpl) - - with open(outfile, "w") as f: - f.write(pyxcontent) - - numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') - - for ext in self.extensions: - if (hasattr(ext, 'include_dirs') and - numpy_incl not in ext.include_dirs): - ext.include_dirs.append(numpy_incl) - _build_ext.build_extensions(self) - - -DESCRIPTION = ("Powerful data structures for data analysis, time series, " - "and statistics") -LONG_DESCRIPTION = """ -**pandas** is a Python package providing fast, flexible, and expressive data -structures designed to make working with structured (tabular, multidimensional, -potentially heterogeneous) and time series data both easy and intuitive. It -aims to be the fundamental high-level building block for doing practical, -**real world** data analysis in Python. Additionally, it has the broader goal -of becoming **the most powerful and flexible open source data analysis / -manipulation tool available in any language**. It is already well on its way -toward this goal. - -pandas is well suited for many different kinds of data: - - - Tabular data with heterogeneously-typed columns, as in an SQL table or - Excel spreadsheet - - Ordered and unordered (not necessarily fixed-frequency) time series data. - - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and - column labels - - Any other form of observational / statistical data sets. The data actually - need not be labeled at all to be placed into a pandas data structure - -The two primary data structures of pandas, Series (1-dimensional) and DataFrame -(2-dimensional), handle the vast majority of typical use cases in finance, -statistics, social science, and many areas of engineering. For R users, -DataFrame provides everything that R's ``data.frame`` provides and much -more. pandas is built on top of `NumPy `__ and is -intended to integrate well within a scientific computing environment with many -other 3rd party libraries. - -Here are just a few of the things that pandas does well: - - - Easy handling of **missing data** (represented as NaN) in floating point as - well as non-floating point data - - Size mutability: columns can be **inserted and deleted** from DataFrame and - higher dimensional objects - - Automatic and explicit **data alignment**: objects can be explicitly - aligned to a set of labels, or the user can simply ignore the labels and - let `Series`, `DataFrame`, etc. automatically align the data for you in - computations - - Powerful, flexible **group by** functionality to perform - split-apply-combine operations on data sets, for both aggregating and - transforming data - - Make it **easy to convert** ragged, differently-indexed data in other - Python and NumPy data structures into DataFrame objects - - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** - of large data sets - - Intuitive **merging** and **joining** data sets - - Flexible **reshaping** and pivoting of data sets - - **Hierarchical** labeling of axes (possible to have multiple labels per - tick) - - Robust IO tools for loading data from **flat files** (CSV and delimited), - Excel files, databases, and saving / loading data from the ultrafast **HDF5 - format** - - **Time series**-specific functionality: date range generation and frequency - conversion, moving window statistics, moving window linear regressions, - date shifting and lagging, etc. - -Many of these principles are here to address the shortcomings frequently -experienced using other languages / scientific research environments. For data -scientists, working with data is typically divided into multiple stages: -munging and cleaning data, analyzing / modeling it, then organizing the results -of the analysis into a form suitable for plotting or tabular display. pandas is -the ideal tool for all of these tasks. -""" - -DISTNAME = 'pandas' -LICENSE = 'BSD' -AUTHOR = "The PyData Development Team" -EMAIL = "pydata@googlegroups.com" -URL = "http://pandas.pydata.org" -DOWNLOAD_URL = '' -CLASSIFIERS = [ - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'Operating System :: OS Independent', - 'Intended Audience :: Science/Research', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Cython', - 'Topic :: Scientific/Engineering'] - - -class CleanCommand(Command): - """Custom distutils command to clean the .so and .pyc files.""" - - user_options = [("all", "a", "")] - - def initialize_options(self): - self.all = True - self._clean_me = [] - self._clean_trees = [] - - base = pjoin('pandas', '_libs', 'src') - dt = pjoin(base, 'datetime') - src = base - util = pjoin('pandas', 'util') - parser = pjoin(base, 'parser') - ujson_python = pjoin(base, 'ujson', 'python') - ujson_lib = pjoin(base, 'ujson', 'lib') - self._clean_exclude = [pjoin(dt, 'np_datetime.c'), - pjoin(dt, 'np_datetime_strings.c'), - pjoin(src, 'period_helper.c'), - pjoin(parser, 'tokenizer.c'), - pjoin(parser, 'io.c'), - pjoin(ujson_python, 'ujson.c'), - pjoin(ujson_python, 'objToJSON.c'), - pjoin(ujson_python, 'JSONtoObj.c'), - pjoin(ujson_lib, 'ultrajsonenc.c'), - pjoin(ujson_lib, 'ultrajsondec.c'), - pjoin(util, 'move.c'), - ] - - for root, dirs, files in os.walk('pandas'): - for f in files: - filepath = pjoin(root, f) - if filepath in self._clean_exclude: - continue - - if os.path.splitext(f)[-1] in ('.pyc', '.so', '.o', - '.pyo', - '.pyd', '.c', '.orig'): - self._clean_me.append(filepath) - for d in dirs: - if d == '__pycache__': - self._clean_trees.append(pjoin(root, d)) - - # clean the generated pxi files - for pxifile in _pxifiles: - pxifile = pxifile.replace(".pxi.in", ".pxi") - self._clean_me.append(pxifile) - - for d in ('build', 'dist'): - if os.path.exists(d): - self._clean_trees.append(d) - - def finalize_options(self): - pass - - def run(self): - for clean_me in self._clean_me: - try: - os.unlink(clean_me) - except Exception: - pass - for clean_tree in self._clean_trees: - try: - shutil.rmtree(clean_tree) - except Exception: - pass - - -# we need to inherit from the versioneer -# class as it encodes the version info -sdist_class = cmdclass['sdist'] - - -class CheckSDist(sdist_class): - """Custom sdist that ensures Cython has compiled all pyx files to c.""" - - _pyxfiles = ['pandas/_libs/lib.pyx', - 'pandas/_libs/hashtable.pyx', - 'pandas/_libs/tslib.pyx', - 'pandas/_libs/index.pyx', - 'pandas/_libs/internals.pyx', - 'pandas/_libs/algos.pyx', - 'pandas/_libs/join.pyx', - 'pandas/_libs/indexing.pyx', - 'pandas/_libs/interval.pyx', - 'pandas/_libs/hashing.pyx', - 'pandas/_libs/missing.pyx', - 'pandas/_libs/reduction.pyx', - 'pandas/_libs/testing.pyx', - 'pandas/_libs/skiplist.pyx', - 'pandas/_libs/sparse.pyx', - 'pandas/_libs/ops.pyx', - 'pandas/_libs/parsers.pyx', - 'pandas/_libs/tslibs/ccalendar.pyx', - 'pandas/_libs/tslibs/period.pyx', - 'pandas/_libs/tslibs/strptime.pyx', - 'pandas/_libs/tslibs/np_datetime.pyx', - 'pandas/_libs/tslibs/timedeltas.pyx', - 'pandas/_libs/tslibs/timestamps.pyx', - 'pandas/_libs/tslibs/timezones.pyx', - 'pandas/_libs/tslibs/conversion.pyx', - 'pandas/_libs/tslibs/fields.pyx', - 'pandas/_libs/tslibs/offsets.pyx', - 'pandas/_libs/tslibs/frequencies.pyx', - 'pandas/_libs/tslibs/resolution.pyx', - 'pandas/_libs/tslibs/parsing.pyx', - 'pandas/_libs/writers.pyx', - 'pandas/io/sas/sas.pyx'] - - _cpp_pyxfiles = ['pandas/_libs/window.pyx', - 'pandas/io/msgpack/_packer.pyx', - 'pandas/io/msgpack/_unpacker.pyx'] - - def initialize_options(self): - sdist_class.initialize_options(self) - - def run(self): - if 'cython' in cmdclass: - self.run_command('cython') - else: - # If we are not running cython then - # compile the extensions correctly - pyx_files = [(self._pyxfiles, 'c'), (self._cpp_pyxfiles, 'cpp')] - - for pyxfiles, extension in pyx_files: - for pyxfile in pyxfiles: - sourcefile = pyxfile[:-3] + extension - msg = ("{extension}-source file '{source}' not found.\n" - "Run 'setup.py cython' before sdist.".format( - source=sourcefile, extension=extension)) - assert os.path.isfile(sourcefile), msg - sdist_class.run(self) - - -class CheckingBuildExt(build_ext): - """ - Subclass build_ext to get clearer report if Cython is necessary. - - """ - - def check_cython_extensions(self, extensions): - for ext in extensions: - for src in ext.sources: - if not os.path.exists(src): - print("{}: -> [{}]".format(ext.name, ext.sources)) - raise Exception("""Cython-generated file '{src}' not found. - Cython is required to compile pandas from a development branch. - Please install Cython or download a release package of pandas. - """.format(src=src)) - - def build_extensions(self): - self.check_cython_extensions(self.extensions) - build_ext.build_extensions(self) - - -class CythonCommand(build_ext): - """Custom distutils command subclassed from Cython.Distutils.build_ext - to compile pyx->c, and stop there. All this does is override the - C-compile method build_extension() with a no-op.""" - def build_extension(self, ext): - pass - - -class DummyBuildSrc(Command): - """ numpy's build_src command interferes with Cython's build_ext. - """ - user_options = [] - - def initialize_options(self): - self.py_modules_dict = {} - - def finalize_options(self): - pass - - def run(self): - pass - - -cmdclass.update({'clean': CleanCommand, - 'build': build}) - -try: - from wheel.bdist_wheel import bdist_wheel - - class BdistWheel(bdist_wheel): - def get_tag(self): - tag = bdist_wheel.get_tag(self) - repl = 'macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64' - if tag[2] == 'macosx_10_6_intel': - tag = (tag[0], tag[1], repl) - return tag - cmdclass['bdist_wheel'] = BdistWheel -except ImportError: - pass - -if cython: - suffix = '.pyx' - cmdclass['build_ext'] = CheckingBuildExt - cmdclass['cython'] = CythonCommand -else: - suffix = '.c' - cmdclass['build_src'] = DummyBuildSrc - cmdclass['build_ext'] = CheckingBuildExt - -if sys.byteorder == 'big': - endian_macro = [('__BIG_ENDIAN__', '1')] -else: - endian_macro = [('__LITTLE_ENDIAN__', '1')] - -lib_depends = ['inference'] - - -def srcpath(name=None, suffix='.pyx', subdir='src'): - return pjoin('pandas', subdir, name + suffix) - - -if suffix == '.pyx': - lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src') - for f in lib_depends] - lib_depends.append('pandas/_libs/src/util.pxd') -else: - lib_depends = [] - plib_depends = [] - -common_include = ['pandas/_libs/src/klib', 'pandas/_libs/src'] - - -def pxd(name): - return pjoin('pandas', name + '.pxd') - - -# args to ignore warnings -if is_platform_windows(): - extra_compile_args = [] -else: - extra_compile_args = ['-Wno-unused-function'] - -lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', - 'pandas/_libs/src/parse_helper.h', - 'pandas/_libs/src/compat_helper.h'] - -np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h', - 'pandas/_libs/src/datetime/np_datetime_strings.h'] -np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', - 'pandas/_libs/src/datetime/np_datetime_strings.c'] - -tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd'] - -# some linux distros require it -libraries = ['m'] if not is_platform_windows() else [] - -ext_data = { - '_libs.algos': { - 'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], - 'depends': _pxi_dep['algos']}, - '_libs.groupby': { - 'pyxfile': '_libs/groupby', - 'pxdfiles': ['_libs/src/util', '_libs/algos'], - 'depends': _pxi_dep['groupby']}, - '_libs.hashing': { - 'pyxfile': '_libs/hashing'}, - '_libs.hashtable': { - 'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable', '_libs/missing', '_libs/khash'], - 'depends': (['pandas/_libs/src/klib/khash_python.h'] + - _pxi_dep['hashtable'])}, - '_libs.index': { - 'pyxfile': '_libs/index', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], - 'depends': _pxi_dep['index'], - 'sources': np_datetime_sources}, - '_libs.indexing': { - 'pyxfile': '_libs/indexing'}, - '_libs.internals': { - 'pyxfile': '_libs/internals'}, - '_libs.interval': { - 'pyxfile': '_libs/interval', - 'pxdfiles': ['_libs/hashtable'], - 'depends': _pxi_dep['interval']}, - '_libs.join': { - 'pyxfile': '_libs/join', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], - 'depends': _pxi_dep['join']}, - '_libs.lib': { - 'pyxfile': '_libs/lib', - 'pxdfiles': ['_libs/src/util', - '_libs/missing', - '_libs/tslibs/conversion'], - 'depends': lib_depends + tseries_depends}, - '_libs.missing': { - 'pyxfile': '_libs/missing', - 'pxdfiles': ['_libs/src/util'], - 'depends': tseries_depends}, - '_libs.parsers': { - 'pyxfile': '_libs/parsers', - 'depends': ['pandas/_libs/src/parser/tokenizer.h', - 'pandas/_libs/src/parser/io.h', - 'pandas/_libs/src/numpy_helper.h'], - 'sources': ['pandas/_libs/src/parser/tokenizer.c', - 'pandas/_libs/src/parser/io.c']}, - '_libs.reduction': { - 'pyxfile': '_libs/reduction', - 'pxdfiles': ['_libs/src/util']}, - '_libs.ops': { - 'pyxfile': '_libs/ops', - 'pxdfiles': ['_libs/src/util', - '_libs/missing']}, - '_libs.tslibs.period': { - 'pyxfile': '_libs/tslibs/period', - 'pxdfiles': ['_libs/src/util', - '_libs/missing', - '_libs/tslibs/ccalendar', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], - 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], - 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, - '_libs.properties': { - 'pyxfile': '_libs/properties', - 'include': []}, - '_libs.reshape': { - 'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape']}, - '_libs.skiplist': { - 'pyxfile': '_libs/skiplist', - 'depends': ['pandas/_libs/src/skiplist.h']}, - '_libs.sparse': { - 'pyxfile': '_libs/sparse', - 'depends': _pxi_dep['sparse']}, - '_libs.tslib': { - 'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timestamps', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.ccalendar': { - 'pyxfile': '_libs/tslibs/ccalendar'}, - '_libs.tslibs.conversion': { - 'pyxfile': '_libs/tslibs/conversion', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype', - '_libs/tslibs/timezones', - '_libs/tslibs/timedeltas'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.fields': { - 'pyxfile': '_libs/tslibs/fields', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.frequencies': { - 'pyxfile': '_libs/tslibs/frequencies', - 'pxdfiles': ['_libs/src/util']}, - '_libs.tslibs.nattype': { - 'pyxfile': '_libs/tslibs/nattype', - 'pxdfiles': ['_libs/src/util']}, - '_libs.tslibs.np_datetime': { - 'pyxfile': '_libs/tslibs/np_datetime', - 'depends': np_datetime_headers, - 'sources': np_datetime_sources}, - '_libs.tslibs.offsets': { - 'pyxfile': '_libs/tslibs/offsets', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', - '_libs/tslibs/frequencies', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.parsing': { - 'pyxfile': '_libs/tslibs/parsing', - 'pxdfiles': ['_libs/src/util']}, - '_libs.tslibs.resolution': { - 'pyxfile': '_libs/tslibs/resolution', - 'pxdfiles': ['_libs/src/util', - '_libs/khash', - '_libs/tslibs/frequencies', - '_libs/tslibs/timezones'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.strptime': { - 'pyxfile': '_libs/tslibs/strptime', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.timedeltas': { - 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], - 'depends': np_datetime_headers, - 'sources': np_datetime_sources}, - '_libs.tslibs.timestamps': { - 'pyxfile': '_libs/tslibs/timestamps', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/ccalendar', - '_libs/tslibs/conversion', - '_libs/tslibs/nattype', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones'], - 'depends': tseries_depends, - 'sources': np_datetime_sources}, - '_libs.tslibs.timezones': { - 'pyxfile': '_libs/tslibs/timezones', - 'pxdfiles': ['_libs/src/util']}, - '_libs.testing': { - 'pyxfile': '_libs/testing'}, - '_libs.window': { - 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], - 'language': 'c++', - 'suffix': '.cpp'}, - '_libs.writers': { - 'pyxfile': '_libs/writers', - 'pxdfiles': ['_libs/src/util']}, - 'io.sas._sas': { - 'pyxfile': 'io/sas/sas'}, - 'io.msgpack._packer': { - 'macros': endian_macro, - 'depends': ['pandas/_libs/src/msgpack/pack.h', - 'pandas/_libs/src/msgpack/pack_template.h'], - 'include': ['pandas/_libs/src/msgpack'] + common_include, - 'language': 'c++', - 'suffix': '.cpp', - 'pyxfile': 'io/msgpack/_packer', - 'subdir': 'io/msgpack'}, - 'io.msgpack._unpacker': { - 'depends': ['pandas/_libs/src/msgpack/unpack.h', - 'pandas/_libs/src/msgpack/unpack_define.h', - 'pandas/_libs/src/msgpack/unpack_template.h'], - 'macros': endian_macro, - 'include': ['pandas/_libs/src/msgpack'] + common_include, - 'language': 'c++', - 'suffix': '.cpp', - 'pyxfile': 'io/msgpack/_unpacker', - 'subdir': 'io/msgpack' - } -} - -extensions = [] - -for name, data in ext_data.items(): - source_suffix = suffix if suffix == '.pyx' else data.get('suffix', '.c') - - sources = [srcpath(data['pyxfile'], suffix=source_suffix, subdir='')] - - pxds = [pxd(x) for x in data.get('pxdfiles', [])] - if suffix == '.pyx' and pxds: - sources.extend(pxds) - - sources.extend(data.get('sources', [])) - - include = data.get('include', common_include) - - obj = Extension('pandas.{name}'.format(name=name), - sources=sources, - depends=data.get('depends', []), - include_dirs=include, - language=data.get('language', 'c'), - define_macros=data.get('macros', []), - extra_compile_args=extra_compile_args) - - extensions.append(obj) - -# ---------------------------------------------------------------------- -# ujson - -if suffix == '.pyx': - # undo dumb setuptools bug clobbering .pyx sources back to .c - for ext in extensions: - if ext.sources[0].endswith(('.c', '.cpp')): - root, _ = os.path.splitext(ext.sources[0]) - ext.sources[0] = root + suffix - -ujson_ext = Extension('pandas._libs.json', - depends=['pandas/_libs/src/ujson/lib/ultrajson.h'], - sources=(['pandas/_libs/src/ujson/python/ujson.c', - 'pandas/_libs/src/ujson/python/objToJSON.c', - 'pandas/_libs/src/ujson/python/JSONtoObj.c', - 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', - 'pandas/_libs/src/ujson/lib/ultrajsondec.c'] + - np_datetime_sources), - include_dirs=['pandas/_libs/src/ujson/python', - 'pandas/_libs/src/ujson/lib', - 'pandas/_libs/src/datetime'], - extra_compile_args=(['-D_GNU_SOURCE'] + - extra_compile_args)) - - -extensions.append(ujson_ext) - -# ---------------------------------------------------------------------- -# util -# extension for pseudo-safely moving bytes into mutable buffers -_move_ext = Extension('pandas.util._move', - depends=[], - sources=['pandas/util/move.c']) -extensions.append(_move_ext) - -# The build cache system does string matching below this point. -# if you change something, be careful. - -setup(name=DISTNAME, - maintainer=AUTHOR, - version=versioneer.get_version(), - packages=find_packages(include=['pandas', 'pandas.*']), - package_data={'': ['data/*', 'templates/*'], - 'pandas.tests.io': ['data/legacy_hdf/*.h5', - 'data/legacy_pickle/*/*.pickle', - 'data/legacy_msgpack/*/*.msgpack', - 'data/html_encoding/*.html']}, - ext_modules=extensions, - maintainer_email=EMAIL, - description=DESCRIPTION, - license=LICENSE, - cmdclass=cmdclass, - url=URL, - download_url=DOWNLOAD_URL, - long_description=LONG_DESCRIPTION, - classifiers=CLASSIFIERS, - platforms='any', - python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*', - **setuptools_kwargs) From 30f5d78bc7e28329cea97973597d5a0ac33b1744 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Mon, 18 Jun 2018 18:29:00 -0400 Subject: [PATCH 15/21] Fix test Fix Test --- pandas/tests/io/test_clipboard.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 9350fd39bb865..eedfdd1993411 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -66,6 +66,15 @@ def setup_class(cls): def teardown_class(cls): del cls.data_types, cls.data + def test_read_delim_warning(self): + with tm.assert_produces_warning(): + self.data['string'].to_clipboard() + pd.read_clipboard(sep=r'\s+', engine='c') + + def test_write_delim_warning(self): + with tm.assert_produces_warning(): + self.data['string'].to_clipboard(excel=True, sep=r'\s+') + def check_round_trip_frame(self, data_type, excel=None, sep=None, encoding=None): data = self.data[data_type] @@ -136,14 +145,3 @@ def test_round_trip_valid_encodings(self): for enc in ['UTF-8', 'utf-8', 'utf8']: for dt in self.data_types: self.check_round_trip_frame(dt, encoding=enc) - - def test_clipboard_read_delim_warning(self): - for dt in self.data_types: - with tm.assert_produces_warning(): - self.data[dt].to_clipboard() - res = pd.read_clipboard(sep=r'\s+', engine='c') - - def test_excel_write_delim_warning(self): - for dt in self.data_types: - with tm.assert_produces_warning(): - self.data[dt].to_clipboard(excel=True, sep=r'\t') From e363374433caea1fec59e7f09921651be8207f65 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Sat, 23 Jun 2018 15:28:29 -0400 Subject: [PATCH 16/21] Added warning for excel=False and sep!=None --- pandas/io/clipboards.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 49cc03ad38103..b1cabb2096b47 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -1,6 +1,6 @@ """ io on the clipboard """ from pandas import compat, get_option, option_context, DataFrame -from pandas.compat import StringIO, PY2 +from pandas.compat import StringIO import warnings @@ -58,7 +58,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: sep = '\t' - # Edge case where sep is specified to be None + # Edge case where sep is specified to be None, return to default if sep is None and kwargs.get('delim_whitespace') is None: sep = r'\s+' @@ -71,8 +71,8 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover ' properly with c engine') # In PY2, the c table reader first encodes text with UTF-8 but Python - # table reader uses the format of the passed string. - # For PY2, encode strings first so that output from python and c + # table reader uses the format of the passed string. For consistency, + # encode strings for python engine so that output from python and c # engines produce consistent results if kwargs.get('engine') == 'python' and compat.PY2: text = text.encode('utf-8') @@ -121,7 +121,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover # clipboard_set (pyperclip) expects unicode obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) text = buf.getvalue() - if PY2: + if compat.PY2: text = text.decode('utf-8') clipboard_set(text) return @@ -129,6 +129,8 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover warnings.warn('to_clipboard in excel mode requires a single ' 'character separator. Set "excel=False" or change ' 'the separator') + elif sep is not None: + warnings.warn('to_clipboard with excel=False ignores the sep argument') if isinstance(obj, DataFrame): # str(df) has various unhelpful defaults, like truncation From 5013d67907c69944ed8da8d23dd125b9c25b53e8 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 27 Jun 2018 11:08:14 -0400 Subject: [PATCH 17/21] Removed xfail, add whatsnew --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/tests/io/test_clipboard.py | 16 ---------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 9c4b408a1d24b..7e79e22ffdb8d 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -113,6 +113,7 @@ Bug Fixes - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) - Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) +- But in :func:`to_clipboard` that always copied dataframes with space delimited instead of tab delimited (:issue:`21104`) - **Plotting** diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 80fddd50fc9a8..a6b331685e72a 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -88,8 +88,6 @@ def check_round_trip_frame(self, data, excel=None, sep=None, tm.assert_frame_equal(data, result, check_dtype=False) # Test that default arguments copy as tab delimited - @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' - 'Issue in #21104, Fixed in #21111') def test_round_trip_frame(self, df): self.check_round_trip_frame(df) @@ -99,10 +97,6 @@ def test_round_trip_frame_sep(self, df, sep): self.check_round_trip_frame(df, sep=sep) # Test white space separator - @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " - "aren't handled correctly in default c engine. Fixed " - "in #21111 by defaulting to python engine for " - "whitespace separator") def test_round_trip_frame_string(self, df): df.to_clipboard(excel=False, sep=None) result = read_clipboard() @@ -111,21 +105,17 @@ def test_round_trip_frame_string(self, df): # Two character separator is not supported in to_clipboard # Test that multi-character separators are not silently passed - @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") def test_excel_sep_warning(self, df): with tm.assert_produces_warning(): df.to_clipboard(excel=True, sep=r'\t') # Separator is ignored when excel=False and should produce a warning - @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") def test_copy_delim_warning(self, df): with tm.assert_produces_warning(): df.to_clipboard(excel=False, sep='\t') # Tests that the default behavior of to_clipboard is tab # delimited and excel="True" - @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in " - "#21104, Fixed in #21111") @pytest.mark.parametrize('sep', ['\t', None, 'default']) @pytest.mark.parametrize('excel', [True, None, 'default']) def test_clipboard_copy_tabs_default(self, sep, excel, df): @@ -139,10 +129,6 @@ def test_clipboard_copy_tabs_default(self, sep, excel, df): assert clipboard_get() == df.to_csv(sep='\t') # Tests reading of white space separated tables - @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " - "aren't handled correctly. in default c engine. Fixed " - "in #21111 by defaulting to python engine for " - "whitespace separator") @pytest.mark.parametrize('sep', [None, 'default']) @pytest.mark.parametrize('excel', [False]) def test_clipboard_copy_strings(self, sep, excel, df): @@ -193,8 +179,6 @@ def test_invalid_encoding(self, df): with pytest.raises(NotImplementedError): pd.read_clipboard(encoding='ascii') - @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' - 'Issue in #21104, Fixed in #21111') @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc) From 24a650f30be5efc0dc23ca9975742c754c3ded93 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 27 Jun 2018 11:48:57 -0400 Subject: [PATCH 18/21] Rebuild --- doc/source/whatsnew/v0.23.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 7e79e22ffdb8d..4aaecda6d126d 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -113,7 +113,7 @@ Bug Fixes - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) - Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) -- But in :func:`to_clipboard` that always copied dataframes with space delimited instead of tab delimited (:issue:`21104`) +- But in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) - **Plotting** From 5db662fee43dbfc4313843fdf1dde4f62b3d5dbf Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 27 Jun 2018 13:04:33 -0400 Subject: [PATCH 19/21] Typo fixes --- doc/source/whatsnew/v0.23.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 4aaecda6d126d..96ae10db8b0c7 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -113,7 +113,7 @@ Bug Fixes - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) - Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) -- But in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) +- Bug in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) - **Plotting** From 3939bf3b2d81a407511dc46333481de9db95010b Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 27 Jun 2018 19:26:08 -0400 Subject: [PATCH 20/21] permissions --- setup.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 setup.py diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 From 676a58c42f5dc38e8a5e44422e7d8df6f05495f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Jun 2018 11:40:17 +0200 Subject: [PATCH 21/21] fix warning + small edits --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/io/clipboards.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 62b439869fdd6..1ca693755b3c6 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -57,6 +57,7 @@ Fixed Regressions - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) - Fixed regression in unary negative operations with object dtype (:issue:`21380`) - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) +- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) .. _whatsnew_0232.performance: @@ -115,7 +116,6 @@ Bug Fixes - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) - Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) -- Bug in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) - **Plotting** diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index b1cabb2096b47..b3f40b3a2429c 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -1,6 +1,6 @@ """ io on the clipboard """ from pandas import compat, get_option, option_context, DataFrame -from pandas.compat import StringIO +from pandas.compat import StringIO, PY2, PY3 import warnings @@ -33,7 +33,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover # try to decode (if needed on PY3) # Strange. linux py33 doesn't complain, win py33 does - if compat.PY3: + if PY3: try: text = compat.bytes_to_str( text, encoding=(kwargs.get('encoding') or @@ -67,14 +67,14 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover if len(sep) > 1 and kwargs.get('engine') is None: kwargs['engine'] = 'python' elif len(sep) > 1 and kwargs.get('engine') == 'c': - warnings.warn('from_clipboard with regex separator does not work' + warnings.warn('read_clipboard with regex separator does not work' ' properly with c engine') # In PY2, the c table reader first encodes text with UTF-8 but Python # table reader uses the format of the passed string. For consistency, # encode strings for python engine so that output from python and c # engines produce consistent results - if kwargs.get('engine') == 'python' and compat.PY2: + if kwargs.get('engine') == 'python' and PY2: text = text.encode('utf-8') return read_table(StringIO(text), sep=sep, **kwargs) @@ -121,14 +121,13 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover # clipboard_set (pyperclip) expects unicode obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) text = buf.getvalue() - if compat.PY2: + if PY2: text = text.decode('utf-8') clipboard_set(text) return except TypeError: warnings.warn('to_clipboard in excel mode requires a single ' - 'character separator. Set "excel=False" or change ' - 'the separator') + 'character separator.') elif sep is not None: warnings.warn('to_clipboard with excel=False ignores the sep argument')