From d17a3c573a400198dcf3ea349df6005cd8a5135b Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 30 Jan 2019 15:25:42 -0500 Subject: [PATCH 1/6] Initial commit with proposed fix --- pandas/io/clipboard/windows.py | 12 +++++++----- pandas/tests/io/test_clipboard.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/io/clipboard/windows.py b/pandas/io/clipboard/windows.py index 3d979a61b5f2d..3d8a39c33a00a 100644 --- a/pandas/io/clipboard/windows.py +++ b/pandas/io/clipboard/windows.py @@ -3,7 +3,7 @@ """ import contextlib import ctypes -from ctypes import c_size_t, c_wchar, c_wchar_p, get_errno, sizeof +from ctypes import c_size_t, c_wchar, c_wchar_p, c_char_p, get_errno, sizeof import time from .exceptions import PyperclipWindowsException @@ -129,13 +129,15 @@ def copy_windows(text): # If the hMem parameter identifies a memory object, # the object must have been allocated using the # function with the GMEM_MOVEABLE flag. - count = len(text) + 1 + text = text.encode('utf-16LE') + mem_size = len(text) + sizeof(c_wchar) handle = safeGlobalAlloc(GMEM_MOVEABLE, - count * sizeof(c_wchar)) + mem_size) + locked_handle = safeGlobalLock(handle) - ctypes.memmove(c_wchar_p(locked_handle), - c_wchar_p(text), count * sizeof(c_wchar)) + ctypes.memmove(c_char_p(locked_handle), + c_char_p(text), mem_size) safeGlobalUnlock(handle) safeSetClipboardData(CF_UNICODETEXT, handle) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 8eb26d9f3dec5..6f383fd931fc7 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -225,3 +225,16 @@ def test_invalid_encoding(self, df): @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc) + + +@pytest.mark.single +@pytest.mark.clipboard +@pytest.mark.skipif(not _DEPS_INSTALLED, + reason="clipboard primitives not installed") +class 
TestRawClipboard(object): + + @pytest.mark.parametrize('data', [u'\U0001f44d...', 'Ωœ∑´...', 'abcd...']) + def test_raw_roundtrip(self, data): + import pandas.io.clipboard + pandas.io.clipboard.clipboard_set(data) + assert data == pandas.io.clipboard.clipboard_get() From 2a39a50c000ac5dd69de1d79a194907559eaacb3 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 30 Jan 2019 16:55:07 -0500 Subject: [PATCH 2/6] Bug fixes --- pandas/io/clipboard/windows.py | 2 +- pandas/tests/io/test_clipboard.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/clipboard/windows.py b/pandas/io/clipboard/windows.py index 3d8a39c33a00a..742cb8fb71a16 100644 --- a/pandas/io/clipboard/windows.py +++ b/pandas/io/clipboard/windows.py @@ -3,7 +3,7 @@ """ import contextlib import ctypes -from ctypes import c_size_t, c_wchar, c_wchar_p, c_char_p, get_errno, sizeof +from ctypes import c_char_p, c_size_t, c_wchar, c_wchar_p, get_errno, sizeof import time from .exceptions import PyperclipWindowsException diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 6f383fd931fc7..ead3240c2e409 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -233,7 +233,7 @@ def test_round_trip_valid_encodings(self, enc, df): reason="clipboard primitives not installed") class TestRawClipboard(object): - @pytest.mark.parametrize('data', [u'\U0001f44d...', 'Ωœ∑´...', 'abcd...']) + @pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...']) def test_raw_roundtrip(self, data): import pandas.io.clipboard pandas.io.clipboard.clipboard_set(data) From b0c0768b370a4c2e0e8752510b79b56d434227b5 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Wed, 30 Jan 2019 18:46:22 -0500 Subject: [PATCH 3/6] Added utf-16 dataframe tests. Refactors. 
--- pandas/tests/io/test_clipboard.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index ead3240c2e409..a2dfe1564b3d2 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -13,7 +13,7 @@ from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException - +from pandas.io.clipboard import clipboard_get, clipboard_set try: DataFrame({'A': [1, 2]}).to_clipboard() _DEPS_INSTALLED = 1 @@ -30,8 +30,8 @@ def build_kwargs(sep, excel): return kwargs -@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii', - 'colwidth', 'mixed', 'float', 'int']) +@pytest.fixture(params=['delims', 'utf8', 'utf16', 'string', 'long', + 'nonascii', 'colwidth', 'mixed', 'float', 'int']) def df(request): data_type = request.param @@ -41,6 +41,10 @@ def df(request): elif data_type == 'utf8': return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], 'b': ['øπ∆˚¬', 'œ∑´®']}) + elif data_type == 'utf16': + return pd.DataFrame({'a': ['\U0001f44d\U0001f44d', + '\U0001f44d\U0001f44d'], + 'b': ['abc', 'def']}) elif data_type == 'string': return mkdf(5, 3, c_idx_type='s', r_idx_type='i', c_idx_names=[None], r_idx_names=[None]) @@ -231,10 +235,7 @@ def test_round_trip_valid_encodings(self, enc, df): @pytest.mark.clipboard @pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed") -class TestRawClipboard(object): - - @pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...']) - def test_raw_roundtrip(self, data): - import pandas.io.clipboard - pandas.io.clipboard.clipboard_set(data) - assert data == pandas.io.clipboard.clipboard_get() +@pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...']) +def test_raw_roundtrip(data): + clipboard_set(data) + assert data == clipboard_get() From 92007d861369b78b5685c34875540d1733ebdb80 Mon Sep 17 00:00:00 
2001 From: david-liu-brattle-1 Date: Wed, 30 Jan 2019 20:11:37 -0500 Subject: [PATCH 4/6] Import formatting --- pandas/tests/io/test_clipboard.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index a2dfe1564b3d2..98951b2fb4a5b 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -12,8 +12,9 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf -from pandas.io.clipboard.exceptions import PyperclipException from pandas.io.clipboard import clipboard_get, clipboard_set +from pandas.io.clipboard.exceptions import PyperclipException + try: DataFrame({'A': [1, 2]}).to_clipboard() _DEPS_INSTALLED = 1 From 915d3b0133f86524f875311c2367928d9745a810 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Thu, 31 Jan 2019 20:35:13 -0500 Subject: [PATCH 5/6] Used Pyperclip code --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/io/clipboard/windows.py | 17 ++++++++++------- pandas/tests/io/test_clipboard.py | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a9fa8b2174dd0..2c07ebf6272d4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -163,6 +163,7 @@ MultiIndex I/O ^^^ +- Fixed bug in missing text when using `to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - - - diff --git a/pandas/io/clipboard/windows.py b/pandas/io/clipboard/windows.py index 742cb8fb71a16..4f5275af693b7 100644 --- a/pandas/io/clipboard/windows.py +++ b/pandas/io/clipboard/windows.py @@ -3,7 +3,7 @@ """ import contextlib import ctypes -from ctypes import c_char_p, c_size_t, c_wchar, c_wchar_p, get_errno, sizeof +from ctypes import c_size_t, c_wchar, c_wchar_p, get_errno, sizeof import time from .exceptions import PyperclipWindowsException @@ -29,6 +29,7 @@ def 
init_windows_clipboard(): HINSTANCE, HMENU, BOOL, UINT, HANDLE) windll = ctypes.windll + msvcrt = ctypes.CDLL('msvcrt') safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA) safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT, @@ -71,6 +72,10 @@ def init_windows_clipboard(): safeGlobalUnlock.argtypes = [HGLOBAL] safeGlobalUnlock.restype = BOOL + wcslen = CheckedCall(msvcrt.wcslen) + wcslen.argtypes = [c_wchar_p] + wcslen.restype = UINT + GMEM_MOVEABLE = 0x0002 CF_UNICODETEXT = 13 @@ -129,15 +134,13 @@ def copy_windows(text): # If the hMem parameter identifies a memory object, # the object must have been allocated using the # function with the GMEM_MOVEABLE flag. - text = text.encode('utf-16LE') - mem_size = len(text) + sizeof(c_wchar) + count = wcslen(text) + 1 handle = safeGlobalAlloc(GMEM_MOVEABLE, - mem_size) - + count * sizeof(c_wchar)) locked_handle = safeGlobalLock(handle) - ctypes.memmove(c_char_p(locked_handle), - c_char_p(text), mem_size) + ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text), + count * sizeof(c_wchar)) safeGlobalUnlock(handle) safeSetClipboardData(CF_UNICODETEXT, handle) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 98951b2fb4a5b..565db92210b0a 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -238,5 +238,6 @@ def test_round_trip_valid_encodings(self, enc, df): reason="clipboard primitives not installed") @pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...']) def test_raw_roundtrip(data): + # PR #25040 wide unicode wasn't copied correctly on PY3 on windows clipboard_set(data) assert data == clipboard_get() From 0c1b914321cda59ee112abaccf031887c453252c Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 Date: Fri, 1 Feb 2019 14:32:12 -0500 Subject: [PATCH 6/6] whatsnew change --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2c07ebf6272d4..880eaed3b5dfb 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -163,7 +163,7 @@ MultiIndex I/O ^^^ -- Fixed bug in missing text when using `to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) +- Fixed bug where text was missing when using :meth:`DataFrame.to_clipboard` to copy utf-16 characters in Python 3 on Windows (:issue:`25040`) - - -