Skip to content

Commit 89dd4d6

Browse files
david-liu-brattle-1 authored and jreback committed
BUG: to_clipboard text truncated for Python 3 on Windows for UTF-16 text (#25040)
1 parent 25ff472 commit 89dd4d6

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ MultiIndex
165165
I/O
166166
^^^
167167

168+
- Fixed bug in :meth:`DataFrame.to_clipboard` where text was truncated when copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
168169
-
169170
-
170171
-

pandas/io/clipboard/windows.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def init_windows_clipboard():
2929
HINSTANCE, HMENU, BOOL, UINT, HANDLE)
3030

3131
windll = ctypes.windll
32+
msvcrt = ctypes.CDLL('msvcrt')
3233

3334
safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA)
3435
safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT,
@@ -71,6 +72,10 @@ def init_windows_clipboard():
7172
safeGlobalUnlock.argtypes = [HGLOBAL]
7273
safeGlobalUnlock.restype = BOOL
7374

75+
wcslen = CheckedCall(msvcrt.wcslen)
76+
wcslen.argtypes = [c_wchar_p]
77+
wcslen.restype = UINT
78+
7479
GMEM_MOVEABLE = 0x0002
7580
CF_UNICODETEXT = 13
7681

@@ -129,13 +134,13 @@ def copy_windows(text):
129134
# If the hMem parameter identifies a memory object,
130135
# the object must have been allocated using the
131136
# function with the GMEM_MOVEABLE flag.
132-
count = len(text) + 1
137+
count = wcslen(text) + 1
133138
handle = safeGlobalAlloc(GMEM_MOVEABLE,
134139
count * sizeof(c_wchar))
135140
locked_handle = safeGlobalLock(handle)
136141

137-
ctypes.memmove(c_wchar_p(locked_handle),
138-
c_wchar_p(text), count * sizeof(c_wchar))
142+
ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text),
143+
count * sizeof(c_wchar))
139144

140145
safeGlobalUnlock(handle)
141146
safeSetClipboardData(CF_UNICODETEXT, handle)

pandas/tests/io/test_clipboard.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.util import testing as tm
1313
from pandas.util.testing import makeCustomDataframe as mkdf
1414

15+
from pandas.io.clipboard import clipboard_get, clipboard_set
1516
from pandas.io.clipboard.exceptions import PyperclipException
1617

1718
try:
@@ -30,8 +31,8 @@ def build_kwargs(sep, excel):
3031
return kwargs
3132

3233

33-
@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
34-
'colwidth', 'mixed', 'float', 'int'])
34+
@pytest.fixture(params=['delims', 'utf8', 'utf16', 'string', 'long',
35+
'nonascii', 'colwidth', 'mixed', 'float', 'int'])
3536
def df(request):
3637
data_type = request.param
3738

@@ -41,6 +42,10 @@ def df(request):
4142
elif data_type == 'utf8':
4243
return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
4344
'b': ['øπ∆˚¬', 'œ∑´®']})
45+
elif data_type == 'utf16':
46+
return pd.DataFrame({'a': ['\U0001f44d\U0001f44d',
47+
'\U0001f44d\U0001f44d'],
48+
'b': ['abc', 'def']})
4449
elif data_type == 'string':
4550
return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
4651
c_idx_names=[None], r_idx_names=[None])
@@ -225,3 +230,14 @@ def test_invalid_encoding(self, df):
225230
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
226231
def test_round_trip_valid_encodings(self, enc, df):
227232
self.check_round_trip_frame(df, encoding=enc)
233+
234+
235+
@pytest.mark.single
236+
@pytest.mark.clipboard
237+
@pytest.mark.skipif(not _DEPS_INSTALLED,
238+
reason="clipboard primitives not installed")
239+
@pytest.mark.parametrize('data', [u'\U0001f44d...', u'Ωœ∑´...', 'abcd...'])
240+
def test_raw_roundtrip(data):
241+
# PR #25040 wide unicode wasn't copied correctly on PY3 on windows
242+
clipboard_set(data)
243+
assert data == clipboard_get()

0 commit comments

Comments
 (0)