Skip to content

Commit 407035e

Browse files
committed
Add BZ2File wrapper for pickle protocol 5
1 parent 67d75f3 commit 407035e

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

pandas/io/common.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import mmap
2323
import os
2424
from pathlib import Path
25+
from pickle import PickleBuffer
2526
import re
2627
import tarfile
2728
from typing import (
@@ -762,9 +763,9 @@ def get_handle(
762763

763764
# BZ Compression
764765
elif compression == "bz2":
765-
# No overload variant of "BZ2File" matches argument types
766+
# Use _BZ2File for pickle protocol 5; no overload variant matches argument types
766767
# "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
767-
handle = bz2.BZ2File( # type: ignore[call-overload]
768+
handle = _BZ2File( # type: ignore[call-overload]
768769
handle,
769770
mode=ioargs.mode,
770771
**compression_args,
@@ -1002,6 +1003,22 @@ def write_to_buffer(self) -> None:
10021003
self.buffer.addfile(tarinfo, self)
10031004

10041005

1006+
class _BZ2File(bz2.BZ2File):
    """
    ``bz2.BZ2File`` whose ``write`` accepts pickle protocol 5 buffers.

    ``pickle`` protocol 5 may hand out-of-band data to the file object as
    ``PickleBuffer`` instances, which do not support ``len``.
    """

    def write(self, b) -> int:
        """
        Compress ``b`` and write it to the underlying file.

        Parameters
        ----------
        b : bytes-like
            Data to write. Anything that is not ``bytes`` or ``bytearray``
            (``PickleBuffer``, ``memoryview``, ...) is first flattened to a
            1-D buffer of bytes.

        Returns
        -------
        int
            Whatever ``bz2.BZ2File.write`` returns for the flattened data
            (documented as the number of uncompressed bytes written).
        """
        if not isinstance(b, (bytes, bytearray)):
            # Workaround issue where `bz2.BZ2File` expects `len`
            # to return the number of bytes in `b` by converting
            # `b` into something that meets that constraint with
            # minimal copying.
            if not isinstance(b, PickleBuffer):
                # Raises TypeError if `b` does not support the buffer protocol.
                b = PickleBuffer(b)
            try:
                # coerce to 1-D `uint8` C-contiguous `memoryview` zero-copy
                b = b.raw()
            except BufferError:
                # perform in-memory copy if buffer is not contiguous
                b = bytes(b)
        return super().write(b)
1020+
1021+
10051022
class _BytesZipFile(_BufferedWriter):
10061023
def __init__(
10071024
self,

0 commit comments

Comments
 (0)