41
41
from datetime import datetime , date , timedelta
42
42
from dateutil .parser import parse
43
43
import os
44
- import sys
44
+ from textwrap import dedent
45
45
import warnings
46
46
47
47
import numpy as np
64
64
import pandas .core .internals as internals
65
65
66
66
from pandas .msgpack import Unpacker as _Unpacker , Packer as _Packer , ExtType
67
+ from pandas .util ._move import (
68
+ BadMove as _BadMove ,
69
+ move_into_mutable_buffer as _move_into_mutable_buffer ,
70
+ )
71
+
72
+ # check whcih compression libs we have installed
73
+ try :
74
+ import zlib
75
+
76
+ def _check_zlib ():
77
+ pass
78
+ except ImportError :
79
+ def _check_zlib ():
80
+ raise ValueError ('zlib is not installed' )
81
+
82
+ _check_zlib .__doc__ = dedent (
83
+ """\
84
+ Check if zlib is installed.
85
+
86
+ Raises
87
+ ------
88
+ ValueError
89
+ Raised when zlib is not installed.
90
+ """ ,
91
+ )
92
+
93
+ try :
94
+ import blosc
95
+
96
+ def _check_blosc ():
97
+ pass
98
+ except ImportError :
99
+ def _check_blosc ():
100
+ raise ValueError ('blosc is not installed' )
101
+
102
+ _check_blosc .__doc__ = dedent (
103
+ """\
104
+ Check if blosc is installed.
105
+
106
+ Raises
107
+ ------
108
+ ValueError
109
+ Raised when blosc is not installed.
110
+ """ ,
111
+ )
67
112
68
113
# until we can pass this into our conversion functions,
69
114
# this is pretty hacky
@@ -221,102 +266,31 @@ def convert(values):
221
266
return v .tolist ()
222
267
223
268
if compressor == 'zlib' :
269
+ _check_zlib ()
224
270
225
271
# return string arrays like they are
226
272
if dtype == np .object_ :
227
273
return v .tolist ()
228
274
229
275
# convert to a bytes array
230
276
v = v .tostring ()
231
- import zlib
232
277
return ExtType (0 , zlib .compress (v ))
233
278
234
279
elif compressor == 'blosc' :
280
+ _check_blosc ()
235
281
236
282
# return string arrays like they are
237
283
if dtype == np .object_ :
238
284
return v .tolist ()
239
285
240
286
# convert to a bytes array
241
287
v = v .tostring ()
242
- import blosc
243
288
return ExtType (0 , blosc .compress (v , typesize = dtype .itemsize ))
244
289
245
290
# ndarray (on original dtype)
246
291
return ExtType (0 , v .tostring ())
247
292
248
293
249
- class _BadMove (ValueError ):
250
- """Exception used to indicate that a move was attempted on a value with
251
- more than a single reference.
252
-
253
- Parameters
254
- ----------
255
- data : any
256
- The data which was passed to ``_move_into_mutable_buffer``.
257
-
258
- See Also
259
- --------
260
- _move_into_mutable_buffer
261
- """
262
- def __init__ (self , data ):
263
- self .data = data
264
-
265
- def __str__ (self ):
266
- return 'cannot move data from a named object'
267
-
268
-
269
- def _move_into_mutable_buffer (bytes_rvalue ):
270
- """Moves a bytes object that is about to be destroyed into a mutable buffer
271
- without copying the data.
272
-
273
- Parameters
274
- ----------
275
- bytes_rvalue : bytes with 1 refcount.
276
- The bytes object that you want to move into a mutable buffer. This
277
- cannot be a named object. It must only have a single reference.
278
-
279
- Returns
280
- -------
281
- buf : memoryview
282
- A mutable buffer that was previously used as that data for
283
- ``bytes_rvalue``.
284
-
285
- Raises
286
- ------
287
- _BadMove
288
- Raised when a move is attempted on an object with more than one
289
- reference.
290
-
291
- Notes
292
- -----
293
- If you want to use this function you are probably wrong.
294
- """
295
- if sys .getrefcount (bytes_rvalue ) != 3 :
296
- # The three references are:
297
- # 1. The callers stack (this is the only external reference)
298
- # 2. The locals for this function
299
- # 3. This function's stack (to pass to `sys.getrefcount`)
300
- raise _BadMove (bytes_rvalue )
301
-
302
- # create a numpy array from the memory of `bytes_rvalue`
303
- arr = np .frombuffer (bytes_rvalue , dtype = np .int8 )
304
- try :
305
- # mark this array as mutable
306
- arr .flags .writeable = True
307
- # At this point any mutations to `arr` will invalidate `bytes_rvalue`.
308
- # This would be fine but `np.frombuffer` is going to store this object
309
- # on `arr.base`. In order to preserve user's sanity we are going to
310
- # destroy `arr` to drop the final reference to `bytes_rvalue` and just
311
- # return a `memoryview` of the now mutable data. This dance is very
312
- # fast and makes it impossible for users to shoot themselves in the
313
- # foot.
314
- return memoryview (arr )
315
- finally :
316
- # assure that our mutable view is destroyed even if we raise
317
- del arr
318
-
319
-
320
294
def unconvert (values , dtype , compress = None ):
321
295
322
296
as_is_ext = isinstance (values , ExtType ) and values .code == 0
@@ -334,9 +308,11 @@ def unconvert(values, dtype, compress=None):
334
308
335
309
if compress :
336
310
if compress == u'zlib' :
337
- from zlib import decompress
311
+ _check_zlib ()
312
+ decompress = zlib .decompress
338
313
elif compress == u'blosc' :
339
- from blosc import decompress
314
+ _check_blosc ()
315
+ decompress = blosc .decompress
340
316
else :
341
317
raise ValueError ("compress must be one of 'zlib' or 'blosc'" )
342
318
@@ -350,7 +326,7 @@ def unconvert(values, dtype, compress=None):
350
326
# We don't just store this in the locals because we want to
351
327
# minimize the risk of giving users access to a `bytes` object
352
328
# whose data is also given to a mutable buffer.
353
- values = e .data
329
+ values = e .args [ 0 ]
354
330
if len (values ) > 1 :
355
331
# The empty string and single characters are memoized in many
356
332
# string creating functions in the capi. This case should not
0 commit comments