@@ -246,6 +246,77 @@ def convert(values):
246
246
return ExtType (0 , v .tostring ())
247
247
248
248
249
+ class _BadMove (ValueError ):
250
+ """Exception used to indicate that a move was attempted on a value with
251
+ more than a single reference.
252
+
253
+ Parameters
254
+ ----------
255
+ data : any
256
+ The data which was passed to ``_move_into_mutable_buffer``.
257
+
258
+ See Also
259
+ --------
260
+ _move_into_mutable_buffer
261
+ """
262
+ def __init__ (self , data ):
263
+ self .data = data
264
+
265
+ def __str__ (self ):
266
+ return 'cannot move data from a named object'
267
+
268
+
269
+ def _move_into_mutable_buffer (bytes_rvalue ):
270
+ """Moves a bytes object that is about to be destroyed into a mutable buffer
271
+ without copying the data.
272
+
273
+ Parameters
274
+ ----------
275
+ bytes_rvalue : bytes with 1 refcount.
276
+ The bytes object that you want to move into a mutable buffer. This
277
+ cannot be a named object. It must only have a single reference.
278
+
279
+ Returns
280
+ -------
281
+ buf : memoryview
282
+ A mutable buffer that was previously used as that data for
283
+ ``bytes_rvalue``.
284
+
285
+ Raises
286
+ ------
287
+ _BadMove
288
+ Raised when a move is attempted on an object with more than one
289
+ reference.
290
+
291
+ Notes
292
+ -----
293
+ If you want to use this function you are probably wrong.
294
+ """
295
+ if sys .getrefcount (bytes_rvalue ) != 3 :
296
+ # The three references are:
297
+ # 1. The callers stack (this is the only external reference)
298
+ # 2. The locals for this function
299
+ # 3. This function's stack (to pass to `sys.getrefcount`)
300
+ raise _BadMove (bytes_rvalue )
301
+
302
+ # create a numpy array from the memory of `bytes_rvalue`
303
+ arr = np .frombuffer (bytes_rvalue , dtype = np .int8 )
304
+ try :
305
+ # mark this array as mutable
306
+ arr .flags .writeable = True
307
+ # At this point any mutations to `arr` will invalidate `bytes_rvalue`.
308
+ # This would be fine but `np.frombuffer` is going to store this object
309
+ # on `arr.base`. In order to preserve user's sanity we are going to
310
+ # destroy `arr` to drop the final reference to `bytes_rvalue` and just
311
+ # return a `memoryview` of the now mutable data. This dance is very
312
+ # fast and makes it impossible for users to shoot themselves in the
313
+ # foot.
314
+ return memoryview (arr )
315
+ finally :
316
+ # assure that our mutable view is destroyed even if we raise
317
+ del arr
318
+
319
+
249
320
def unconvert (values , dtype , compress = None ):
250
321
251
322
as_is_ext = isinstance (values , ExtType ) and values .code == 0
@@ -269,30 +340,28 @@ def unconvert(values, dtype, compress=None):
269
340
else :
270
341
raise ValueError ("compress must be one of 'zlib' or 'blosc'" )
271
342
272
- values = decompress (values )
273
- if sys .getrefcount (values ) == 2 :
274
- arr = np .frombuffer (values , dtype = dtype )
275
- # We are setting the memory owned by a bytes object as mutable.
276
- # We can do this because we know that no one has a reference to
277
- # this object since it was just created in the call to
278
- # decompress and we have checked that we have the only
279
- # reference. the refcnt reports as 2 instead of 1 because we
280
- # incref the values object when we push it on the stack to call
281
- # getrefcnt. The 2 references are then the local variable
282
- # `values` and TOS.
283
- arr .flags .writeable = True
284
- return arr
285
- elif len (values ) > 1 :
286
- # The empty string and single characters are memoized in many
287
- # string creating functions in the capi. This case should not warn
288
- # even though we need to make a copy because we are only copying at
289
- # most 1 byte.
290
- warnings .warn (
291
- 'copying data after decompressing; this may mean that'
292
- ' decompress is caching its result' ,
293
- PerformanceWarning ,
343
+ try :
344
+ return np .frombuffer (
345
+ _move_into_mutable_buffer (decompress (values )),
346
+ dtype = dtype ,
294
347
)
295
- # fall through to copying `np.fromstring`
348
+ except _BadMove as e :
349
+ # Pull the decompressed data off of the `_BadMove` exception.
350
+ # We don't just store this in the locals because we want to
351
+ # minimize the risk of giving users access to a `bytes` object
352
+ # whose data is also given to a mutable buffer.
353
+ values = e .data
354
+ if len (values ) > 1 :
355
+ # The empty string and single characters are memoized in many
356
+ # string creating functions in the capi. This case should not
357
+ # warn even though we need to make a copy because we are only
358
+ # copying at most 1 byte.
359
+ warnings .warn (
360
+ 'copying data after decompressing; this may mean that'
361
+ ' decompress is caching its result' ,
362
+ PerformanceWarning ,
363
+ )
364
+ # fall through to copying `np.fromstring`
296
365
297
366
# Copy the string into a numpy array.
298
367
return np .fromstring (values , dtype = dtype )
0 commit comments