|
13 | 13 | from pandas import compat, lib
|
14 | 14 | from pandas.compat import range
|
15 | 15 |
|
16 |
| -from pandas._sparse import BlockIndex, IntIndex |
| 16 | +from pandas._sparse import SparseIndex, BlockIndex, IntIndex |
17 | 17 | import pandas._sparse as splib
|
18 | 18 | import pandas.index as _index
|
19 | 19 | import pandas.core.ops as ops
|
20 | 20 | import pandas.formats.printing as printing
|
| 21 | +from pandas.util.decorators import Appender |
| 22 | +from pandas.indexes.base import _index_shared_docs |
| 23 | + |
| 24 | + |
| 25 | +_sparray_doc_kwargs = dict(klass='SparseArray') |
21 | 26 |
|
22 | 27 |
|
23 | 28 | def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None,
|
@@ -167,10 +172,19 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
|
167 | 172 | fill_value = bool(fill_value)
|
168 | 173 |
|
169 | 174 | # Change the class of the array to be the subclass type.
|
170 |
| - output = subarr.view(cls) |
171 |
| - output.sp_index = sparse_index |
172 |
| - output.fill_value = fill_value |
173 |
| - return output |
| 175 | + return cls._simple_new(subarr, sparse_index, fill_value) |
| 176 | + |
@classmethod
def _simple_new(cls, data, sp_index, fill_value):
    """
    Build an instance of ``cls`` from already-prepared components.

    Parameters
    ----------
    data : ndarray
        Array that is re-viewed as ``cls``.
        NOTE(review): presumably holds only the stored (non-fill)
        values -- confirm against callers.
    sp_index : SparseIndex
        Stored on the result as ``sp_index``.
    fill_value : scalar
        Stored on the result as ``fill_value``.

    Returns
    -------
    result : cls

    Raises
    ------
    ValueError
        If ``sp_index`` is not a ``SparseIndex`` instance.
    """
    # Validate before creating the view so that an invalid call fails
    # fast without building a throw-away object.
    if not isinstance(sp_index, SparseIndex):
        # caller must pass SparseIndex
        raise ValueError('sp_index must be a SparseIndex')

    result = data.view(cls)
    result.sp_index = sp_index
    result.fill_value = fill_value
    return result
174 | 188 |
|
175 | 189 | @property
|
176 | 190 | def _constructor(self):
|
@@ -308,46 +322,53 @@ def _get_val_at(self, loc):
|
308 | 322 | else:
|
309 | 323 | return _index.get_value_at(self, sp_loc)
|
310 | 324 |
|
@Appender(_index_shared_docs['take'] % _sparray_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
         fill_value=None):
    # Sparse-compatible version of ndarray.take, returns SparseArray.
    # The docstring is attached by the Appender decorator above, which
    # is why this description is a comment rather than a docstring.

    if axis:
        raise ValueError("axis must be 0, input was {0}".format(axis))

    if com.is_integer(indices):
        # return scalar
        return self[indices]

    indices = com._ensure_platform_int(indices)
    n = len(self)

    # -1 is only treated as a "missing" marker when the caller passes a
    # fill_value explicitly; otherwise negative indices count from the
    # end, as with ndarray.take.
    fill_missing = allow_fill and fill_value is not None

    if fill_missing:
        # allow -1 to indicate self.fill_value,
        # self.fill_value may not be NaN
        if (indices < -1).any():
            msg = ('When allow_fill=True and fill_value is not None, '
                   'all indices must be >= -1')
            raise ValueError(msg)
        elif (n <= indices).any():
            msg = 'index is out of bounds for size {0}'
            raise IndexError(msg.format(n))
    else:
        if ((indices < -n) | (n <= indices)).any():
            msg = 'index is out of bounds for size {0}'
            raise IndexError(msg.format(n))

    # astype() returns a newly allocated array by default, so the
    # in-place wrap-around below cannot modify the caller's input and no
    # additional copy is needed.
    indices = indices.astype(np.int32)
    if not fill_missing:
        indices[indices < 0] += n

    # The code below treats -1 in ``locs`` as "no stored value at this
    # position"; only the remaining positions are kept in the result.
    locs = self.sp_index.lookup_array(indices)
    indexer = np.arange(len(locs), dtype=np.int32)
    mask = locs != -1
    if mask.any():
        indexer = indexer[mask]
        new_values = self.sp_values.take(locs[mask])
    else:
        # nothing stored was selected: the result has no stored values
        indexer = np.empty(shape=(0, ), dtype=np.int32)
        new_values = np.empty(shape=(0, ), dtype=self.sp_values.dtype)

    # Passing the existing index as ``kind`` makes the new index the
    # same type (block / integer) as the current one.
    sp_index = _make_index(len(indices), indexer, kind=self.sp_index)
    return self._simple_new(new_values, sp_index, self.fill_value)
351 | 372 |
|
352 | 373 | def __setitem__(self, key, value):
|
353 | 374 | # if com.is_integer(key):
|
@@ -525,16 +546,21 @@ def make_sparse(arr, kind='block', fill_value=nan):
|
525 | 546 | else:
|
526 | 547 | indices = np.arange(length, dtype=np.int32)[mask]
|
527 | 548 |
|
528 |
| - if kind == 'block': |
| 549 | + index = _make_index(length, indices, kind) |
| 550 | + sparsified_values = arr[mask] |
| 551 | + return sparsified_values, index |
| 552 | + |
| 553 | + |
def _make_index(length, indices, kind):
    """
    Construct a ``BlockIndex`` or ``IntIndex`` from ``indices``.

    Parameters
    ----------
    length : int
        Passed through as the first argument of the index constructor.
    indices : ndarray
        Passed to ``splib.get_blocks`` (block kind) or directly to
        ``IntIndex`` (integer kind).
    kind : {'block', 'integer'} or BlockIndex or IntIndex
        Either the kind name, or an existing index instance whose type
        selects the kind of index to create.

    Returns
    -------
    index : BlockIndex or IntIndex

    Raises
    ------
    ValueError
        If ``kind`` is neither of the accepted names nor an instance of
        one of the two index types.
    """
    if kind == 'block' or isinstance(kind, BlockIndex):
        block_locs, block_lens = splib.get_blocks(indices)
        return BlockIndex(length, block_locs, block_lens)

    if kind == 'integer' or isinstance(kind, IntIndex):
        return IntIndex(length, indices)

    raise ValueError('must be block or integer type')  # pragma: no cover
538 | 564 |
|
539 | 565 |
|
540 | 566 | ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method,
|
|
0 commit comments