Skip to content

Commit b42a2c9

Browse files
committed
CLN: move diff, take to algorithms.py
1 parent a214e82 commit b42a2c9

20 files changed

+1105 -1067 lines changed

pandas/core/algorithms.py

+538 -90
Large diffs are not rendered by default.

pandas/core/categorical.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pandas import compat, lib
88
from pandas.compat import u
99

10-
from pandas.core.algorithms import factorize
10+
from pandas.core.algorithms import factorize, take_1d
1111
from pandas.core.base import (PandasObject, PandasDelegate,
1212
NoNewAttributesMixin, _shared_docs)
1313
import pandas.core.common as com
@@ -20,7 +20,7 @@
2020
is_dtype_equal, is_categorical_dtype, is_integer_dtype,
2121
_possibly_infer_to_datetimelike, get_dtype_kinds, is_list_like,
2222
is_sequence, is_null_slice, is_bool, _ensure_object, _ensure_int64,
23-
_coerce_indexer_dtype, take_1d)
23+
_coerce_indexer_dtype)
2424
from pandas.types.api import CategoricalDtype
2525
from pandas.util.terminal import get_terminal_size
2626
from pandas.core.config import get_option

pandas/core/common.py

-448
Large diffs are not rendered by default.

pandas/core/frame.py

+6 -5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from pandas.core.series import Series
4040
from pandas.core.categorical import Categorical
4141
import pandas.computation.expressions as expressions
42+
import pandas.core.algorithms as algos
4243
from pandas.computation.eval import eval as _eval
4344
from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
4445
OrderedDict, raise_with_traceback)
@@ -2709,8 +2710,8 @@ def _reindex_multi(self, axes, copy, fill_value):
27092710

27102711
if row_indexer is not None and col_indexer is not None:
27112712
indexer = row_indexer, col_indexer
2712-
new_values = com.take_2d_multi(self.values, indexer,
2713-
fill_value=fill_value)
2713+
new_values = algos.take_2d_multi(self.values, indexer,
2714+
fill_value=fill_value)
27142715
return self._constructor(new_values, index=new_index,
27152716
columns=new_columns)
27162717
else:
@@ -3084,11 +3085,11 @@ def duplicated(self, subset=None, keep='first'):
30843085
duplicated : Series
30853086
"""
30863087
from pandas.core.groupby import get_group_index
3087-
from pandas.core.algorithms import factorize
30883088
from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT
30893089

30903090
def f(vals):
3091-
labels, shape = factorize(vals, size_hint=min(len(self),
3091+
labels, shape = algos.factorize(vals,
3092+
size_hint=min(len(self),
30923093
_SIZE_HINT_LIMIT))
30933094
return labels.astype('i8', copy=False), len(shape)
30943095

@@ -5436,7 +5437,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
54365437
indexer = indexer_cache[id(index)] = index.get_indexer(columns)
54375438

54385439
values = _values_from_object(s)
5439-
aligned_values.append(com.take_1d(values, indexer))
5440+
aligned_values.append(algos.take_1d(values, indexer))
54405441

54415442
values = np.vstack(aligned_values)
54425443

pandas/core/groupby.py

+5 -5
Original file line numberDiff line numberDiff line change
@@ -1351,7 +1351,7 @@ def shift(self, periods=1, freq=None, axis=0):
13511351

13521352
output = {}
13531353
for name, obj in self._iterate_slices():
1354-
output[name] = com.take_nd(obj.values, indexer)
1354+
output[name] = algos.take_nd(obj.values, indexer)
13551355

13561356
return self._wrap_transformed_output(output)
13571357

@@ -1873,7 +1873,7 @@ def _aggregate_series_fast(self, obj, func):
18731873
dummy = obj._get_values(slice(None, 0)).to_dense()
18741874
indexer = _get_group_index_sorter(group_index, ngroups)
18751875
obj = obj.take(indexer, convert=False)
1876-
group_index = com.take_nd(group_index, indexer, allow_fill=False)
1876+
group_index = algos.take_nd(group_index, indexer, allow_fill=False)
18771877
grouper = lib.SeriesGrouper(obj, func, group_index, ngroups,
18781878
dummy)
18791879
result, counts = grouper.get_result()
@@ -3850,7 +3850,7 @@ def __init__(self, data, labels, ngroups, axis=0):
38503850
@cache_readonly
38513851
def slabels(self):
38523852
# Sorted labels
3853-
return com.take_nd(self.labels, self.sort_idx, allow_fill=False)
3853+
return algos.take_nd(self.labels, self.sort_idx, allow_fill=False)
38543854

38553855
@cache_readonly
38563856
def sort_idx(self):
@@ -4278,11 +4278,11 @@ def _reorder_by_uniques(uniques, labels):
42784278
mask = labels < 0
42794279

42804280
# move labels to right locations (ie, unsort ascending labels)
4281-
labels = com.take_nd(reverse_indexer, labels, allow_fill=False)
4281+
labels = algos.take_nd(reverse_indexer, labels, allow_fill=False)
42824282
np.putmask(labels, mask, -1)
42834283

42844284
# sort observed ids
4285-
uniques = com.take_nd(uniques, sorter, allow_fill=False)
4285+
uniques = algos.take_nd(uniques, sorter, allow_fill=False)
42864286

42874287
return uniques, labels
42884288

pandas/core/internals.py

+24 -23
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
_maybe_convert_string_to_object,
2121
is_categorical, is_datetimelike_v_numeric,
2222
is_numeric_v_string_like, is_internal_type)
23+
import pandas.core.algorithms as algos
2324
from pandas.types.api import DatetimeTZDtype
2425

2526
from pandas.core.index import Index, MultiIndex, _ensure_index
@@ -286,8 +287,8 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
286287
if fill_value is None:
287288
fill_value = self.fill_value
288289

289-
new_values = com.take_nd(self.values, indexer, axis,
290-
fill_value=fill_value, mask_info=mask_info)
290+
new_values = algos.take_nd(self.values, indexer, axis,
291+
fill_value=fill_value, mask_info=mask_info)
291292
return self.make_block(new_values, fastpath=True)
292293

293294
def get(self, item):
@@ -974,7 +975,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
974975
975976
"""
976977

977-
# com.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
978+
# algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
978979
# so need to preserve types
979980
# sparse is treated like an ndarray, but needs .get_values() shaping
980981

@@ -984,12 +985,12 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
984985

985986
if fill_tuple is None:
986987
fill_value = self.fill_value
987-
new_values = com.take_nd(values, indexer, axis=axis,
988-
allow_fill=False)
988+
new_values = algos.take_nd(values, indexer, axis=axis,
989+
allow_fill=False)
989990
else:
990991
fill_value = fill_tuple[0]
991-
new_values = com.take_nd(values, indexer, axis=axis,
992-
allow_fill=True, fill_value=fill_value)
992+
new_values = algos.take_nd(values, indexer, axis=axis,
993+
allow_fill=True, fill_value=fill_value)
993994

994995
if new_mgr_locs is None:
995996
if axis == 0:
@@ -1008,7 +1009,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
10081009

10091010
def diff(self, n, axis=1, mgr=None):
10101011
""" return block for the diff of the values """
1011-
new_values = com.diff(self.values, n, axis=axis)
1012+
new_values = algos.diff(self.values, n, axis=axis)
10121013
return [self.make_block(values=new_values, fastpath=True)]
10131014

10141015
def shift(self, periods, axis=0, mgr=None):
@@ -2711,11 +2712,11 @@ def get_ftype_counts(self):
27112712

27122713
def get_dtypes(self):
27132714
dtypes = np.array([blk.dtype for blk in self.blocks])
2714-
return com.take_1d(dtypes, self._blknos, allow_fill=False)
2715+
return algos.take_1d(dtypes, self._blknos, allow_fill=False)
27152716

27162717
def get_ftypes(self):
27172718
ftypes = np.array([blk.ftype for blk in self.blocks])
2718-
return com.take_1d(ftypes, self._blknos, allow_fill=False)
2719+
return algos.take_1d(ftypes, self._blknos, allow_fill=False)
27192720

27202721
def __getstate__(self):
27212722
block_values = [b.values for b in self.blocks]
@@ -3070,8 +3071,8 @@ def combine(self, blocks, copy=True):
30703071
new_blocks = []
30713072
for b in blocks:
30723073
b = b.copy(deep=copy)
3073-
b.mgr_locs = com.take_1d(inv_indexer, b.mgr_locs.as_array, axis=0,
3074-
allow_fill=False)
3074+
b.mgr_locs = algos.take_1d(inv_indexer, b.mgr_locs.as_array,
3075+
axis=0, allow_fill=False)
30753076
new_blocks.append(b)
30763077

30773078
new_axes = list(self.axes)
@@ -3451,8 +3452,8 @@ def value_getitem(placement):
34513452
new_blknos.fill(-1)
34523453
new_blknos[~is_deleted] = np.arange(self.nblocks -
34533454
len(removed_blknos))
3454-
self._blknos = com.take_1d(new_blknos, self._blknos, axis=0,
3455-
allow_fill=False)
3455+
self._blknos = algos.take_1d(new_blknos, self._blknos, axis=0,
3456+
allow_fill=False)
34563457
self.blocks = tuple(blk for i, blk in enumerate(self.blocks)
34573458
if i not in set(removed_blknos))
34583459

@@ -3632,10 +3633,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
36323633
blknos = self._blknos[slobj]
36333634
blklocs = self._blklocs[slobj]
36343635
else:
3635-
blknos = com.take_1d(self._blknos, slobj, fill_value=-1,
3636-
allow_fill=allow_fill)
3637-
blklocs = com.take_1d(self._blklocs, slobj, fill_value=-1,
3638-
allow_fill=allow_fill)
3636+
blknos = algos.take_1d(self._blknos, slobj, fill_value=-1,
3637+
allow_fill=allow_fill)
3638+
blklocs = algos.take_1d(self._blklocs, slobj, fill_value=-1,
3639+
allow_fill=allow_fill)
36393640

36403641
# When filling blknos, make sure blknos is updated before appending to
36413642
# blocks list, that way new blkno is exactly len(blocks).
@@ -3847,7 +3848,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None,
38473848
else:
38483849
fill_value = np.nan
38493850

3850-
new_values = com.take_1d(values, indexer, fill_value=fill_value)
3851+
new_values = algos.take_1d(values, indexer, fill_value=fill_value)
38513852

38523853
# fill if needed
38533854
if method is not None or limit is not None:
@@ -4676,8 +4677,8 @@ def get_mgr_concatenation_plan(mgr, indexers):
46764677

46774678
if 0 in indexers:
46784679
ax0_indexer = indexers.pop(0)
4679-
blknos = com.take_1d(mgr._blknos, ax0_indexer, fill_value=-1)
4680-
blklocs = com.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1)
4680+
blknos = algos.take_1d(mgr._blknos, ax0_indexer, fill_value=-1)
4681+
blklocs = algos.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1)
46814682
else:
46824683

46834684
if mgr._is_single_block:
@@ -4932,8 +4933,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
49324933

49334934
else:
49344935
for ax, indexer in self.indexers.items():
4935-
values = com.take_nd(values, indexer, axis=ax,
4936-
fill_value=fill_value)
4936+
values = algos.take_nd(values, indexer, axis=ax,
4937+
fill_value=fill_value)
49374938

49384939
return values
49394940

pandas/core/ops.py

+3 -2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from pandas.tslib import iNaT
2020
from pandas.compat import bind_method
2121
import pandas.core.missing as missing
22+
import pandas.core.algorithms as algos
2223
from pandas.core.common import (is_list_like, notnull, isnull,
2324
_values_from_object, _maybe_match_name,
2425
needs_i8_conversion, is_datetimelike_v_numeric,
@@ -632,10 +633,10 @@ def wrapper(left, right, name=name, na_op=na_op):
632633
return_indexers=True)
633634

634635
if lidx is not None:
635-
lvalues = com.take_1d(lvalues, lidx)
636+
lvalues = algos.take_1d(lvalues, lidx)
636637

637638
if ridx is not None:
638-
rvalues = com.take_1d(rvalues, ridx)
639+
rvalues = algos.take_1d(rvalues, ridx)
639640

640641
arr = na_op(lvalues, rvalues)
641642

pandas/core/reshape.py

+5 -4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.core.groupby import get_group_index, _compress_group_index
1919

2020
import pandas.core.common as com
21-
import pandas.algos as algos
21+
import pandas.core.algorithms as algos
22+
import pandas.algos as _algos
2223

2324
from pandas.core.index import MultiIndex, _get_na_value
2425

@@ -109,10 +110,10 @@ def _make_sorted_values_labels(self):
109110
comp_index, obs_ids = get_compressed_ids(to_sort, sizes)
110111
ngroups = len(obs_ids)
111112

112-
indexer = algos.groupsort_indexer(comp_index, ngroups)[0]
113+
indexer = _algos.groupsort_indexer(comp_index, ngroups)[0]
113114
indexer = _ensure_platform_int(indexer)
114115

115-
self.sorted_values = com.take_nd(self.values, indexer, axis=0)
116+
self.sorted_values = algos.take_nd(self.values, indexer, axis=0)
116117
self.sorted_labels = [l.take(indexer) for l in to_sort]
117118

118119
def _make_selectors(self):
@@ -155,7 +156,7 @@ def get_result(self):
155156
# rare case, level values not observed
156157
if len(obs_ids) < self.full_shape[1]:
157158
inds = (value_mask.sum(0) > 0).nonzero()[0]
158-
values = com.take_nd(values, inds, axis=1)
159+
values = algos.take_nd(values, inds, axis=1)
159160
columns = columns[inds]
160161

161162
# may need to coerce categoricals here

pandas/core/series.py

+9 -9
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545

4646
import pandas.core.ops as ops
47-
from pandas.core import algorithms
47+
import pandas.core.algorithms as algos
4848

4949
import pandas.core.common as com
5050
import pandas.core.datetools as datetools
@@ -1202,7 +1202,7 @@ def mode(self):
12021202
modes : Series (sorted)
12031203
"""
12041204
# TODO: Add option for bins like value_counts()
1205-
return algorithms.mode(self)
1205+
return algos.mode(self)
12061206

12071207
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
12081208
False: 'first'})
@@ -1424,7 +1424,7 @@ def diff(self, periods=1):
14241424
-------
14251425
diffed : Series
14261426
"""
1427-
result = com.diff(_values_from_object(self), periods)
1427+
result = algos.diff(_values_from_object(self), periods)
14281428
return self._constructor(result, index=self.index).__finalize__(self)
14291429

14301430
def autocorr(self, lag=1):
@@ -1889,7 +1889,7 @@ def nlargest(self, n=5, keep='first'):
18891889
>>> s = pd.Series(np.random.randn(1e6))
18901890
>>> s.nlargest(10) # only sorts up to the N requested
18911891
"""
1892-
return algorithms.select_n(self, n=n, keep=keep, method='nlargest')
1892+
return algos.select_n(self, n=n, keep=keep, method='nlargest')
18931893

18941894
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
18951895
False: 'first'})
@@ -1927,7 +1927,7 @@ def nsmallest(self, n=5, keep='first'):
19271927
>>> s = pd.Series(np.random.randn(1e6))
19281928
>>> s.nsmallest(10) # only sorts up to the N requested
19291929
"""
1930-
return algorithms.select_n(self, n=n, keep=keep, method='nsmallest')
1930+
return algos.select_n(self, n=n, keep=keep, method='nsmallest')
19311931

19321932
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
19331933
"""
@@ -2081,7 +2081,7 @@ def map_f(values, f):
20812081
arg = self._constructor(arg, index=arg.keys())
20822082

20832083
indexer = arg.index.get_indexer(values)
2084-
new_values = com.take_1d(arg._values, indexer)
2084+
new_values = algos.take_1d(arg._values, indexer)
20852085
return self._constructor(new_values,
20862086
index=self.index).__finalize__(self)
20872087
else:
@@ -2233,7 +2233,7 @@ def _reindex_indexer(self, new_index, indexer, copy):
22332233
return self
22342234

22352235
# be subclass-friendly
2236-
new_values = com.take_1d(self.get_values(), indexer)
2236+
new_values = algos.take_1d(self.get_values(), indexer)
22372237
return self._constructor(new_values, index=new_index)
22382238

22392239
def _needs_reindex_multi(self, axes, method, level):
@@ -2384,7 +2384,7 @@ def isin(self, values):
23842384
dtype: bool
23852385
23862386
"""
2387-
result = algorithms.isin(_values_from_object(self), values)
2387+
result = algos.isin(_values_from_object(self), values)
23882388
return self._constructor(result, index=self.index).__finalize__(self)
23892389

23902390
def between(self, left, right, inclusive=True):
@@ -2627,7 +2627,7 @@ def asof(self, where):
26272627
where = Index(where)
26282628

26292629
locs = self.index.asof_locs(where, notnull(values))
2630-
new_values = com.take_1d(values, locs)
2630+
new_values = algos.take_1d(values, locs)
26312631
return self._constructor(new_values, index=where).__finalize__(self)
26322632

26332633
def to_timestamp(self, freq=None, how='start', copy=True):

pandas/core/strings.py

+2 -1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
from pandas.core.common import (isnull, notnull, _values_from_object,
55
is_bool_dtype,
66
is_list_like, is_categorical_dtype,
7-
is_object_dtype, take_1d)
7+
is_object_dtype)
8+
from pandas.core.algorithms import take_1d
89
import pandas.compat as compat
910
from pandas.core.base import AccessorProperty, NoNewAttributesMixin
1011
from pandas.util.decorators import Appender, deprecate_kwarg

0 commit comments

Comments
 (0)