Skip to content

Commit d4ede0f

Browse files
committed
WIP/PEP8: pandas/core
1 parent 13f659f commit d4ede0f

File tree

3 files changed

+130
-83
lines changed

3 files changed

+130
-83
lines changed

pandas/core/algorithms.py

+25-18
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import pandas.hashtable as htable
1313
from pandas.compat import string_types
1414

15+
1516
def match(to_match, values, na_sentinel=-1):
1617
"""
1718
Compute locations of to_match into values
@@ -44,7 +45,8 @@ def match(to_match, values, na_sentinel=-1):
4445
# replace but return a numpy array
4546
# use a Series because it handles dtype conversions properly
4647
from pandas.core.series import Series
47-
result = Series(result.ravel()).replace(-1,na_sentinel).values.reshape(result.shape)
48+
result = Series(result.ravel()).replace(-1, na_sentinel).values.\
49+
reshape(result.shape)
4850

4951
return result
5052

@@ -63,6 +65,7 @@ def unique(values):
6365
uniques
6466
"""
6567
values = com._asarray_tuplesafe(values)
68+
6669
f = lambda htype, caster: _unique_generic(values, htype, caster)
6770
return _hashtable_algo(f, values.dtype)
6871

@@ -95,9 +98,9 @@ def isin(comps, values):
9598
# work-around for numpy < 1.8 and comparisons on py3
9699
# faster for larger cases to use np.in1d
97100
if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
98-
f = lambda x, y: np.in1d(x,np.asarray(list(y)))
101+
f = lambda x, y: np.in1d(x, np.asarray(list(y)))
99102
else:
100-
f = lambda x, y: lib.ismember_int64(x,set(y))
103+
f = lambda x, y: lib.ismember_int64(x, set(y))
101104

102105
# may need i8 conversion for proper membership testing
103106
if com.is_datetime64_dtype(comps):
@@ -115,6 +118,7 @@ def isin(comps, values):
115118

116119
return f(comps, values)
117120

121+
118122
def _hashtable_algo(f, dtype, return_dtype=None):
119123
"""
120124
f(HashTable, type_caster) -> result
@@ -148,8 +152,6 @@ def _unique_generic(values, table_type, type_caster):
148152
return type_caster(uniques)
149153

150154

151-
152-
153155
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
154156
"""
155157
Encode input values as an enumerated type or categorical variable
@@ -169,12 +171,15 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
169171
-------
170172
labels : the indexer to the original array
171173
uniques : ndarray (1-d) or Index
172-
the unique values. Index is returned when passed values is Index or Series
174+
the unique values. Index is returned when passed values is Index or
175+
Series
173176
174-
note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex
177+
note: an array of Periods will ignore sort as it returns an always sorted
178+
PeriodIndex
175179
"""
176180
if order is not None:
177-
msg = "order is deprecated. See https://github.com/pydata/pandas/issues/6926"
181+
msg = "order is deprecated. See " \
182+
"https://github.com/pydata/pandas/issues/6926"
178183
warn(msg, FutureWarning, stacklevel=2)
179184

180185
from pandas.core.index import Index
@@ -203,10 +208,12 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
203208

204209
# order ints before strings
205210
ordered = np.concatenate([
206-
np.sort(np.array([ e for i, e in enumerate(uniques) if f(e) ],dtype=object)) for f in [ lambda x: not isinstance(x,string_types),
207-
lambda x: isinstance(x,string_types) ]
208-
])
209-
sorter = com._ensure_platform_int(t.lookup(com._ensure_object(ordered)))
211+
np.sort(np.array([e for i, e in enumerate(uniques) if f(e)],
212+
dtype=object)) for f in
213+
[lambda x: not isinstance(x, string_types),
214+
lambda x: isinstance(x, string_types)]])
215+
sorter = com._ensure_platform_int(t.lookup(
216+
com._ensure_object(ordered)))
210217

211218
reverse_indexer = np.empty(len(sorter), dtype=np.int_)
212219
reverse_indexer.put(sorter, np.arange(len(sorter)))
@@ -276,7 +283,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
276283
is_period = com.is_period_arraylike(values)
277284
is_datetimetz = com.is_datetimetz(values)
278285

279-
if com.is_datetime_or_timedelta_dtype(dtype) or is_period or is_datetimetz:
286+
if com.is_datetime_or_timedelta_dtype(dtype) or is_period or \
287+
is_datetimetz:
280288

281289
if is_period:
282290
values = PeriodIndex(values)
@@ -300,7 +308,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
300308
else:
301309
keys = keys.astype(dtype)
302310

303-
304311
elif com.is_integer_dtype(dtype):
305312
values = com._ensure_int64(values)
306313
keys, counts = htable.value_count_scalar64(values, dropna)
@@ -322,7 +329,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
322329

323330
if bins is not None:
324331
# TODO: This next line should be more efficient
325-
result = result.reindex(np.arange(len(cat.categories)), fill_value=0)
332+
result = result.reindex(np.arange(len(cat.categories)),
333+
fill_value=0)
326334
result.index = bins[:-1]
327335

328336
if sort:
@@ -525,12 +533,11 @@ def _finalize_nsmallest(arr, kth_val, n, keep, narr):
525533

526534

527535
def nsmallest(arr, n, keep='first'):
528-
'''
536+
"""
529537
Find the indices of the n smallest values of a numpy array.
530538
531539
Note: Fails silently with NaN.
532-
533-
'''
540+
"""
534541
if keep == 'last':
535542
arr = arr[::-1]
536543

0 commit comments

Comments
 (0)