From cc8cc4c6910db697966c838ef220f17016ade20c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 17 Oct 2018 11:21:16 -0700 Subject: [PATCH 1/2] Address some todos --- pandas/core/common.py | 26 ------------- pandas/core/frame.py | 12 +++++- pandas/core/generic.py | 10 ----- pandas/core/indexes/api.py | 16 +++++++- pandas/core/indexes/timedeltas.py | 46 ++--------------------- pandas/core/internals/blocks.py | 4 +- pandas/tests/frame/test_mutate_columns.py | 2 +- 7 files changed, 31 insertions(+), 85 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 8bbaabe8c08af..f6e40faa79740 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -68,17 +68,6 @@ def consensus_name_attr(objs): return name -# TODO: only used once in frame.py; belongs elsewhere? -def get_info_slice(obj, indexer): - """Slice the info axis of `obj` with `indexer`.""" - if not hasattr(obj, '_info_axis_number'): - msg = 'object of type {typ!r} has no info axis' - raise TypeError(msg.format(typ=type(obj).__name__)) - slices = [slice(None)] * obj.ndim - slices[obj._info_axis_number] = indexer - return tuple(slices) - - def maybe_box(indexer, values, obj, key): # if we have multiples coming back, box em @@ -432,21 +421,6 @@ def random_state(state=None): "RandomState, or None") -# TODO: only used once in indexes.api; belongs elsewhere? -def get_distinct_objs(objs): - """ - Return a list with distinct elements of "objs" (different ids). - Preserves order. - """ - ids = set() - res = [] - for obj in objs: - if not id(obj) in ids: - ids.add(id(obj)) - res.append(obj) - return res - - def _pipe(obj, func, *args, **kwargs): """ Apply a function ``func`` to object ``obj`` either by passing obj as the diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f3873b4299a5..ab1906dcd4be9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3205,7 +3205,7 @@ def is_dtype_instance_mapper(idx, dtype): exclude_these.iloc[idx] = not any(map(f, exclude)) dtype_indexer = include_these & exclude_these - return self.loc[com.get_info_slice(self, dtype_indexer)] + return self.loc[_get_info_slice(self, dtype_indexer)] def _box_item_values(self, key, values): items = self.columns[self.columns.get_loc(key)] @@ -8040,3 +8040,13 @@ def _from_nested_dict(data): def _put_str(s, space): return u'{s}'.format(s=s)[:space].ljust(space) + + +def _get_info_slice(obj, indexer): + """Slice the info axis of `obj` with `indexer`.""" + if not hasattr(obj, '_info_axis_number'): + msg = 'object of type {typ!r} has no info axis' + raise TypeError(msg.format(typ=type(obj).__name__)) + slices = [slice(None)] * obj.ndim + slices[obj._info_axis_number] = indexer + return tuple(slices) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba050bfc8db77..38688a7e60566 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4069,16 +4069,6 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False, return self._constructor(new_data).__finalize__(self) - # TODO: unused; remove? - def _reindex_axis(self, new_index, fill_method, axis, copy): - new_data = self._data.reindex_axis(new_index, axis=axis, - method=fill_method, copy=copy) - - if new_data is self._data and not copy: - return self - else: - return self._constructor(new_data).__finalize__(self) - def filter(self, items=None, like=None, regex=None, axis=None): """ Subset rows or columns of dataframe according to labels in diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index e50a4b099a8e1..6e0c5e1bc2fe0 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -53,9 +53,23 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): return _get_combined_index(obs_idxes, intersect=intersect, sort=sort) +def _get_distinct_objs(objs): + """ + Return a list with distinct elements of "objs" (different ids). + Preserves order. + """ + ids = set() + res = [] + for obj in objs: + if not id(obj) in ids: + ids.add(id(obj)) + res.append(obj) + return res + + def _get_combined_index(indexes, intersect=False, sort=False): # TODO: handle index names! - indexes = com.get_distinct_objs(indexes) + indexes = _get_distinct_objs(indexes) if len(indexes) == 0: index = Index([]) elif len(indexes) == 1: diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 56b6dc7051d9f..8c94ac2ea8256 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -511,16 +511,13 @@ def _maybe_cast_slice_bound(self, label, side, kind): return label - def _get_string_slice(self, key, use_lhs=True, use_rhs=True): - freq = getattr(self, 'freqstr', - getattr(self, 'inferred_freq', None)) + def _get_string_slice(self, key): if is_integer(key) or is_float(key) or key is NaT: self._invalid_indexer('slice', key) - loc = self._partial_td_slice(key, freq, use_lhs=use_lhs, - use_rhs=use_rhs) + loc = self._partial_td_slice(key) return loc - def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True): + def _partial_td_slice(self, key): # given a key, try to figure out a location for a partial slice if not isinstance(key, compat.string_types): @@ -528,43 +525,6 @@ def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True): raise NotImplementedError - # TODO(wesm): dead code - # parsed = _coerce_scalar_to_timedelta_type(key, box=True) - - # is_monotonic = self.is_monotonic - - # # figure out the resolution of the passed td - # # and round to it - - # # t1 = parsed.round(reso) - - # t2 = t1 + to_offset(parsed.resolution) - Timedelta(1, 'ns') - - # stamps = self.asi8 - - # if is_monotonic: - - # # we are out of range - # if (len(stamps) and ((use_lhs and t1.value < stamps[0] and - # t2.value < stamps[0]) or - # ((use_rhs and t1.value > stamps[-1] and - # t2.value > stamps[-1])))): - # raise KeyError - - # # a monotonic (sorted) series can be sliced - # left = (stamps.searchsorted(t1.value, side='left') - # if use_lhs else None) - # right = (stamps.searchsorted(t2.value, side='right') - # if use_rhs else None) - - # return slice(left, right) - - # lhs_mask = (stamps >= t1.value) if use_lhs else True - # rhs_mask = (stamps <= t2.value) if use_rhs else True - - # # try to find a the dates - # return (lhs_mask & rhs_mask).nonzero()[0] - @Substitution(klass='TimedeltaIndex') @Appender(_shared_docs['searchsorted']) def searchsorted(self, value, side='left', sorter=None): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 214fcb097f736..6fb1184f48b69 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2812,9 +2812,7 @@ def _astype(self, dtype, mgr=None, **kwargs): def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: - # TODO: this still uses asarray, instead of dtype.type - element = np.array(element) - return element.dtype == _NS_DTYPE or element.dtype == np.int64 + return tipo == _NS_DTYPE or tipo == np.int64 return (is_integer(element) or isinstance(element, datetime) or isna(element)) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 51ffe2966b4e5..102cc52aa46cb 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -233,7 +233,7 @@ def test_pop(self): self.frame['foo'] = 'bar' self.frame.pop('foo') assert 'foo' not in self.frame - # TODO assert self.frame.columns.name == 'baz' + assert self.frame.columns.name == 'baz' # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[ From 0e1ceaa796427960181d7bddf88913e3ff38e2e2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 18 Oct 2018 14:30:59 -0700 Subject: [PATCH 2/2] inline function --- pandas/core/frame.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ab1906dcd4be9..e8acb37ad3b58 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3157,6 +3157,14 @@ def select_dtypes(self, include=None, exclude=None): 4 True 1.0 5 False 2.0 """ + def _get_info_slice(obj, indexer): + """Slice the info axis of `obj` with `indexer`.""" + if not hasattr(obj, '_info_axis_number'): + msg = 'object of type {typ!r} has no info axis' + raise TypeError(msg.format(typ=type(obj).__name__)) + slices = [slice(None)] * obj.ndim + slices[obj._info_axis_number] = indexer + return tuple(slices) if not is_list_like(include): include = (include,) if include is not None else () @@ -8040,13 +8048,3 @@ def _from_nested_dict(data): def _put_str(s, space): return u'{s}'.format(s=s)[:space].ljust(space) - - -def _get_info_slice(obj, indexer): - """Slice the info axis of `obj` with `indexer`.""" - if not hasattr(obj, '_info_axis_number'): - msg = 'object of type {typ!r} has no info axis' - raise TypeError(msg.format(typ=type(obj).__name__)) - slices = [slice(None)] * obj.ndim - slices[obj._info_axis_number] = indexer - return tuple(slices)