From 313db6b64ed9142c7dccef849a95b1074dae3271 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 11 Aug 2020 19:27:41 +0100 Subject: [PATCH 1/4] PERF: make RangeIndex.__iter__ iterate over ._range --- pandas/core/indexes/range.py | 4 ++++ pandas/tests/indexes/ranges/test_range.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 3577a7aacc008..7efb70f0752e2 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -373,6 +373,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): def tolist(self): return list(self._range) + @doc(Int64Index.__iter__) + def __iter__(self): + yield from self._range + @doc(Int64Index._shallow_copy) def _shallow_copy(self, values=None, name: Label = no_default): name = self.name if name is no_default else name diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index ef4bb9a0869b0..3313df0e28c7d 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -167,6 +167,10 @@ def test_cache(self): idx.any() assert idx._cache == {} + for _ in idx: + ... + assert idx._cache == {} + df = pd.DataFrame({"a": range(10)}, index=idx) df.loc[50] From c93cab003311807ee3fcb91c3039a4168b19d0e9 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 12 Aug 2020 06:41:26 +0100 Subject: [PATCH 2/4] ... -> pass --- pandas/tests/indexes/ranges/test_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 3313df0e28c7d..c4c242746e92c 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -168,7 +168,7 @@ def test_cache(self): assert idx._cache == {} for _ in idx: - ... + pass assert idx._cache == {} df = pd.DataFrame({"a": range(10)}, index=idx) From 1c5a6c79e7ab48d64ad57d5d44e0711558cf47e1 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 12 Aug 2020 18:05:49 +0100 Subject: [PATCH 3/4] added ASVs --- asv_bench/benchmarks/index_object.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index b242de6a17208..70b96183825d8 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -78,6 +78,16 @@ def time_get_loc_inc(self): def time_get_loc_dec(self): self.idx_dec.get_loc(100000) + def time_iter_inc(self): + for i in self.idx_inc: + if i >= 100_000: + break + + def time_iter_dec(self): + for i in self.idx_dec: + if i >= 100_000: + break + class IndexEquals: def setup(self): From 4206801430e38a6d20bf25dbea2ac05146451bb1 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 12 Aug 2020 22:36:23 +0100 Subject: [PATCH 4/4] index_object cleanup --- asv_bench/benchmarks/index_object.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 70b96183825d8..9c05019c70396 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -57,8 +57,8 @@ def time_datetime_difference_disjoint(self): class Range: def setup(self): - self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3) - self.idx_dec = RangeIndex(start=10 ** 7, stop=-1, step=-3) + self.idx_inc = RangeIndex(start=0, stop=10 ** 6, step=3) + self.idx_dec = RangeIndex(start=10 ** 6, stop=-1, step=-3) def time_max(self): self.idx_inc.max() @@ -73,25 +73,23 @@ def time_min_trivial(self): self.idx_inc.min() def time_get_loc_inc(self): - self.idx_inc.get_loc(900000) + self.idx_inc.get_loc(900_000) def time_get_loc_dec(self): - self.idx_dec.get_loc(100000) + self.idx_dec.get_loc(100_000) def time_iter_inc(self): - for i in self.idx_inc: - if i >= 100_000: - break + for _ in self.idx_inc: + pass def time_iter_dec(self): - for i in self.idx_dec: - if i >= 100_000: - break + for _ in self.idx_dec: + pass class IndexEquals: def setup(self): - idx_large_fast = RangeIndex(100000) + idx_large_fast = RangeIndex(100_000) idx_small_slow = date_range(start="1/1/2012", periods=1) self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow]) @@ -104,7 +102,7 @@ def time_non_object_equals_multiindex(self): class IndexAppend: def setup(self): - N = 10000 + N = 10_000 self.range_idx = RangeIndex(0, 100) self.int_idx = self.range_idx.astype(int) self.obj_idx = self.int_idx.astype(str) @@ -178,7 +176,7 @@ def time_get_loc_non_unique_sorted(self, dtype): class Float64IndexMethod: # GH 13166 def setup(self): - N = 100000 + N = 100_000 a = np.arange(N) self.ind = Float64Index(a * 4.8000000418824129e-08) @@ -222,7 +220,7 @@ class GC: params = [1, 2, 5] def create_use_drop(self): - idx = Index(list(range(1000 * 1000))) + idx = Index(list(range(1_000_000))) idx._engine def peakmem_gc_instances(self, N):