From 86e9774cb2ff48152339b4bc1148ea1df4142fb9 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 17:13:21 +0000 Subject: [PATCH 1/6] DEPR: remove Int64Index, UInt64Index, Float64Index --- asv_bench/benchmarks/hash_functions.py | 20 ++++++------ .../benchmarks/index_cached_properties.py | 6 ++-- asv_bench/benchmarks/index_object.py | 5 ++- asv_bench/benchmarks/indexing.py | 30 ++++++++--------- doc/source/reference/indexing.rst | 3 -- pandas/__init__.py | 32 ------------------- pandas/core/dtypes/generic.py | 4 +-- pandas/tests/api/test_api.py | 5 +-- pyproject.toml | 2 +- 9 files changed, 32 insertions(+), 75 deletions(-) diff --git a/asv_bench/benchmarks/hash_functions.py b/asv_bench/benchmarks/hash_functions.py index da752b902b4fd..c6e73e28f4a27 100644 --- a/asv_bench/benchmarks/hash_functions.py +++ b/asv_bench/benchmarks/hash_functions.py @@ -57,14 +57,14 @@ def time_unique(self, exponent): class NumericSeriesIndexing: params = [ - (pd.Int64Index, pd.UInt64Index, pd.Float64Index), + (np.int64, np.uint64, np.float64), (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6), ] - param_names = ["index_dtype", "N"] + param_names = ["dtype", "N"] - def setup(self, index, N): - vals = np.array(list(range(55)) + [54] + list(range(55, N - 1))) - indices = index(vals) + def setup(self, dtype, N): + vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype) + indices = pd.Index(vals) self.data = pd.Series(np.arange(N), index=indices) def time_loc_slice(self, index, N): @@ -75,15 +75,15 @@ def time_loc_slice(self, index, N): class NumericSeriesIndexingShuffled: params = [ - (pd.Int64Index, pd.UInt64Index, pd.Float64Index), + (np.int64, np.uint64, np.float64), (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6), ] - param_names = ["index_dtype", "N"] + param_names = ["dtype", "N"] - def setup(self, index, N): - vals = np.array(list(range(55)) + [54] + list(range(55, N - 1))) + def setup(self, dtype, N): + vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype) np.random.shuffle(vals) - indices = index(vals) + indices = pd.Index(vals) self.data = pd.Series(np.arange(N), index=indices) def time_loc_slice(self, index, N): diff --git a/asv_bench/benchmarks/index_cached_properties.py b/asv_bench/benchmarks/index_cached_properties.py index 349841f695416..b3d8de39a858a 100644 --- a/asv_bench/benchmarks/index_cached_properties.py +++ b/asv_bench/benchmarks/index_cached_properties.py @@ -30,7 +30,7 @@ def setup(self, index_type): elif index_type == "DatetimeIndex": self.idx = pd.date_range("1/1/2000", freq="T", periods=N) elif index_type == "Int64Index": - self.idx = pd.Index(range(N)) + self.idx = pd.Index(range(N), dtype="int64") elif index_type == "PeriodIndex": self.idx = pd.period_range("1/1/2000", freq="T", periods=N) elif index_type == "RangeIndex": @@ -40,9 +40,9 @@ def setup(self, index_type): elif index_type == "TimedeltaIndex": self.idx = pd.TimedeltaIndex(range(N)) elif index_type == "Float64Index": - self.idx = pd.Float64Index(range(N)) + self.idx = pd.Index(range(N), dtype="float64") elif index_type == "UInt64Index": - self.idx = pd.UInt64Index(range(N)) + self.idx = pd.Index(range(N), dtype="uint64") elif index_type == "CategoricalIndex": self.idx = pd.CategoricalIndex(range(N), range(N)) else: diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index d4e2ebfed08b4..9b72483745169 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -4,7 +4,6 @@ from pandas import ( DatetimeIndex, - Float64Index, Index, IntervalIndex, MultiIndex, @@ -202,8 +201,8 @@ class Float64IndexMethod: # GH 13166 def setup(self): N = 100_000 - a = np.arange(N) - self.ind = Float64Index(a * 4.8000000418824129e-08) + a = np.arange(N, dtype=np.float64) + self.ind = Index(a * 4.8000000418824129e-08) def time_get_loc(self): self.ind.get_loc(0) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 69e3d166943a8..3efc39c7e1efe 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -12,12 +12,10 @@ from pandas import ( CategoricalIndex, DataFrame, - Float64Index, - Int64Index, + Index, IntervalIndex, MultiIndex, Series, - UInt64Index, concat, date_range, option_context, @@ -30,17 +28,17 @@ class NumericSeriesIndexing: params = [ - (Int64Index, UInt64Index, Float64Index), + (np.int64, np.uint64, np.float64), ("unique_monotonic_inc", "nonunique_monotonic_inc"), ] - param_names = ["index_dtype", "index_structure"] + param_names = ["dtype", "index_structure"] - def setup(self, index, index_structure): + def setup(self, dtype, index_structure): N = 10**6 indices = { - "unique_monotonic_inc": index(range(N)), - "nonunique_monotonic_inc": index( - list(range(55)) + [54] + list(range(55, N - 1)) + "unique_monotonic_inc": Index(range(N), dtype=dtype), + "nonunique_monotonic_inc": Index( + list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype ), } self.data = Series(np.random.rand(N), index=indices[index_structure]) @@ -159,17 +157,17 @@ def time_boolean_rows_boolean(self): class DataFrameNumericIndexing: params = [ - (Int64Index, UInt64Index, Float64Index), + (np.int64, np.uint64, np.float64), ("unique_monotonic_inc", "nonunique_monotonic_inc"), ] - param_names = ["index_dtype", "index_structure"] + param_names = ["dtype", "index_structure"] - def setup(self, index, index_structure): + def setup(self, dtype, index_structure): N = 10**5 indices = { - "unique_monotonic_inc": index(range(N)), - "nonunique_monotonic_inc": index( - list(range(55)) + [54] + list(range(55, N - 1)) + "unique_monotonic_inc": Index(range(N), dtype=dtype), + "nonunique_monotonic_inc": Index( + list(range(55)) + [54] + list(range(55, N - 1)), dtype=dtype ), } self.idx_dupe = np.array(range(30)) * 99 @@ -201,7 +199,7 @@ class Take: def setup(self, index): N = 100000 indexes = { - "int": Int64Index(np.arange(N)), + "int": Index(np.arange(N), dtype=np.int64), "datetime": date_range("2011-01-01", freq="S", periods=N), } index = indexes[index] diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 81148b4a29df5..2fc4b846e5d18 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -166,9 +166,6 @@ Numeric Index :template: autosummary/class_without_autosummary.rst RangeIndex - Int64Index - UInt64Index - Float64Index .. We need this autosummary so that the methods are generated. .. Separate block, since they aren't classes. diff --git a/pandas/__init__.py b/pandas/__init__.py index cae768dae722e..585a1ae341217 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -183,38 +183,6 @@ __git_version__ = v.get("full-revisionid") del get_versions, v -# GH 27101 -__deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"] - - -def __dir__() -> list[str]: - # GH43028 - # Int64Index etc. are deprecated, but we still want them to be available in the dir. - # Remove in Pandas 2.0, when we remove Int64Index etc. from the code base. - return list(globals().keys()) + __deprecated_num_index_names - - -def __getattr__(name): - import warnings - - if name in __deprecated_num_index_names: - warnings.warn( - f"pandas.{name} is deprecated " - "and will be removed from pandas in a future version. " - "Use pandas.Index with the appropriate dtype instead.", - FutureWarning, - stacklevel=2, - ) - from pandas.core.api import Float64Index, Int64Index, UInt64Index - - return { - "Float64Index": Float64Index, - "Int64Index": Int64Index, - "UInt64Index": UInt64Index, - }[name] - - raise AttributeError(f"module 'pandas' has no attribute '{name}'") - # module level doc-string __doc__ = """ diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index f9e96acf60e50..ef9c529ea9bc8 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -13,16 +13,13 @@ CategoricalIndex, DataFrame, DatetimeIndex, - Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, RangeIndex, Series, TimedeltaIndex, - UInt64Index, ) from pandas.core.arrays import ( DatetimeArray, @@ -32,6 +29,7 @@ TimedeltaArray, ) from pandas.core.generic import NDFrame + from pandas.core.indexes.api import Float64Index, Int64Index, UInt64Index # define abstract base classes to enable isinstance type checking on our diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b3a60c1fc5d37..995b1668046d2 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -55,17 +55,14 @@ class TestPDApi(Base): "DatetimeIndex", "ExcelFile", "ExcelWriter", - "Float64Index", "Flags", "Grouper", "HDFStore", "Index", - "Int64Index", "MultiIndex", "Period", "PeriodIndex", "RangeIndex", - "UInt64Index", "Series", "SparseDtype", "StringDtype", @@ -93,7 +90,7 @@ class TestPDApi(Base): ] # these are already deprecated; awaiting removal - deprecated_classes: list[str] = ["Float64Index", "Int64Index", "UInt64Index"] + deprecated_classes: list[str] = [] # external modules exposed in pandas namespace modules: list[str] = [] diff --git a/pyproject.toml b/pyproject.toml index 71b1f44dbff6f..2cad126a50bd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -173,7 +173,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst minversion = "6.0" -addopts = "--strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" +addopts = "--pdbcls=IPython.terminal.debugger:TerminalPdb --skip-slow --skip-network --skip-db --strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true testpaths = "pandas" From 723a5d4eac20d47e477530b203debaddd23a477b Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 17:35:10 +0000 Subject: [PATCH 2/6] fix isort --- .pre-commit-config.yaml | 5 ----- pandas/core/dtypes/generic.py | 6 +++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6aa1f5659365f..77771367f3e7f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -59,11 +59,6 @@ repos: - flake8==5.0.4 - flake8-bugbear==22.7.1 - pandas-dev-flaker==0.5.0 -- repo: https://github.com/pycqa/pylint - rev: v2.15.5 - hooks: - - id: pylint - stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index ef9c529ea9bc8..9b30ff60570ce 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -29,7 +29,11 @@ TimedeltaArray, ) from pandas.core.generic import NDFrame - from pandas.core.indexes.api import Float64Index, Int64Index, UInt64Index + from pandas.core.indexes.api import ( + Float64Index, + Int64Index, + UInt64Index, + ) # define abstract base classes to enable isinstance type checking on our From f3cceb83825a264026503a83b445cccf2fcabc7b Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 17:45:15 +0000 Subject: [PATCH 3/6] fix stuff --- .pre-commit-config.yaml | 5 +++++ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 77771367f3e7f..6aa1f5659365f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -59,6 +59,11 @@ repos: - flake8==5.0.4 - flake8-bugbear==22.7.1 - pandas-dev-flaker==0.5.0 +- repo: https://github.com/pycqa/pylint + rev: v2.15.5 + hooks: + - id: pylint + stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: diff --git a/pyproject.toml b/pyproject.toml index 2cad126a50bd7..71b1f44dbff6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -173,7 +173,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst minversion = "6.0" -addopts = "--pdbcls=IPython.terminal.debugger:TerminalPdb --skip-slow --skip-network --skip-db --strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" +addopts = "--strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true testpaths = "pandas" From d2118e3f8c09856d86f86d309f155e81fb9f9585 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 20:03:03 +0000 Subject: [PATCH 4/6] fix pyright & mypy --- pandas/conftest.py | 3 ++- pandas/core/construction.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/internals/array_manager.py | 2 +- pandas/core/internals/blocks.py | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 308f63a4ebe5c..a64dec7027c57 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -56,6 +56,7 @@ import pandas as pd from pandas import ( + compat, DataFrame, Interval, Period, @@ -79,7 +80,7 @@ has_pyarrow = True zoneinfo = None -if pd.compat.PY39: +if compat.PY39: # Import "zoneinfo" could not be resolved (reportMissingImports) import zoneinfo # type: ignore[no-redef] diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 3ae509e74074e..9b9c24083b1a1 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -68,10 +68,10 @@ if TYPE_CHECKING: from pandas import ( - ExtensionArray, Index, Series, ) + from pandas.core.arrays.base import ExtensionArray def array( diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 6fcad23e4b4c3..b72cc401a16de 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -70,7 +70,7 @@ from pandas.core.tools.times import to_time if TYPE_CHECKING: - from pandas import ( + from pandas.core.api import ( DataFrame, Float64Index, PeriodIndex, diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index d325e5e9b92cc..f4b4aec99251b 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -96,7 +96,7 @@ ) if TYPE_CHECKING: - from pandas import Float64Index + from pandas.core.api import Float64Index T = TypeVar("T", bound="BaseArrayManager") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cab8901ff3596..ad1383b50bb0c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -109,7 +109,7 @@ from pandas.core.indexers import check_setitem_lengths if TYPE_CHECKING: - from pandas import ( + from pandas.core.api import ( Float64Index, Index, ) From d1cb7f339a573a591d5aed73cc2712e29b5d7a45 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 20:39:47 +0000 Subject: [PATCH 5/6] fix isort --- pandas/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index a64dec7027c57..5237b94764686 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -56,13 +56,13 @@ import pandas as pd from pandas import ( - compat, DataFrame, Interval, Period, Series, Timedelta, Timestamp, + compat, ) import pandas._testing as tm from pandas.core import ops From 8bd451d5cf29a4a65c01ab03352a768ec56151a5 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 23:10:47 +0000 Subject: [PATCH 6/6] fix mypy --- pandas/core/arrays/datetimelike.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b1d9fba22b484..a5dc53711ed37 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1354,8 +1354,9 @@ def _addsub_object_array(self, other: np.ndarray, op): res_values = op(self.astype("O"), np.asarray(other)) - result = pd_array(res_values.ravel()) - result = extract_array(result, extract_numpy=True).reshape(self.shape) + ext_arr = pd_array(res_values.ravel()) + result = cast(np.ndarray, extract_array(ext_arr, extract_numpy=True)) + result = result.reshape(self.shape) return result def _time_shift(