Skip to content

DEPS/CLN: remove distutils usage #41207

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into master from replace-distutils
May 5, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2153645
DEPS: remove most of distutils usage #41199
fangchenli Apr 28, 2021
6edbf7a
remove old compat check for pyarrow
fangchenli Apr 28, 2021
9370f02
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli Apr 28, 2021
46e9271
remove more unused compat
fangchenli Apr 28, 2021
ff0d148
fix version
fangchenli Apr 29, 2021
f72e5fa
remove outdated skip
fangchenli Apr 29, 2021
3b8e377
fix more version, remove outdated compat
fangchenli Apr 29, 2021
d967c8e
fix more version
fangchenli Apr 29, 2021
9743f0b
fix pyarrow min version in tests, remove unused skip
fangchenli Apr 29, 2021
66bcc6a
add xarray skip back
fangchenli Apr 29, 2021
7eaf325
Merge branch 'master' into replace-distutils
fangchenli Apr 29, 2021
729602a
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli Apr 29, 2021
ef9952c
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli Apr 30, 2021
4e858b5
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli Apr 30, 2021
3ab03d3
vendor packaging.version
fangchenli Apr 30, 2021
4800880
vendor packaging.version
fangchenli Apr 30, 2021
325a978
fix flake8 error
fangchenli Apr 30, 2021
af362b0
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli Apr 30, 2021
0f9f05f
debug
fangchenli Apr 30, 2021
ac12b9a
fix import error
fangchenli Apr 30, 2021
06cdb4d
fix import space
fangchenli Apr 30, 2021
0c8bd70
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli May 1, 2021
df0b26f
remove distutils build
fangchenli May 1, 2021
edd368f
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli May 1, 2021
240c1aa
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli May 2, 2021
39239e3
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli May 3, 2021
d9e9e7b
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli May 3, 2021
186432f
Merge remote-tracking branch 'upstream/master' into replace-distutils
fangchenli May 5, 2021
17d4a39
update Version()
fangchenli May 5, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import distutils.version
import importlib
import sys
import types
import warnings

from packaging.version import Version

# Update install.rst when updating versions!

VERSIONS = {
Expand Down Expand Up @@ -128,7 +129,7 @@ def import_optional_dependency(
minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
if minimum_version:
version = get_version(module_to_get)
if distutils.version.LooseVersion(version) < minimum_version:
if Version(version) < Version(minimum_version):
msg = (
f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
f"(version '{version}' currently installed)."
Expand Down
12 changes: 6 additions & 6 deletions pandas/compat/numpy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
""" support numpy compatibility across versions """

from distutils.version import LooseVersion
import re

import numpy as np
from packaging.version import Version

# numpy versioning
_np_version = np.__version__
_nlv = LooseVersion(_np_version)
np_version_under1p18 = _nlv < LooseVersion("1.18")
np_version_under1p19 = _nlv < LooseVersion("1.19")
np_version_under1p20 = _nlv < LooseVersion("1.20")
_nlv = Version(_np_version)
np_version_under1p18 = _nlv < Version("1.18")
np_version_under1p19 = _nlv < Version("1.19")
np_version_under1p20 = _nlv < Version("1.20")
is_numpy_dev = ".dev" in str(_nlv)
_min_numpy_ver = "1.17.3"


if _nlv < _min_numpy_ver:
if _nlv < Version(_min_numpy_ver):
raise ImportError(
f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n"
f"your numpy version is {_np_version}.\n"
Expand Down
4 changes: 2 additions & 2 deletions pandas/compat/numpy/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
from distutils.version import LooseVersion
from typing import (
Any,
Dict,
Expand All @@ -27,6 +26,7 @@
__version__,
ndarray,
)
from packaging.version import Version

from pandas._libs.lib import (
is_bool,
Expand Down Expand Up @@ -128,7 +128,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs):
ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None

if LooseVersion(__version__) >= LooseVersion("1.17.0"):
if Version(__version__) >= Version("1.17.0"):
# GH-26361. NumPy added radix sort and changed default to None.
ARGSORT_DEFAULTS["kind"] = None

Expand Down
191 changes: 94 additions & 97 deletions pandas/core/arrays/_arrow_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from distutils.version import LooseVersion
import json

import numpy as np
import pyarrow

from pandas.core.arrays.interval import VALID_CLOSED

_pyarrow_version_ge_015 = LooseVersion(pyarrow.__version__) >= LooseVersion("0.15")


def pyarrow_array_to_numpy_and_mask(arr, dtype):
"""
Expand Down Expand Up @@ -48,97 +45,97 @@ def pyarrow_array_to_numpy_and_mask(arr, dtype):
return data, mask


if _pyarrow_version_ge_015:
# the pyarrow extension types are only available for pyarrow 0.15+

class ArrowPeriodType(pyarrow.ExtensionType):
def __init__(self, freq):
# attributes need to be set first before calling
# super init (as that calls serialize)
self._freq = freq
pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period")

@property
def freq(self):
return self._freq

def __arrow_ext_serialize__(self):
metadata = {"freq": self.freq}
return json.dumps(metadata).encode()

@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
metadata = json.loads(serialized.decode())
return ArrowPeriodType(metadata["freq"])

def __eq__(self, other):
if isinstance(other, pyarrow.BaseExtensionType):
return type(self) == type(other) and self.freq == other.freq
else:
return NotImplemented

def __hash__(self):
return hash((str(self), self.freq))

def to_pandas_dtype(self):
import pandas as pd

return pd.PeriodDtype(freq=self.freq)

# register the type with a dummy instance
_period_type = ArrowPeriodType("D")
pyarrow.register_extension_type(_period_type)

class ArrowIntervalType(pyarrow.ExtensionType):
def __init__(self, subtype, closed):
# attributes need to be set first before calling
# super init (as that calls serialize)
assert closed in VALID_CLOSED
self._closed = closed
if not isinstance(subtype, pyarrow.DataType):
subtype = pyarrow.type_for_alias(str(subtype))
self._subtype = subtype

storage_type = pyarrow.struct([("left", subtype), ("right", subtype)])
pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")

@property
def subtype(self):
return self._subtype

@property
def closed(self):
return self._closed

def __arrow_ext_serialize__(self):
metadata = {"subtype": str(self.subtype), "closed": self.closed}
return json.dumps(metadata).encode()

@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
metadata = json.loads(serialized.decode())
subtype = pyarrow.type_for_alias(metadata["subtype"])
closed = metadata["closed"]
return ArrowIntervalType(subtype, closed)

def __eq__(self, other):
if isinstance(other, pyarrow.BaseExtensionType):
return (
type(self) == type(other)
and self.subtype == other.subtype
and self.closed == other.closed
)
else:
return NotImplemented

def __hash__(self):
return hash((str(self), str(self.subtype), self.closed))

def to_pandas_dtype(self):
import pandas as pd

return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed)

# register the type with a dummy instance
_interval_type = ArrowIntervalType(pyarrow.int64(), "left")
pyarrow.register_extension_type(_interval_type)
class ArrowPeriodType(pyarrow.ExtensionType):
def __init__(self, freq):
# attributes need to be set first before calling
# super init (as that calls serialize)
self._freq = freq
pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period")

@property
def freq(self):
return self._freq

def __arrow_ext_serialize__(self):
metadata = {"freq": self.freq}
return json.dumps(metadata).encode()

@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
metadata = json.loads(serialized.decode())
return ArrowPeriodType(metadata["freq"])

def __eq__(self, other):
if isinstance(other, pyarrow.BaseExtensionType):
return type(self) == type(other) and self.freq == other.freq
else:
return NotImplemented

def __hash__(self):
return hash((str(self), self.freq))

def to_pandas_dtype(self):
import pandas as pd

return pd.PeriodDtype(freq=self.freq)


# register the type with a dummy instance
_period_type = ArrowPeriodType("D")
pyarrow.register_extension_type(_period_type)


class ArrowIntervalType(pyarrow.ExtensionType):
def __init__(self, subtype, closed):
# attributes need to be set first before calling
# super init (as that calls serialize)
assert closed in VALID_CLOSED
self._closed = closed
if not isinstance(subtype, pyarrow.DataType):
subtype = pyarrow.type_for_alias(str(subtype))
self._subtype = subtype

storage_type = pyarrow.struct([("left", subtype), ("right", subtype)])
pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")

@property
def subtype(self):
return self._subtype

@property
def closed(self):
return self._closed

def __arrow_ext_serialize__(self):
metadata = {"subtype": str(self.subtype), "closed": self.closed}
return json.dumps(metadata).encode()

@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
metadata = json.loads(serialized.decode())
subtype = pyarrow.type_for_alias(metadata["subtype"])
closed = metadata["closed"]
return ArrowIntervalType(subtype, closed)

def __eq__(self, other):
if isinstance(other, pyarrow.BaseExtensionType):
return (
type(self) == type(other)
and self.subtype == other.subtype
and self.closed == other.closed
)
else:
return NotImplemented

def __hash__(self):
return hash((str(self), str(self.subtype), self.closed))

def to_pandas_dtype(self):
import pandas as pd

return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed)


# register the type with a dummy instance
_interval_type = ArrowIntervalType(pyarrow.int64(), "left")
pyarrow.register_extension_type(_interval_type)
6 changes: 3 additions & 3 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from distutils.version import LooseVersion
import re
from typing import (
TYPE_CHECKING,
Expand All @@ -10,6 +9,7 @@
)

import numpy as np
from packaging.version import Version

from pandas._libs import (
lib,
Expand Down Expand Up @@ -55,7 +55,7 @@
# PyArrow backed StringArrays are available starting at 1.0.0, but this
# file is imported from even if pyarrow is < 1.0.0, before pyarrow.compute
# and its compute functions existed. GH38801
if LooseVersion(pa.__version__) >= "1.0.0":
if Version(pa.__version__) >= Version("1.0.0"):
import pyarrow.compute as pc

ARROW_CMP_FUNCS = {
Expand Down Expand Up @@ -223,7 +223,7 @@ def __init__(self, values):
def _chk_pyarrow_available(cls) -> None:
# TODO: maybe update import_optional_dependency to allow a minimum
# version to be specified rather than use the global minimum
if pa is None or LooseVersion(pa.__version__) < "1.0.0":
if pa is None or Version(pa.__version__) < Version("1.0.0"):
msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray."
raise ImportError(msg)

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from __future__ import annotations

from datetime import datetime
from distutils.version import LooseVersion
from functools import partial
import operator
from typing import (
Expand All @@ -14,6 +13,7 @@
)

import numpy as np
from packaging.version import Version

from pandas._libs.tslibs import Timestamp

Expand Down Expand Up @@ -623,7 +623,7 @@ def __init__(self, name: str):

if name not in MATHOPS or (
NUMEXPR_INSTALLED
and NUMEXPR_VERSION < LooseVersion("2.6.9")
and Version(NUMEXPR_VERSION) < Version("2.6.9")
and name in ("floor", "ceil")
):
raise ValueError(f'"{name}" is not a supported function')
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/util/numba_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Common utilities for Numba operations"""
from distutils.version import LooseVersion
import types
from typing import (
Callable,
Expand All @@ -9,6 +8,7 @@
)

import numpy as np
from packaging.version import Version

from pandas.compat._optional import import_optional_dependency
from pandas.errors import NumbaUtilError
Expand Down Expand Up @@ -89,7 +89,7 @@ def jit_user_function(
"""
numba = import_optional_dependency("numba")

if LooseVersion(numba.__version__) >= LooseVersion("0.49.0"):
if Version(numba.__version__) >= Version("0.49.0"):
is_jitted = numba.extending.is_jitted(func)
else:
is_jitted = isinstance(func, numba.targets.registry.CPUDispatcher)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/clipboard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@
get_errno,
sizeof,
)
import distutils.spawn
import os
import platform
from shutil import which
import subprocess
import time
import warnings
Expand Down Expand Up @@ -528,7 +528,7 @@ def determine_clipboard():
return init_windows_clipboard()

if platform.system() == "Linux":
if distutils.spawn.find_executable("wslconfig.exe"):
if which("wslconfig.exe"):
return init_wsl_clipboard()

# Setup for the MAC OS X platform:
Expand Down
Loading