Skip to content

Updated script to check inconsistent pandas namespace #37848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def zero(request):

Examples
--------
>>> arr = pd.RangeIndex(5)
>>> arr = RangeIndex(5)
>>> arr / zeros
Float64Index([nan, inf, inf, inf, inf], dtype='float64')
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ def test_addition_ops(self):
tdi + pd.Int64Index([1, 2, 3])

# this is a union!
# pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi)
# pytest.raises(TypeError, lambda : pd.Int64Index([1,2,3]) + tdi)

result = tdi + dti # name will be reset
expected = DatetimeIndex(["20130102", pd.NaT, "20130105"])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def test_interval(self):
tm.assert_index_equal(cat.categories, idx)

# overlapping
idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
idx = IntervalIndex([Interval(0, 2), Interval(0, 1)])
cat = Categorical(idx, categories=idx)
expected_codes = np.array([0, 1], dtype="int8")
tm.assert_numpy_array_equal(cat.codes, expected_codes)
Expand Down
16 changes: 7 additions & 9 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,8 +720,8 @@ def test_constructor_period_dict(self):
@pytest.mark.parametrize(
"data,dtype",
[
(pd.Period("2012-01", freq="M"), "period[M]"),
(pd.Period("2012-02-01", freq="D"), "period[D]"),
(Period("2012-01", freq="M"), "period[M]"),
(Period("2012-02-01", freq="D"), "period[D]"),
(Interval(left=0, right=5), IntervalDtype("int64")),
(Interval(left=0.1, right=0.5), IntervalDtype("float64")),
],
Expand Down Expand Up @@ -2577,7 +2577,7 @@ def test_from_records_series_list_dict(self):
def test_from_records_series_categorical_index(self):
# GH 32805
index = CategoricalIndex(
[pd.Interval(-20, -10), pd.Interval(-10, 0), pd.Interval(0, 10)]
[Interval(-20, -10), Interval(-10, 0), Interval(0, 10)]
)
series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index)
frame = DataFrame.from_records(series_of_dicts, index=index)
Expand Down Expand Up @@ -2628,7 +2628,7 @@ class List(list):
[
Categorical(list("aabbc")),
SparseArray([1, np.nan, np.nan, np.nan]),
IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
IntervalArray([Interval(0, 1), Interval(1, 5)]),
PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")),
],
)
Expand All @@ -2648,12 +2648,10 @@ def test_datetime_date_tuple_columns_from_dict(self):

def test_construct_with_two_categoricalindex_series(self):
# GH 14600
s1 = Series(
[39, 6, 4], index=pd.CategoricalIndex(["female", "male", "unknown"])
)
s1 = Series([39, 6, 4], index=CategoricalIndex(["female", "male", "unknown"]))
s2 = Series(
[2, 152, 2, 242, 150],
index=pd.CategoricalIndex(["f", "female", "m", "male", "unknown"]),
index=CategoricalIndex(["f", "female", "m", "male", "unknown"]),
)
result = DataFrame([s1, s2])
expected = DataFrame(
Expand Down Expand Up @@ -2717,7 +2715,7 @@ def test_dataframe_constructor_infer_multiindex(self):
(["1", "2"]),
(list(date_range("1/1/2011", periods=2, freq="H"))),
(list(date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))),
([pd.Interval(left=0, right=5)]),
([Interval(left=0, right=5)]),
],
)
def test_constructor_list_str(self, input_vals, string_dtype):
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def test_is_unique_interval(self, closed):
assert idx.is_unique is True

# unique overlapping - shared endpoints
idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
assert idx.is_unique is True

# unique nested
Expand Down Expand Up @@ -279,14 +279,14 @@ def test_monotonic(self, closed):
assert idx._is_strictly_monotonic_decreasing is False

# increasing overlapping shared endpoints
idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
assert idx.is_monotonic is True
assert idx._is_strictly_monotonic_increasing is True
assert idx.is_monotonic_decreasing is False
assert idx._is_strictly_monotonic_decreasing is False

# decreasing overlapping shared endpoints
idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
assert idx.is_monotonic is False
assert idx._is_strictly_monotonic_increasing is False
assert idx.is_monotonic_decreasing is True
Expand Down Expand Up @@ -872,7 +872,7 @@ def test_is_all_dates(self):
year_2017 = Interval(
Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
)
year_2017_index = pd.IntervalIndex([year_2017])
year_2017_index = IntervalIndex([year_2017])
assert not year_2017_index._is_all_dates

@pytest.mark.parametrize("key", [[5], (2, 3)])
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def test_constructor_coercion_signed_to_unsigned(self, uint_dtype):

def test_constructor_unwraps_index(self):
idx = Index([1, 2])
result = pd.Int64Index(idx)
result = Int64Index(idx)
expected = np.array([1, 2], dtype="int64")
tm.assert_numpy_array_equal(result._data, expected)

Expand Down Expand Up @@ -614,8 +614,8 @@ def test_int_float_union_dtype(dtype):
# https://github.com/pandas-dev/pandas/issues/26778
# [u]int | float -> float
index = Index([0, 2, 3], dtype=dtype)
other = pd.Float64Index([0.5, 1.5])
expected = pd.Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])
other = Float64Index([0.5, 1.5])
expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])
result = index.union(other)
tm.assert_index_equal(result, expected)

Expand All @@ -626,9 +626,9 @@ def test_int_float_union_dtype(dtype):
def test_range_float_union_dtype():
# https://github.com/pandas-dev/pandas/issues/26778
index = pd.RangeIndex(start=0, stop=3)
other = pd.Float64Index([0.5, 1.5])
other = Float64Index([0.5, 1.5])
result = index.union(other)
expected = pd.Float64Index([0.0, 0.5, 1, 1.5, 2.0])
expected = Float64Index([0.0, 0.5, 1, 1.5, 2.0])
tm.assert_index_equal(result, expected)

result = other.union(index)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ def test_construction_consistency(self):
"data_constructor", [list, np.array], ids=["list", "ndarray[object]"]
)
def test_constructor_infer_period(self, data_constructor):
data = [pd.Period("2000", "D"), pd.Period("2001", "D"), None]
data = [Period("2000", "D"), Period("2001", "D"), None]
result = Series(data_constructor(data))
expected = Series(period_array(data))
tm.assert_series_equal(result, expected)
Expand All @@ -1057,7 +1057,7 @@ def test_construct_from_ints_including_iNaT_scalar_period_dtype(self):
assert isna(series[2])

def test_constructor_period_incompatible_frequency(self):
data = [pd.Period("2000", "D"), pd.Period("2001", "A")]
data = [Period("2000", "D"), Period("2001", "A")]
result = Series(data)
assert result.dtype == object
assert result.tolist() == data
Expand Down Expand Up @@ -1539,7 +1539,7 @@ def test_constructor_list_of_periods_infers_period_dtype(self):
assert series.dtype == "Period[D]"

series = Series(
[pd.Period("2011-01-01", freq="D"), pd.Period("2011-02-01", freq="D")]
[Period("2011-01-01", freq="D"), Period("2011-02-01", freq="D")]
)
assert series.dtype == "Period[D]"

Expand Down
49 changes: 22 additions & 27 deletions scripts/check_for_inconsistent_pandas_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,18 @@

PATTERN = r"""
(
(?<!pd\.)(?<!\w) # check class_name start with pd. or character
{class_name}\( # match DataFrame but not pd.DataFrame or tm.makeDataFrame
(?<!pd\.)(?<!\w) # check class_name doesn't start with pd. or character
([A-Z]\w+)\( # match DataFrame but not pd.DataFrame or tm.makeDataFrame
.* # match anything
pd\.{class_name}\( # only match e.g. pd.DataFrame
pd\.\2\( # only match e.g. pd.DataFrame
)|
(
pd\.{class_name}\( # only match e.g. pd.DataFrame
pd\.([A-Z]\w+)\( # only match e.g. pd.DataFrame
.* # match anything
(?<!pd\.)(?<!\w) # check class_name start with pd. or character
{class_name}\( # match DataFrame but not pd.DataFrame or tm.makeDataFrame
(?<!pd\.)(?<!\w) # check class_name doesn't start with pd. or character
\4\( # match DataFrame but not pd.DataFrame or tm.makeDataFrame
)
"""
CLASS_NAMES = (
"Series",
"DataFrame",
"Index",
"MultiIndex",
"Timestamp",
"Timedelta",
"TimedeltaIndex",
"DatetimeIndex",
"Categorical",
)
ERROR_MESSAGE = "Found both `pd.{class_name}` and `{class_name}` in {path}"


Expand All @@ -47,16 +36,22 @@ def main(argv: Optional[Sequence[str]] = None) -> None:
parser.add_argument("paths", nargs="*", type=Path)
args = parser.parse_args(argv)

for class_name in CLASS_NAMES:
pattern = re.compile(
PATTERN.format(class_name=class_name).encode(),
flags=re.MULTILINE | re.DOTALL | re.VERBOSE,
)
for path in args.paths:
contents = path.read_bytes()
match = pattern.search(contents)
assert match is None, ERROR_MESSAGE.format(
class_name=class_name, path=str(path)
pattern = re.compile(
PATTERN.encode(),
flags=re.MULTILINE | re.DOTALL | re.VERBOSE,
)
for path in args.paths:
contents = path.read_bytes()
match = pattern.search(contents)
if match is None:
continue
if match.group(2) is not None:
raise AssertionError(
ERROR_MESSAGE.format(class_name=match.group(2).decode(), path=str(path))
)
if match.group(4) is not None:
raise AssertionError(
ERROR_MESSAGE.format(class_name=match.group(4).decode(), path=str(path))
)


Expand Down