Skip to content

Move inconsistent namespace check to pre-commit, fixup more files #37662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 8, 2020
Merged
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ repos:
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
types: [python]
exclude: ^(asv_bench|pandas/tests|doc)/
- id: inconsistent-namespace-usage
name: 'Check for inconsistent use of pandas namespace in tests'
entry: python scripts/check_for_inconsistent_pandas_namespace.py
language: python
types: [python]
files: ^pandas/tests/
- id: FrameOrSeriesUnion
name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
Expand Down
13 changes: 0 additions & 13 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ function invgrep {
return $((! $EXIT_STATUS))
}

function check_namespace {
local -r CLASS=${1}
grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}[(\.]"
test $? -gt 0
}

if [[ "$GITHUB_ACTIONS" == "true" ]]; then
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
INVGREP_PREPEND="##[error]"
Expand Down Expand Up @@ -120,13 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
invgrep -R --include=*.{py,pyx} '!r}' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

# -------------------------------------------------------------------------
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do
check_namespace ${class}
RET=$(($RET + $?))
done
echo $MSG "DONE"
fi

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def test_cast_category_to_extension_dtype(self, expected):
(
"datetime64[ns, MET]",
pd.DatetimeIndex(
[pd.Timestamp("2015-01-01 00:00:00+0100", tz="MET")]
[Timestamp("2015-01-01 00:00:00+0100", tz="MET")]
).array,
),
],
Expand Down Expand Up @@ -254,7 +254,7 @@ def _compare_other(self, s, data, op_name, other):

@pytest.mark.parametrize(
"categories",
[["a", "b"], [0, 1], [pd.Timestamp("2019"), pd.Timestamp("2020")]],
[["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]],
)
def test_not_equal_with_na(self, categories):
# https://github.com/pandas-dev/pandas/issues/32276
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def test_agg_over_numpy_arrays():
result = df.groupby("category").agg(sum)

expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]]
expected_index = pd.Index([1, 2], name="category")
expected_index = Index([1, 2], name="category")
expected_column = ["arraydata"]
expected = DataFrame(expected_data, index=expected_index, columns=expected_column)

Expand Down Expand Up @@ -497,7 +497,7 @@ def test_sum_uint64_overflow():
df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)
df = df + 9223372036854775807

index = pd.Index(
index = Index(
[9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64
)
expected = DataFrame(
Expand Down Expand Up @@ -596,7 +596,7 @@ def test_agg_lambda_with_timezone():
result = df.groupby("tag").agg({"date": lambda e: e.head(1)})
expected = DataFrame(
[pd.Timestamp("2018-01-01", tz="UTC")],
index=pd.Index([1], name="tag"),
index=Index([1], name="tag"),
columns=["date"],
)
tm.assert_frame_equal(result, expected)
Expand Down
11 changes: 5 additions & 6 deletions pandas/tests/groupby/test_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
DataFrame,
Index,
Expand Down Expand Up @@ -260,7 +259,7 @@ def test_groupby_timedelta_cython_count():
df = DataFrame(
{"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")}
)
expected = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt")
expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt")
result = df.groupby("g").delt.count()
tm.assert_series_equal(expected, result)

Expand Down Expand Up @@ -317,12 +316,12 @@ def test_count_non_nulls():
def test_count_object():
df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3})
result = df.groupby("c").a.count()
expected = Series([3, 3], index=pd.Index([2, 3], name="c"), name="a")
expected = Series([3, 3], index=Index([2, 3], name="c"), name="a")
tm.assert_series_equal(result, expected)

df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3})
result = df.groupby("c").a.count()
expected = Series([1, 3], index=pd.Index([2, 3], name="c"), name="a")
expected = Series([1, 3], index=Index([2, 3], name="c"), name="a")
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -354,7 +353,7 @@ def test_lower_int_prec_count():
)
result = df.groupby("grp").count()
expected = DataFrame(
{"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=pd.Index(list("ab"), name="grp")
{"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=Index(list("ab"), name="grp")
)
tm.assert_frame_equal(result, expected)

Expand All @@ -374,5 +373,5 @@ def __eq__(self, other):

df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)})
result = df.groupby("grp").count()
expected = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp"))
expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp"))
tm.assert_frame_equal(result, expected)
32 changes: 16 additions & 16 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,12 +614,12 @@ def test_list_grouper_with_nat(self):

# Grouper in a list grouping
result = df.groupby([grouper])
expected = {pd.Timestamp("2011-01-01"): Index(list(range(364)))}
expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
tm.assert_dict_equal(result.groups, expected)

# Test case without a list
result = df.groupby(grouper)
expected = {pd.Timestamp("2011-01-01"): 365}
expected = {Timestamp("2011-01-01"): 365}
tm.assert_dict_equal(result.groups, expected)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -938,12 +938,12 @@ def test_groupby_with_small_elem(self):
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
assert len(grouped.groups) == 2
assert grouped.ngroups == 2
assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups
assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups
assert (Timestamp("2014-09-30"), "start") in grouped.groups
assert (Timestamp("2013-10-31"), "start") in grouped.groups

res = grouped.get_group((pd.Timestamp("2014-09-30"), "start"))
res = grouped.get_group((Timestamp("2014-09-30"), "start"))
tm.assert_frame_equal(res, df.iloc[[0], :])
res = grouped.get_group((pd.Timestamp("2013-10-31"), "start"))
res = grouped.get_group((Timestamp("2013-10-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[1], :])

df = DataFrame(
Expand All @@ -953,12 +953,12 @@ def test_groupby_with_small_elem(self):
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
assert len(grouped.groups) == 2
assert grouped.ngroups == 2
assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups
assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups
assert (Timestamp("2014-09-30"), "start") in grouped.groups
assert (Timestamp("2013-10-31"), "start") in grouped.groups

res = grouped.get_group((pd.Timestamp("2014-09-30"), "start"))
res = grouped.get_group((Timestamp("2014-09-30"), "start"))
tm.assert_frame_equal(res, df.iloc[[0, 2], :])
res = grouped.get_group((pd.Timestamp("2013-10-31"), "start"))
res = grouped.get_group((Timestamp("2013-10-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[1], :])

# length=3
Expand All @@ -969,15 +969,15 @@ def test_groupby_with_small_elem(self):
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
assert len(grouped.groups) == 3
assert grouped.ngroups == 3
assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups
assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups
assert (pd.Timestamp("2014-08-31"), "start") in grouped.groups
assert (Timestamp("2014-09-30"), "start") in grouped.groups
assert (Timestamp("2013-10-31"), "start") in grouped.groups
assert (Timestamp("2014-08-31"), "start") in grouped.groups

res = grouped.get_group((pd.Timestamp("2014-09-30"), "start"))
res = grouped.get_group((Timestamp("2014-09-30"), "start"))
tm.assert_frame_equal(res, df.iloc[[0], :])
res = grouped.get_group((pd.Timestamp("2013-10-31"), "start"))
res = grouped.get_group((Timestamp("2013-10-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[1], :])
res = grouped.get_group((pd.Timestamp("2014-08-31"), "start"))
res = grouped.get_group((Timestamp("2014-08-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[2], :])

def test_grouping_string_repr(self):
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def test_quantile_missing_group_values_correct_results(
df = DataFrame({"key": key, "val": val})

expected = DataFrame(
expected_val, index=pd.Index(expected_key, name="key"), columns=["val"]
expected_val, index=Index(expected_key, name="key"), columns=["val"]
)

grp = df.groupby("key")
Expand Down Expand Up @@ -223,7 +223,7 @@ def test_groupby_quantile_nullable_array(values, q):
idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None])
true_quantiles = [0.0, 0.5, 1.0]
else:
idx = pd.Index(["x", "y"], name="a")
idx = Index(["x", "y"], name="a")
true_quantiles = [0.5]

expected = pd.Series(true_quantiles * 2, index=idx, name="b")
Expand Down Expand Up @@ -251,6 +251,6 @@ def test_groupby_timedelta_quantile():
pd.Timedelta("0 days 00:00:02.990000"),
]
},
index=pd.Index([1, 2], name="group"),
index=Index([1, 2], name="group"),
)
tm.assert_frame_equal(result, expected)
10 changes: 5 additions & 5 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,15 +452,15 @@ def test_groupby_groups_datetimeindex(self):
result = df.groupby(level="date").groups
dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"]
expected = {
Timestamp(date): pd.DatetimeIndex([date], name="date") for date in dates
Timestamp(date): DatetimeIndex([date], name="date") for date in dates
}
tm.assert_dict_equal(result, expected)

grouped = df.groupby(level="date")
for date in dates:
result = grouped.get_group(date)
data = [[df.loc[date, "A"], df.loc[date, "B"]]]
expected_index = pd.DatetimeIndex([date], name="date", freq="D")
expected_index = DatetimeIndex([date], name="date", freq="D")
expected = DataFrame(data, columns=list("AB"), index=expected_index)
tm.assert_frame_equal(result, expected)

Expand All @@ -484,7 +484,7 @@ def test_groupby_groups_datetimeindex_tz(self):
)
df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific"))

exp_idx1 = pd.DatetimeIndex(
exp_idx1 = DatetimeIndex(
[
"2011-07-19 07:00:00",
"2011-07-19 07:00:00",
Expand All @@ -508,13 +508,13 @@ def test_groupby_groups_datetimeindex_tz(self):
tm.assert_frame_equal(result, expected)

# by level
didx = pd.DatetimeIndex(dates, tz="Asia/Tokyo")
didx = DatetimeIndex(dates, tz="Asia/Tokyo")
df = DataFrame(
{"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]},
index=didx,
)

exp_idx = pd.DatetimeIndex(
exp_idx = DatetimeIndex(
["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"],
tz="Asia/Tokyo",
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ def test_categorical_and_not_categorical_key(observed):
# GH 32494
df_with_categorical = DataFrame(
{
"A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]),
"A": Categorical(["a", "b", "a"], categories=["a", "b", "c"]),
"B": [1, 2, 3],
"C": ["a", "b", "a"],
}
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/period/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import pandas as pd
from pandas import PeriodIndex
from pandas import PeriodIndex, Series
import pandas._testing as tm


Expand Down Expand Up @@ -154,7 +154,7 @@ def test_representation_to_series(self):
[idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9],
):
result = repr(pd.Series(idx))
result = repr(Series(idx))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on line 117 we have

exp1 = """Series([], dtype: period[D])"""

so this is to be consistent with that

assert result == expected

def test_summary(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def test_equals_numeric_other_index_type(self, other):
def test_lookups_datetimelike_values(self, vals):
# If we have datetime64 or timedelta64 values, make sure they are
# wrapped correctly GH#31163
ser = pd.Series(vals, index=range(3, 6))
ser = Series(vals, index=range(3, 6))
ser.index = ser.index.astype("float64")

expected = vals[1]
Expand Down Expand Up @@ -642,7 +642,7 @@ def test_range_float_union_dtype():
def test_uint_index_does_not_convert_to_float64(box):
# https://github.com/pandas-dev/pandas/issues/28279
# https://github.com/pandas-dev/pandas/issues/28023
series = pd.Series(
series = Series(
[0, 1, 2, 3, 4, 5],
index=[
7606741985629028552,
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/timedeltas/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

import pandas as pd
from pandas import TimedeltaIndex
from pandas import Series, TimedeltaIndex


class TestTimedeltaIndexRendering:
Expand Down Expand Up @@ -62,7 +62,7 @@ def test_representation_to_series(self):
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5]
):
result = repr(pd.Series(idx))
result = repr(Series(idx))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for consistency with

exp1 = """Series([], dtype: timedelta64[ns])"""

on line 46

assert result == expected

def test_summary(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ def test_with_missing_lzma_runtime():
import sys
import pytest
sys.modules['lzma'] = None
import pandas
df = pandas.DataFrame()
import pandas as pd
df = pd.DataFrame()
with pytest.raises(RuntimeError, match='lzma module'):
df.to_csv('foo.csv', compression='xz')
"""
Expand Down
32 changes: 16 additions & 16 deletions pandas/tests/resample/test_time_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

import pandas as pd
from pandas import DataFrame, Series
from pandas import DataFrame, Series, Timestamp
import pandas._testing as tm
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
Expand Down Expand Up @@ -306,21 +306,21 @@ def test_groupby_resample_interpolate():
expected_ind = pd.MultiIndex.from_tuples(
[
(50, "2018-01-07"),
(50, pd.Timestamp("2018-01-08")),
(50, pd.Timestamp("2018-01-09")),
(50, pd.Timestamp("2018-01-10")),
(50, pd.Timestamp("2018-01-11")),
(50, pd.Timestamp("2018-01-12")),
(50, pd.Timestamp("2018-01-13")),
(50, pd.Timestamp("2018-01-14")),
(50, pd.Timestamp("2018-01-15")),
(50, pd.Timestamp("2018-01-16")),
(50, pd.Timestamp("2018-01-17")),
(50, pd.Timestamp("2018-01-18")),
(50, pd.Timestamp("2018-01-19")),
(50, pd.Timestamp("2018-01-20")),
(50, pd.Timestamp("2018-01-21")),
(60, pd.Timestamp("2018-01-14")),
(50, Timestamp("2018-01-08")),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are for consistency with

        "convention='e', origin=Timestamp('2000-01-01 00:00:00'))"

on line 262

(50, Timestamp("2018-01-09")),
(50, Timestamp("2018-01-10")),
(50, Timestamp("2018-01-11")),
(50, Timestamp("2018-01-12")),
(50, Timestamp("2018-01-13")),
(50, Timestamp("2018-01-14")),
(50, Timestamp("2018-01-15")),
(50, Timestamp("2018-01-16")),
(50, Timestamp("2018-01-17")),
(50, Timestamp("2018-01-18")),
(50, Timestamp("2018-01-19")),
(50, Timestamp("2018-01-20")),
(50, Timestamp("2018-01-21")),
(60, Timestamp("2018-01-14")),
],
names=["volume", "week_starting"],
)
Expand Down
Loading