Skip to content

CI: Check for inconsistent pandas namespace usage #37188

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits on Oct 21, 2020
Merged
12 changes: 12 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ function invgrep {
return $((! $EXIT_STATUS))
}

# Usage: check_namespace <ClassName>
# CI guard for consistent pandas namespace usage: in files that already call
# the bare " ClassName(" spelling, flag any remaining "pd.ClassName(" usage.
# Prints each offending line (file:line:content) and returns non-zero when
# any inconsistency is found; returns zero otherwise.
function check_namespace {
    local class="${1}"
    local matches
    # Step 1: find files (filtered by --include) containing the unqualified
    #         spelling " ClassName(".
    # Step 2: within only those files, collect lines still using "pd.ClassName(".
    # xargs -r skips running grep entirely when step 1 yields no files; the
    # original relied on grep being invoked with no operands and reading an
    # already-exhausted stdin, which is fragile.
    matches=$(grep -R -l --include="test_analytics.py" " ${class}(" | xargs -r grep -n "pd\.${class}(")
    if [[ -n "${matches}" ]]; then
        # Surface the offending lines so the CI log shows exactly what to fix.
        echo "${matches}"
        return 1
    fi
}

if [[ "$GITHUB_ACTIONS" == "true" ]]; then
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
INVGREP_PREPEND="##[error]"
Expand Down Expand Up @@ -252,6 +257,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
MSG='Check code for instances of os.remove' ; echo $MSG
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for inconsistent use of pandas namespace' ; echo $MSG
check_namespace "Series"
RET=$(($RET + $?))
check_namespace "DataFrame"
RET=$(($RET + $?))
echo $MSG "DONE"
fi

### CODE ###
Expand Down
22 changes: 11 additions & 11 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ def test_isna_isnull(self, isna_f):
assert not isna_f(-np.inf)

# type
assert not isna_f(type(pd.Series(dtype=object)))
assert not isna_f(type(pd.Series(dtype=np.float64)))
assert not isna_f(type(Series(dtype=object)))
assert not isna_f(type(Series(dtype=np.float64)))
assert not isna_f(type(pd.DataFrame()))

# series
Expand Down Expand Up @@ -247,11 +247,11 @@ def test_datetime_other_units(self):
tm.assert_numpy_array_equal(isna(values), exp)
tm.assert_numpy_array_equal(notna(values), ~exp)

exp = pd.Series([False, True, False])
s = pd.Series(values)
exp = Series([False, True, False])
s = Series(values)
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)
s = pd.Series(values, dtype=object)
s = Series(values, dtype=object)
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)

Expand All @@ -278,11 +278,11 @@ def test_timedelta_other_units(self):
tm.assert_numpy_array_equal(isna(values), exp)
tm.assert_numpy_array_equal(notna(values), ~exp)

exp = pd.Series([False, True, False])
s = pd.Series(values)
exp = Series([False, True, False])
s = Series(values)
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)
s = pd.Series(values, dtype=object)
s = Series(values, dtype=object)
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)

Expand All @@ -292,11 +292,11 @@ def test_period(self):
tm.assert_numpy_array_equal(isna(idx), exp)
tm.assert_numpy_array_equal(notna(idx), ~exp)

exp = pd.Series([False, True, False])
s = pd.Series(idx)
exp = Series([False, True, False])
s = Series(idx)
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)
s = pd.Series(idx, dtype=object)
s = Series(idx, dtype=object)
tm.assert_series_equal(isna(s), exp)
tm.assert_series_equal(notna(s), ~exp)

Expand Down
34 changes: 17 additions & 17 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_drop_and_dropna_caching(self):
# tst that cacher updates
original = Series([1, 2, np.nan], name="A")
expected = Series([1, 2], dtype=original.dtype, name="A")
df = pd.DataFrame({"A": original.values.copy()})
df = DataFrame({"A": original.values.copy()})
df2 = df.copy()
df["A"].dropna()
tm.assert_series_equal(df["A"], original)
Expand Down Expand Up @@ -203,7 +203,7 @@ def test_dropna_categorical_interval_index(self):
# GH 25087
ii = pd.IntervalIndex.from_breaks([0, 2.78, 3.14, 6.28])
ci = pd.CategoricalIndex(ii)
df = pd.DataFrame({"A": list("abc")}, index=ci)
df = DataFrame({"A": list("abc")}, index=ci)

expected = df
result = df.dropna()
Expand Down Expand Up @@ -303,8 +303,8 @@ def test_fillna_datelike(self):
def test_fillna_tzaware(self):
# with timezone
# GH#15855
df = pd.DataFrame({"A": [pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]})
exp = pd.DataFrame(
df = DataFrame({"A": [pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]})
exp = DataFrame(
{
"A": [
pd.Timestamp("2012-11-11 00:00:00+01:00"),
Expand All @@ -314,8 +314,8 @@ def test_fillna_tzaware(self):
)
tm.assert_frame_equal(df.fillna(method="pad"), exp)

df = pd.DataFrame({"A": [pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]})
exp = pd.DataFrame(
df = DataFrame({"A": [pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]})
exp = DataFrame(
{
"A": [
pd.Timestamp("2012-11-11 00:00:00+01:00"),
Expand All @@ -328,14 +328,14 @@ def test_fillna_tzaware(self):
def test_fillna_tzaware_different_column(self):
# with timezone in another column
# GH#15522
df = pd.DataFrame(
df = DataFrame(
{
"A": pd.date_range("20130101", periods=4, tz="US/Eastern"),
"B": [1, 2, np.nan, np.nan],
}
)
result = df.fillna(method="pad")
expected = pd.DataFrame(
expected = DataFrame(
{
"A": pd.date_range("20130101", periods=4, tz="US/Eastern"),
"B": [1.0, 2.0, 2.0, 2.0],
Expand Down Expand Up @@ -378,7 +378,7 @@ def test_na_actions_categorical(self):

# make sure that fillna takes missing values into account
c = Categorical([np.nan, "b", np.nan], categories=["a", "b"])
df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]})
df = DataFrame({"cats": c, "vals": [1, 2, 3]})

cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"])
df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})
Expand Down Expand Up @@ -427,15 +427,15 @@ def test_fillna_categorical_nan(self):
def test_fillna_downcast(self):
# GH 15277
# infer int64 from float64
df = pd.DataFrame({"a": [1.0, np.nan]})
df = DataFrame({"a": [1.0, np.nan]})
result = df.fillna(0, downcast="infer")
expected = pd.DataFrame({"a": [1, 0]})
expected = DataFrame({"a": [1, 0]})
tm.assert_frame_equal(result, expected)

# infer int64 from float64 when fillna value is a dict
df = pd.DataFrame({"a": [1.0, np.nan]})
df = DataFrame({"a": [1.0, np.nan]})
result = df.fillna({"a": 0}, downcast="infer")
expected = pd.DataFrame({"a": [1, 0]})
expected = DataFrame({"a": [1, 0]})
tm.assert_frame_equal(result, expected)

def test_fillna_dtype_conversion(self):
Expand Down Expand Up @@ -464,7 +464,7 @@ def test_fillna_dtype_conversion(self):

def test_fillna_datetime_columns(self):
# GH 7095
df = pd.DataFrame(
df = DataFrame(
{
"A": [-1, -2, np.nan],
"B": date_range("20130101", periods=3),
Expand All @@ -474,7 +474,7 @@ def test_fillna_datetime_columns(self):
index=date_range("20130110", periods=3),
)
result = df.fillna("?")
expected = pd.DataFrame(
expected = DataFrame(
{
"A": [-1, -2, "?"],
"B": date_range("20130101", periods=3),
Expand All @@ -485,7 +485,7 @@ def test_fillna_datetime_columns(self):
)
tm.assert_frame_equal(result, expected)

df = pd.DataFrame(
df = DataFrame(
{
"A": [-1, -2, np.nan],
"B": [pd.Timestamp("2013-01-01"), pd.Timestamp("2013-01-02"), pd.NaT],
Expand All @@ -495,7 +495,7 @@ def test_fillna_datetime_columns(self):
index=date_range("20130110", periods=3),
)
result = df.fillna("?")
expected = pd.DataFrame(
expected = DataFrame(
{
"A": [-1, -2, "?"],
"B": [pd.Timestamp("2013-01-01"), pd.Timestamp("2013-01-02"), "?"],
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/groupby/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_groupby_column_index_name_lost_fill_funcs(func):
# GH: 29764 groupby loses index sometimes
df = pd.DataFrame(
df = DataFrame(
[[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]],
columns=pd.Index(["type", "a", "b"], name="idx"),
)
Expand All @@ -22,10 +22,10 @@ def test_groupby_column_index_name_lost_fill_funcs(func):
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_groupby_fill_duplicate_column_names(func):
# GH: 25610 ValueError with duplicate column names
df1 = pd.DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]})
df2 = pd.DataFrame({"field1": [1, np.nan, 4]})
df1 = DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]})
df2 = DataFrame({"field1": [1, np.nan, 4]})
df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"])
expected = pd.DataFrame(
expected = DataFrame(
[[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"]
)
result = getattr(df_grouped, func)()
Expand All @@ -34,7 +34,7 @@ def test_groupby_fill_duplicate_column_names(func):

def test_ffill_missing_arguments():
# GH 14955
df = pd.DataFrame({"a": [1, 2], "b": [1, 1]})
df = DataFrame({"a": [1, 2], "b": [1, 1]})
with pytest.raises(ValueError, match="Must specify a fill"):
df.groupby("b").fillna()

Expand Down Expand Up @@ -90,7 +90,7 @@ def test_fill_consistency():
def test_ffill_handles_nan_groups(dropna, method, has_nan_group):
# GH 34725

df_without_nan_rows = pd.DataFrame([(1, 0.1), (2, 0.2)])
df_without_nan_rows = DataFrame([(1, 0.1), (2, 0.2)])

ridx = [-1, 0, -1, -1, 1, -1]
df = df_without_nan_rows.reindex(ridx).reset_index(drop=True)
Expand Down
28 changes: 14 additions & 14 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def test_datetime64_tz_fillna(self, tz):
pd.NaT,
]
)
null_loc = pd.Series([False, True, False, True])
null_loc = Series([False, True, False, True])

result = s.fillna(pd.Timestamp("2011-01-02 10:00"))
expected = Series(
Expand Down Expand Up @@ -247,7 +247,7 @@ def test_datetime64_tz_fillna(self, tz):
idx = pd.DatetimeIndex(
["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz=tz
)
s = pd.Series(idx)
s = Series(idx)
assert s.dtype == f"datetime64[ns, {tz}]"
tm.assert_series_equal(pd.isna(s), null_loc)

Expand Down Expand Up @@ -366,17 +366,17 @@ def test_datetime64_tz_fillna(self, tz):
def test_fillna_dt64tz_with_method(self):
# with timezone
# GH 15855
ser = pd.Series([pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT])
exp = pd.Series(
ser = Series([pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT])
exp = Series(
[
pd.Timestamp("2012-11-11 00:00:00+01:00"),
pd.Timestamp("2012-11-11 00:00:00+01:00"),
]
)
tm.assert_series_equal(ser.fillna(method="pad"), exp)

ser = pd.Series([pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")])
exp = pd.Series(
ser = Series([pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")])
exp = Series(
[
pd.Timestamp("2012-11-11 00:00:00+01:00"),
pd.Timestamp("2012-11-11 00:00:00+01:00"),
Expand Down Expand Up @@ -421,13 +421,13 @@ def test_fillna_consistency(self):
def test_datetime64tz_fillna_round_issue(self):
# GH 14872

data = pd.Series(
data = Series(
[pd.NaT, pd.NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)]
)

filled = data.fillna(method="bfill")

expected = pd.Series(
expected = Series(
[
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
Expand All @@ -440,15 +440,15 @@ def test_datetime64tz_fillna_round_issue(self):
def test_fillna_downcast(self):
# GH 15277
# infer int64 from float64
s = pd.Series([1.0, np.nan])
s = Series([1.0, np.nan])
result = s.fillna(0, downcast="infer")
expected = pd.Series([1, 0])
expected = Series([1, 0])
tm.assert_series_equal(result, expected)

# infer int64 from float64 when fillna value is a dict
s = pd.Series([1.0, np.nan])
s = Series([1.0, np.nan])
result = s.fillna({1: 0}, downcast="infer")
expected = pd.Series([1, 0])
expected = Series([1, 0])
tm.assert_series_equal(result, expected)

def test_fillna_int(self):
Expand Down Expand Up @@ -627,7 +627,7 @@ def test_ffill(self):

def test_ffill_mixed_dtypes_without_missing_data(self):
# GH14956
series = pd.Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
series = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
result = series.ffill()
tm.assert_series_equal(series, result)

Expand Down Expand Up @@ -710,7 +710,7 @@ def test_datetime64_tz_dropna(self):
idx = pd.DatetimeIndex(
["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz="Asia/Tokyo"
)
s = pd.Series(idx)
s = Series(idx)
assert s.dtype == "datetime64[ns, Asia/Tokyo]"
result = s.dropna()
expected = Series(
Expand Down