diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3b788cc2df227..71dedfaee8c04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,6 @@ repos: - id: codespell types_or: [python, rst, markdown] files: ^(pandas|doc)/ - exclude: ^pandas/tests/ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.4.0 hooks: diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 78c704f2e43bb..96d2c246dd0ee 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -264,7 +264,7 @@ def test_loc_multiindex_incomplete(self): tm.assert_series_equal(result, expected) # GH 7400 - # multiindexer gettitem with list of indexers skips wrong element + # multiindexer getitem with list of indexers skips wrong element s = Series( np.arange(15, dtype="int64"), MultiIndex.from_product([range(5), ["a", "b", "c"]]), @@ -385,7 +385,7 @@ def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value): [ ([], []), # empty ok (["A"], slice(3)), - (["A", "D"], []), # "D" isnt present -> raise + (["A", "D"], []), # "D" isn't present -> raise (["D", "E"], []), # no values found -> raise (["D"], []), # same, with single item list: GH 27148 (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), @@ -531,7 +531,7 @@ def test_loc_period_string_indexing(): # GH 9892 a = pd.period_range("2013Q1", "2013Q4", freq="Q") i = (1111, 2222, 3333) - idx = MultiIndex.from_product((a, i), names=("Periode", "CVR")) + idx = MultiIndex.from_product((a, i), names=("Period", "CVR")) df = DataFrame( index=idx, columns=( @@ -552,7 +552,7 @@ def test_loc_period_string_indexing(): dtype=object, name="OMS", index=MultiIndex.from_tuples( - [(pd.Period("2013Q1"), 1111)], names=["Periode", "CVR"] + [(pd.Period("2013Q1"), 1111)], names=["Period", "CVR"] ), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py 
b/pandas/tests/indexing/test_loc.py index 9dbce283d2a8f..bec442b7f48ac 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1072,7 +1072,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self): tm.assert_frame_equal(result, expected) # assigning with loc/iloc attempts to set the values inplace, which - # in this case is succesful + # in this case is successful result.loc[result.index, "A"] = [float(x) for x in col_data] expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 2f5764ab5bd77..3c37d827c0778 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -829,8 +829,8 @@ def assert_slice_ok(mgr, axis, slobj): elif mgr.ndim == 1 and axis == 0: sliced = mgr.getitem_mgr(slobj) else: - # BlockManager doesnt support non-slice, SingleBlockManager - # doesnt support axis > 0 + # BlockManager doesn't support non-slice, SingleBlockManager + # doesn't support axis > 0 return mat_slobj = (slice(None),) * axis + (slobj,) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 8c324d73a7e54..5a7d571e3a701 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -281,7 +281,7 @@ def test_append_frame_column_oriented(setup_path): # column oriented df = tm.makeTimeDataFrame() - df.index = df.index._with_freq(None) # freq doesnt round-trip + df.index = df.index._with_freq(None) # freq doesn't round-trip _maybe_remove(store, "df1") store.append("df1", df.iloc[:, :2], axes=["columns"]) @@ -331,7 +331,7 @@ def test_append_with_different_block_ordering(setup_path): store.append("df", df) # test a different ordering but with more fields (like invalid - # combinate) + # combinations) with ensure_clean_store(setup_path) as store: df = 
DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64") diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 03d3d838a936c..97edc3cdffdf7 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -350,7 +350,7 @@ def test_timeseries_preepoch(setup_path): try: _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) except OverflowError: - pytest.skip("known failer on some windows platforms") + pytest.skip("known failure on some windows platforms") @pytest.mark.parametrize( diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 8ad5dbc049380..0d6ee7d6efb85 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -663,13 +663,13 @@ def test_frame_select_complex(setup_path): def test_frame_select_complex2(setup_path): - with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths: + with ensure_clean_path(["params.hdf", "hist.hdf"]) as paths: pp, hh = paths # use non-trivial selection criteria - parms = DataFrame({"A": [1, 1, 2, 2, 3]}) - parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"]) + params = DataFrame({"A": [1, 1, 2, 2, 3]}) + params.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"]) selection = read_hdf(pp, "df", where="A=[2,3]") hist = DataFrame( diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 0532ddd17cd19..4aa6f94ca38e9 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -137,7 +137,7 @@ def test_append_with_timezones_as_index(setup_path, gettz): # GH#4098 example dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern")) - dti = dti._with_freq(None) # freq doesnt round-trip + dti = dti._with_freq(None) # freq doesn't round-trip df = DataFrame({"A": Series(range(3), index=dti)}) @@ 
-217,7 +217,7 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): # index rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") - rng = rng._with_freq(None) # freq doesnt round-trip + rng = rng._with_freq(None) # freq doesn't round-trip df = DataFrame(np.random.randn(len(rng), 4), index=rng) store["df"] = df result = store["df"] @@ -334,7 +334,7 @@ def test_dst_transitions(setup_path): freq="H", ambiguous="infer", ) - times = times._with_freq(None) # freq doesnt round-trip + times = times._with_freq(None) # freq doesn't round-trip for i in [times, times + pd.Timedelta("10min")]: _maybe_remove(store, "df") diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 97793ce8f65b8..89a3d4f2ae083 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -411,12 +411,12 @@ def test_attrs_cols_prefix(datapath, parser): def test_attrs_unknown_column(parser): with pytest.raises(KeyError, match=("no valid column")): - geom_df.to_xml(attr_cols=["shape", "degreees", "sides"], parser=parser) + geom_df.to_xml(attr_cols=["shape", "degree", "sides"], parser=parser) def test_attrs_wrong_type(parser): with pytest.raises(TypeError, match=("is not a valid type for attr_cols")): - geom_df.to_xml(attr_cols='"shape", "degreees", "sides"', parser=parser) + geom_df.to_xml(attr_cols='"shape", "degree", "sides"', parser=parser) # ELEM_COLS @@ -453,12 +453,12 @@ def test_elems_cols_nan_output(datapath, parser): def test_elems_unknown_column(parser): with pytest.raises(KeyError, match=("no valid column")): - geom_df.to_xml(elem_cols=["shape", "degreees", "sides"], parser=parser) + geom_df.to_xml(elem_cols=["shape", "degree", "sides"], parser=parser) def test_elems_wrong_type(parser): with pytest.raises(TypeError, match=("is not a valid type for elem_cols")): - geom_df.to_xml(elem_cols='"shape", "degreees", "sides"', parser=parser) + geom_df.to_xml(elem_cols='"shape", "degree", "sides"', parser=parser) def 
test_elems_and_attrs_cols(datapath, parser): diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index a28e2f22560eb..04a8aeefbfcd6 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -170,7 +170,7 @@ def test_no_reallocation(self, table_type, dtype): n_buckets_start = preallocated_table.get_state()["n_buckets"] preallocated_table.map_locations(keys) n_buckets_end = preallocated_table.get_state()["n_buckets"] - # orgininal number of buckets was enough: + # original number of buckets was enough: assert n_buckets_start == n_buckets_end # check with clean table (not too much preallocated) clean_table = table_type() @@ -219,7 +219,7 @@ def test_no_reallocation_StringHashTable(): n_buckets_start = preallocated_table.get_state()["n_buckets"] preallocated_table.map_locations(keys) n_buckets_end = preallocated_table.get_state()["n_buckets"] - # orgininal number of buckets was enough: + # original number of buckets was enough: assert n_buckets_start == n_buckets_end # check with clean table (not too much preallocated) clean_table = ht.StringHashTable() diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 3c53a0ed2500c..bed60be169e57 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -2208,7 +2208,7 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_xlabel() == old_label assert ax.get_ylabel() == "" - # old xlabel will be overriden and assigned ylabel will be used as ylabel + # old xlabel will be overridden and assigned ylabel will be used as ylabel ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label) assert ax.get_ylabel() == str(new_label) assert ax.get_xlabel() == str(new_label) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 0e25fb5f4c01f..fa4a132001be5 100644 --- 
a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -522,7 +522,7 @@ def test_xlabel_ylabel_dataframe_subplots( assert all(ax.get_ylabel() == "" for ax in axes) assert all(ax.get_xlabel() == old_label for ax in axes) - # old xlabel will be overriden and assigned ylabel will be used as ylabel + # old xlabel will be overridden and assigned ylabel will be used as ylabel axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) assert all(ax.get_ylabel() == str(new_label) for ax in axes) assert all(ax.get_xlabel() == str(new_label) for ax in axes) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index a6e3ba71e94ab..96fdcebc9b8f7 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -533,7 +533,7 @@ def test_hist_secondary_legend(self): _, ax = self.plt.subplots() ax = df["a"].plot.hist(legend=True, ax=ax) df["b"].plot.hist(ax=ax, legend=True, secondary_y=True) - # both legends are dran on left ax + # both legends are drawn on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=["a", "b (right)"]) assert ax.get_yaxis().get_visible() diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 59b0cc99d94fb..812aae8d97151 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -381,7 +381,7 @@ def test_df_series_secondary_legend(self): _, ax = self.plt.subplots() ax = df.plot(ax=ax) s.plot(legend=True, secondary_y=True, ax=ax) - # both legends are dran on left ax + # both legends are drawn on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"]) assert ax.get_yaxis().get_visible() @@ -392,7 +392,7 @@ def test_df_series_secondary_legend(self): _, ax = self.plt.subplots() ax = df.plot(ax=ax) s.plot(ax=ax, legend=True, 
secondary_y=True) - # both legends are dran on left ax + # both legends are drawn on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"]) assert ax.get_yaxis().get_visible() @@ -403,7 +403,7 @@ def test_df_series_secondary_legend(self): _, ax = self.plt.subplots() ax = df.plot(secondary_y=True, ax=ax) s.plot(legend=True, secondary_y=True, ax=ax) - # both legends are dran on left ax + # both legends are drawn on left ax # left axis must be invisible and right axis must be visible expected = ["a (right)", "b (right)", "c (right)", "x (right)"] self._check_legend_labels(ax.left_ax, labels=expected) @@ -415,7 +415,7 @@ def test_df_series_secondary_legend(self): _, ax = self.plt.subplots() ax = df.plot(secondary_y=True, ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) - # both legends are dran on left ax + # both legends are drawn on left ax # left axis must be invisible and right axis must be visible expected = ["a (right)", "b (right)", "c (right)", "x (right)"] self._check_legend_labels(ax.left_ax, expected) @@ -427,7 +427,7 @@ def test_df_series_secondary_legend(self): _, ax = self.plt.subplots() ax = df.plot(secondary_y=True, mark_right=False, ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) - # both legends are dran on left ax + # both legends are drawn on left ax # left axis must be invisible and right axis must be visible expected = ["a", "b", "c", "x (right)"] self._check_legend_labels(ax.left_ax, expected) @@ -798,7 +798,7 @@ def test_xlabel_ylabel_series(self, kind, index_name, old_label, new_label): assert ax.get_ylabel() == "" assert ax.get_xlabel() == old_label - # old xlabel will be overriden and assigned ylabel will be used as ylabel + # old xlabel will be overridden and assigned ylabel will be used as ylabel ax = ser.plot(kind=kind, ylabel=new_label, xlabel=new_label) assert ax.get_ylabel() == new_label assert ax.get_xlabel() == new_label diff --git 
a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 332c3c8f30562..2b8233388d328 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -457,7 +457,7 @@ def test_concat_tz_not_aligned(self): ) def test_concat_tz_NaT(self, t1): # GH#22796 - # Concating tz-aware multicolumn DataFrames + # Concatenating tz-aware multicolumn DataFrames ts1 = Timestamp(t1, tz="UTC") ts2 = Timestamp("2015-01-01", tz="UTC") ts3 = Timestamp("2015-01-01", tz="UTC") diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 4a4af789d540b..0268801c66e1d 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -183,19 +183,19 @@ def test_list_type_by(self, left, right, on, left_by, right_by, expected): def test_left_by_length_equals_to_right_shape0(self): # GH 38166 - left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHT")) - right = DataFrame([[2, 1]], columns=list("TE")) - result = merge_ordered(left, right, on="T", left_by=["G", "H"]) + left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE")) + right = DataFrame([[2, 1]], columns=list("ET")) + result = merge_ordered(left, right, on="E", left_by=["G", "H"]) expected = DataFrame( - {"G": ["g"] * 3, "H": ["h"] * 3, "T": [1, 2, 3], "E": [np.nan, 1.0, np.nan]} + {"G": ["g"] * 3, "H": ["h"] * 3, "E": [1, 2, 3], "T": [np.nan, 1.0, np.nan]} ) tm.assert_frame_equal(result, expected) def test_elements_not_in_by_but_in_df(self): # GH 38167 - left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHT")) - right = DataFrame([[2, 1]], columns=list("TE")) + left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE")) + right = DataFrame([[2, 1]], columns=list("ET")) msg = r"\{'h'\} not found in left columns" with pytest.raises(KeyError, match=msg): - merge_ordered(left, right, on="T", 
left_by=["G", "h"]) + merge_ordered(left, right, on="E", left_by=["G", "h"]) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 9ccdd0261de0e..96aea4da9fac5 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -527,7 +527,7 @@ def test_to_numpy_alias(): pytest.param( Timedelta(0).to_timedelta64(), marks=pytest.mark.xfail( - reason="td64 doesnt return NotImplemented, see numpy#17017" + reason="td64 doesn't return NotImplemented, see numpy#17017" ), ), Timestamp(0), @@ -535,7 +535,7 @@ def test_to_numpy_alias(): pytest.param( Timestamp(0).to_datetime64(), marks=pytest.mark.xfail( - reason="dt64 doesnt return NotImplemented, see numpy#17017" + reason="dt64 doesn't return NotImplemented, see numpy#17017" ), ), Timestamp(0).tz_localize("UTC"), diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index f7b49c187c794..b68c9c9b0e529 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -12,7 +12,7 @@ # test Series, the default dtype for the expected result (which is valid # for most cases), and the specific cases where the result deviates from # this default. Those overrides are defined as a dict with (keyword, val) as -# dictionary key. In case of multiple items, the last override takes precendence. +# dictionary key. In case of multiple items, the last override takes precedence. 
test_cases = [ ( # data diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index b1aa09f387a13..3af06145b9fcd 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -98,7 +98,7 @@ class TestSeriesNLargestNSmallest: ) def test_nlargest_error(self, r): dt = r.dtype - msg = f"Cannot use method 'n(larg|small)est' with dtype {dt}" + msg = f"Cannot use method 'n(largest|smallest)' with dtype {dt}" args = 2, len(r), 0, -1 methods = r.nlargest, r.nsmallest for method, arg in product(methods, args): diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index a22e125e68cba..9684546112078 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -25,7 +25,7 @@ def read_csv(self, path, **kwargs): return out def test_from_csv(self, datetime_series, string_series): - # freq doesnt round-trip + # freq doesn't round-trip datetime_series.index = datetime_series.index._with_freq(None) with tm.ensure_clean() as path: diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index cdaccf0dad8e6..05883248592ca 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -29,7 +29,7 @@ def test_str_cat_name(index_or_series, other): def test_str_cat(index_or_series): box = index_or_series # test_cat above tests "str_cat" from ndarray; - # here testing "str.cat" from Series/Indext to ndarray/list + # here testing "str.cat" from Series/Index to ndarray/list s = box(["a", "a", "b", "b", "c", np.nan]) # single array diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 13768a2cd7a61..999a04a81406e 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -515,7 +515,7 @@ def test_to_datetime_YYYYMMDD(self): assert actual == datetime(2008, 1, 15) def 
test_to_datetime_unparseable_ignore(self): - # unparseable + # unparsable s = "Month 1, 1999" assert to_datetime(s, errors="ignore") == s @@ -2469,7 +2469,7 @@ def test_empty_string_datetime_coerce__format(): with pytest.raises(ValueError, match="does not match format"): result = to_datetime(td, format=format, errors="raise") - # don't raise an expection in case no format is given + # don't raise an exception in case no format is given result = to_datetime(td, errors="raise") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index d36bea72908a3..3eb3892279832 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -64,7 +64,7 @@ class TestCommon(Base): - # exected value created by Base._get_offset + # expected value created by Base._get_offset # are applied to 2011/01/01 09:00 (Saturday) # used for .apply and .rollforward expecteds = { diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py index a45fcab56759f..e5fe998923f8d 100644 --- a/pandas/tests/tslibs/test_fields.py +++ b/pandas/tests/tslibs/test_fields.py @@ -7,7 +7,7 @@ def test_fields_readonly(): # https://github.com/vaexio/vaex/issues/357 - # fields functions should't raise when we pass read-only data + # fields functions shouldn't raise when we pass read-only data dtindex = np.arange(5, dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32 dtindex.flags.writeable = False