STYLE: Extending codespell to pandas/tests/ part3 38802 (#40372)

01-vyom · web-flow · commit 082d43730f86 · 2021-03-11T19:03:28.000Z
* STYLE: Extending codespell to pandas/tests part 3

* DOC: small change in test_internals.py

* TST: small changes in test_to_xml.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,6 @@ repos:
     -   id: codespell
         types_or: [python, rst, markdown]
         files: ^(pandas|doc)/
-        exclude: ^pandas/tests/
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.4.0
     hooks:
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
@@ -264,7 +264,7 @@ def test_loc_multiindex_incomplete(self):
         tm.assert_series_equal(result, expected)
 
         # GH 7400
-        # multiindexer gettitem with list of indexers skips wrong element
+        # multiindexer getitem with list of indexers skips wrong element
         s = Series(
             np.arange(15, dtype="int64"),
             MultiIndex.from_product([range(5), ["a", "b", "c"]]),
@@ -385,7 +385,7 @@ def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
     [
         ([], []),  # empty ok
         (["A"], slice(3)),
-        (["A", "D"], []),  # "D" isnt present -> raise
+        (["A", "D"], []),  # "D" isn't present -> raise
         (["D", "E"], []),  # no values found -> raise
         (["D"], []),  # same, with single item list: GH 27148
         (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
@@ -531,7 +531,7 @@ def test_loc_period_string_indexing():
     # GH 9892
     a = pd.period_range("2013Q1", "2013Q4", freq="Q")
     i = (1111, 2222, 3333)
-    idx = MultiIndex.from_product((a, i), names=("Periode", "CVR"))
+    idx = MultiIndex.from_product((a, i), names=("Period", "CVR"))
     df = DataFrame(
         index=idx,
         columns=(
@@ -552,7 +552,7 @@ def test_loc_period_string_indexing():
         dtype=object,
         name="OMS",
         index=MultiIndex.from_tuples(
-            [(pd.Period("2013Q1"), 1111)], names=["Periode", "CVR"]
+            [(pd.Period("2013Q1"), 1111)], names=["Period", "CVR"]
         ),
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -1072,7 +1072,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self):
         tm.assert_frame_equal(result, expected)
 
         # assigning with loc/iloc attempts to set the values inplace, which
-        #  in this case is succesful
+        #  in this case is successful
         result.loc[result.index, "A"] = [float(x) for x in col_data]
         expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -829,8 +829,8 @@ def assert_slice_ok(mgr, axis, slobj):
             elif mgr.ndim == 1 and axis == 0:
                 sliced = mgr.getitem_mgr(slobj)
             else:
-                # BlockManager doesnt support non-slice, SingleBlockManager
-                #  doesnt support axis > 0
+                # BlockManager doesn't support non-slice, SingleBlockManager
+                #  doesn't support axis > 0
                 return
 
             mat_slobj = (slice(None),) * axis + (slobj,)
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
@@ -281,7 +281,7 @@ def test_append_frame_column_oriented(setup_path):
 
         # column oriented
         df = tm.makeTimeDataFrame()
-        df.index = df.index._with_freq(None)  # freq doesnt round-trip
+        df.index = df.index._with_freq(None)  # freq doesn't round-trip
 
         _maybe_remove(store, "df1")
         store.append("df1", df.iloc[:, :2], axes=["columns"])
@@ -331,7 +331,7 @@ def test_append_with_different_block_ordering(setup_path):
             store.append("df", df)
 
     # test a different ordering but with more fields (like invalid
-    # combinate)
+    # combinations)
     with ensure_clean_store(setup_path) as store:
 
         df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64")
diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
@@ -350,7 +350,7 @@ def test_timeseries_preepoch(setup_path):
     try:
         _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
     except OverflowError:
-        pytest.skip("known failer on some windows platforms")
+        pytest.skip("known failure on some windows platforms")
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py
@@ -663,13 +663,13 @@ def test_frame_select_complex(setup_path):
 
 def test_frame_select_complex2(setup_path):
 
-    with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths:
+    with ensure_clean_path(["params.hdf", "hist.hdf"]) as paths:
 
         pp, hh = paths
 
         # use non-trivial selection criteria
-        parms = DataFrame({"A": [1, 1, 2, 2, 3]})
-        parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])
+        params = DataFrame({"A": [1, 1, 2, 2, 3]})
+        params.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])
 
         selection = read_hdf(pp, "df", where="A=[2,3]")
         hist = DataFrame(
diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py
@@ -137,7 +137,7 @@ def test_append_with_timezones_as_index(setup_path, gettz):
     # GH#4098 example
 
     dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
-    dti = dti._with_freq(None)  # freq doesnt round-trip
+    dti = dti._with_freq(None)  # freq doesn't round-trip
 
     df = DataFrame({"A": Series(range(3), index=dti)})
 
@@ -217,7 +217,7 @@ def test_timezones_fixed_format_frame_non_empty(setup_path):
 
         # index
         rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
-        rng = rng._with_freq(None)  # freq doesnt round-trip
+        rng = rng._with_freq(None)  # freq doesn't round-trip
         df = DataFrame(np.random.randn(len(rng), 4), index=rng)
         store["df"] = df
         result = store["df"]
@@ -334,7 +334,7 @@ def test_dst_transitions(setup_path):
             freq="H",
             ambiguous="infer",
         )
-        times = times._with_freq(None)  # freq doesnt round-trip
+        times = times._with_freq(None)  # freq doesn't round-trip
 
         for i in [times, times + pd.Timedelta("10min")]:
             _maybe_remove(store, "df")
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
@@ -411,12 +411,12 @@ def test_attrs_cols_prefix(datapath, parser):
 
 def test_attrs_unknown_column(parser):
     with pytest.raises(KeyError, match=("no valid column")):
-        geom_df.to_xml(attr_cols=["shape", "degreees", "sides"], parser=parser)
+        geom_df.to_xml(attr_cols=["shape", "degree", "sides"], parser=parser)
 
 
 def test_attrs_wrong_type(parser):
     with pytest.raises(TypeError, match=("is not a valid type for attr_cols")):
-        geom_df.to_xml(attr_cols='"shape", "degreees", "sides"', parser=parser)
+        geom_df.to_xml(attr_cols='"shape", "degree", "sides"', parser=parser)
 
 
 # ELEM_COLS
@@ -453,12 +453,12 @@ def test_elems_cols_nan_output(datapath, parser):
 
 def test_elems_unknown_column(parser):
     with pytest.raises(KeyError, match=("no valid column")):
-        geom_df.to_xml(elem_cols=["shape", "degreees", "sides"], parser=parser)
+        geom_df.to_xml(elem_cols=["shape", "degree", "sides"], parser=parser)
 
 
 def test_elems_wrong_type(parser):
     with pytest.raises(TypeError, match=("is not a valid type for elem_cols")):
-        geom_df.to_xml(elem_cols='"shape", "degreees", "sides"', parser=parser)
+        geom_df.to_xml(elem_cols='"shape", "degree", "sides"', parser=parser)
 
 
 def test_elems_and_attrs_cols(datapath, parser):
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
@@ -170,7 +170,7 @@ def test_no_reallocation(self, table_type, dtype):
             n_buckets_start = preallocated_table.get_state()["n_buckets"]
             preallocated_table.map_locations(keys)
             n_buckets_end = preallocated_table.get_state()["n_buckets"]
-            # orgininal number of buckets was enough:
+            # original number of buckets was enough:
             assert n_buckets_start == n_buckets_end
             # check with clean table (not too much preallocated)
             clean_table = table_type()
@@ -219,7 +219,7 @@ def test_no_reallocation_StringHashTable():
         n_buckets_start = preallocated_table.get_state()["n_buckets"]
         preallocated_table.map_locations(keys)
         n_buckets_end = preallocated_table.get_state()["n_buckets"]
-        # orgininal number of buckets was enough:
+        # original number of buckets was enough:
         assert n_buckets_start == n_buckets_end
         # check with clean table (not too much preallocated)
         clean_table = ht.StringHashTable()
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
@@ -2208,7 +2208,7 @@ def test_xlabel_ylabel_dataframe_single_plot(
         assert ax.get_xlabel() == old_label
         assert ax.get_ylabel() == ""
 
-        # old xlabel will be overriden and assigned ylabel will be used as ylabel
+        # old xlabel will be overridden and assigned ylabel will be used as ylabel
         ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label)
         assert ax.get_ylabel() == str(new_label)
         assert ax.get_xlabel() == str(new_label)
diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py
@@ -522,7 +522,7 @@ def test_xlabel_ylabel_dataframe_subplots(
         assert all(ax.get_ylabel() == "" for ax in axes)
         assert all(ax.get_xlabel() == old_label for ax in axes)
 
-        # old xlabel will be overriden and assigned ylabel will be used as ylabel
+        # old xlabel will be overridden and assigned ylabel will be used as ylabel
         axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True)
         assert all(ax.get_ylabel() == str(new_label) for ax in axes)
         assert all(ax.get_xlabel() == str(new_label) for ax in axes)
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
@@ -533,7 +533,7 @@ def test_hist_secondary_legend(self):
         _, ax = self.plt.subplots()
         ax = df["a"].plot.hist(legend=True, ax=ax)
         df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
-        # both legends are dran on left ax
+        # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax, labels=["a", "b (right)"])
         assert ax.get_yaxis().get_visible()
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
@@ -381,7 +381,7 @@ def test_df_series_secondary_legend(self):
         _, ax = self.plt.subplots()
         ax = df.plot(ax=ax)
         s.plot(legend=True, secondary_y=True, ax=ax)
-        # both legends are dran on left ax
+        # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
         assert ax.get_yaxis().get_visible()
@@ -392,7 +392,7 @@ def test_df_series_secondary_legend(self):
         _, ax = self.plt.subplots()
         ax = df.plot(ax=ax)
         s.plot(ax=ax, legend=True, secondary_y=True)
-        # both legends are dran on left ax
+        # both legends are drawn on left ax
         # left and right axis must be visible
         self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
         assert ax.get_yaxis().get_visible()
@@ -403,7 +403,7 @@ def test_df_series_secondary_legend(self):
         _, ax = self.plt.subplots()
         ax = df.plot(secondary_y=True, ax=ax)
         s.plot(legend=True, secondary_y=True, ax=ax)
-        # both legends are dran on left ax
+        # both legends are drawn on left ax
         # left axis must be invisible and right axis must be visible
         expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
         self._check_legend_labels(ax.left_ax, labels=expected)
@@ -415,7 +415,7 @@ def test_df_series_secondary_legend(self):
         _, ax = self.plt.subplots()
         ax = df.plot(secondary_y=True, ax=ax)
         s.plot(ax=ax, legend=True, secondary_y=True)
-        # both legends are dran on left ax
+        # both legends are drawn on left ax
         # left axis must be invisible and right axis must be visible
         expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
         self._check_legend_labels(ax.left_ax, expected)
@@ -427,7 +427,7 @@ def test_df_series_secondary_legend(self):
         _, ax = self.plt.subplots()
         ax = df.plot(secondary_y=True, mark_right=False, ax=ax)
         s.plot(ax=ax, legend=True, secondary_y=True)
-        # both legends are dran on left ax
+        # both legends are drawn on left ax
         # left axis must be invisible and right axis must be visible
         expected = ["a", "b", "c", "x (right)"]
         self._check_legend_labels(ax.left_ax, expected)
@@ -798,7 +798,7 @@ def test_xlabel_ylabel_series(self, kind, index_name, old_label, new_label):
         assert ax.get_ylabel() == ""
         assert ax.get_xlabel() == old_label
 
-        # old xlabel will be overriden and assigned ylabel will be used as ylabel
+        # old xlabel will be overridden and assigned ylabel will be used as ylabel
         ax = ser.plot(kind=kind, ylabel=new_label, xlabel=new_label)
         assert ax.get_ylabel() == new_label
         assert ax.get_xlabel() == new_label
diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py
@@ -457,7 +457,7 @@ def test_concat_tz_not_aligned(self):
     )
     def test_concat_tz_NaT(self, t1):
         # GH#22796
-        # Concating tz-aware multicolumn DataFrames
+        # Concatenating tz-aware multicolumn DataFrames
         ts1 = Timestamp(t1, tz="UTC")
         ts2 = Timestamp("2015-01-01", tz="UTC")
         ts3 = Timestamp("2015-01-01", tz="UTC")
diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py
@@ -183,19 +183,19 @@ def test_list_type_by(self, left, right, on, left_by, right_by, expected):
 
     def test_left_by_length_equals_to_right_shape0(self):
         # GH 38166
-        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHT"))
-        right = DataFrame([[2, 1]], columns=list("TE"))
-        result = merge_ordered(left, right, on="T", left_by=["G", "H"])
+        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
+        right = DataFrame([[2, 1]], columns=list("ET"))
+        result = merge_ordered(left, right, on="E", left_by=["G", "H"])
         expected = DataFrame(
-            {"G": ["g"] * 3, "H": ["h"] * 3, "T": [1, 2, 3], "E": [np.nan, 1.0, np.nan]}
+            {"G": ["g"] * 3, "H": ["h"] * 3, "E": [1, 2, 3], "T": [np.nan, 1.0, np.nan]}
         )
 
         tm.assert_frame_equal(result, expected)
 
     def test_elements_not_in_by_but_in_df(self):
         # GH 38167
-        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHT"))
-        right = DataFrame([[2, 1]], columns=list("TE"))
+        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
+        right = DataFrame([[2, 1]], columns=list("ET"))
         msg = r"\{'h'\} not found in left columns"
         with pytest.raises(KeyError, match=msg):
-            merge_ordered(left, right, on="T", left_by=["G", "h"])
+            merge_ordered(left, right, on="E", left_by=["G", "h"])
diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py
@@ -527,15 +527,15 @@ def test_to_numpy_alias():
         pytest.param(
             Timedelta(0).to_timedelta64(),
             marks=pytest.mark.xfail(
-                reason="td64 doesnt return NotImplemented, see numpy#17017"
+                reason="td64 doesn't return NotImplemented, see numpy#17017"
             ),
         ),
         Timestamp(0),
         Timestamp(0).to_pydatetime(),
         pytest.param(
             Timestamp(0).to_datetime64(),
             marks=pytest.mark.xfail(
-                reason="dt64 doesnt return NotImplemented, see numpy#17017"
+                reason="dt64 doesn't return NotImplemented, see numpy#17017"
             ),
         ),
         Timestamp(0).tz_localize("UTC"),
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -12,7 +12,7 @@
 # test Series, the default dtype for the expected result (which is valid
 # for most cases), and the specific cases where the result deviates from
 # this default. Those overrides are defined as a dict with (keyword, val) as
-# dictionary key. In case of multiple items, the last override takes precendence.
+# dictionary key. In case of multiple items, the last override takes precedence.
 test_cases = [
     (
         # data
diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py
@@ -98,7 +98,7 @@ class TestSeriesNLargestNSmallest:
     )
     def test_nlargest_error(self, r):
         dt = r.dtype
-        msg = f"Cannot use method 'n(larg|small)est' with dtype {dt}"
+        msg = f"Cannot use method 'n(largest|smallest)' with dtype {dt}"
         args = 2, len(r), 0, -1
         methods = r.nlargest, r.nsmallest
         for method, arg in product(methods, args):
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
@@ -25,7 +25,7 @@ def read_csv(self, path, **kwargs):
         return out
 
     def test_from_csv(self, datetime_series, string_series):
-        # freq doesnt round-trip
+        # freq doesn't round-trip
         datetime_series.index = datetime_series.index._with_freq(None)
 
         with tm.ensure_clean() as path:
diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py
@@ -28,7 +28,7 @@ def test_str_cat_name(index_or_series, other):
 def test_str_cat(index_or_series):
     box = index_or_series
     # test_cat above tests "str_cat" from ndarray;
-    # here testing "str.cat" from Series/Indext to ndarray/list
+    # here testing "str.cat" from Series/Index to ndarray/list
     s = box(["a", "a", "b", "b", "c", np.nan])
 
     # single array
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -515,7 +515,7 @@ def test_to_datetime_YYYYMMDD(self):
         assert actual == datetime(2008, 1, 15)
 
     def test_to_datetime_unparseable_ignore(self):
-        # unparseable
+        # unparsable
         s = "Month 1, 1999"
         assert to_datetime(s, errors="ignore") == s
 
@@ -2469,7 +2469,7 @@ def test_empty_string_datetime_coerce__format():
     with pytest.raises(ValueError, match="does not match format"):
         result = to_datetime(td, format=format, errors="raise")
 
-    # don't raise an expection in case no format is given
+    # don't raise an exception in case no format is given
     result = to_datetime(td, errors="raise")
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
@@ -64,7 +64,7 @@
 
 
 class TestCommon(Base):
-    # exected value created by Base._get_offset
+    # expected value created by Base._get_offset
     # are applied to 2011/01/01 09:00 (Saturday)
     # used for .apply and .rollforward
     expecteds = {
diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py
@@ -7,7 +7,7 @@
 
 def test_fields_readonly():
     # https://github.com/vaexio/vaex/issues/357
-    #  fields functions should't raise when we pass read-only data
+    #  fields functions shouldn't raise when we pass read-only data
     dtindex = np.arange(5, dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32
     dtindex.flags.writeable = False
 

Original file line number	Diff line number	Diff line change
`@@ -457,7 +457,7 @@ def test_concat_tz_not_aligned(self):`
`457`	`457`	`)`
`458`	`458`	`def test_concat_tz_NaT(self, t1):`
`459`	`459`	`# GH#22796`
`460`		`- # Concating tz-aware multicolumn DataFrames`
	`460`	`+ # Concatenating tz-aware multicolumn DataFrames`
`461`	`461`	`ts1 = Timestamp(t1, tz="UTC")`
`462`	`462`	`ts2 = Timestamp("2015-01-01", tz="UTC")`
`463`	`463`	`ts3 = Timestamp("2015-01-01", tz="UTC")`
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`# test Series, the default dtype for the expected result (which is valid`
`13`	`13`	`# for most cases), and the specific cases where the result deviates from`
`14`	`14`	`# this default. Those overrides are defined as a dict with (keyword, val) as`
`15`		`-# dictionary key. In case of multiple items, the last override takes precendence.`
	`15`	`+# dictionary key. In case of multiple items, the last override takes precedence.`
`16`	`16`	`test_cases = [`
`17`	`17`	`(`
`18`	`18`	`# data`
Original file line number	Diff line number	Diff line change
`@@ -98,7 +98,7 @@ class TestSeriesNLargestNSmallest:`
`98`	`98`	`)`
`99`	`99`	`def test_nlargest_error(self, r):`
`100`	`100`	`dt = r.dtype`
`101`		`- msg = f"Cannot use method 'n(larg\|small)est' with dtype {dt}"`
	`101`	`+ msg = f"Cannot use method 'n(largest\|smallest)' with dtype {dt}"`
`102`	`102`	`args = 2, len(r), 0, -1`
`103`	`103`	`methods = r.nlargest, r.nsmallest`
`104`	`104`	`for method, arg in product(methods, args):`