STYLE: Extending codespell to pandas/tests (#40320)

01-vyom · web-flow · commit 3cf05edb3972 · 2021-03-11T10:45:01.000Z
* STYLE: Extending codespell to pandas/tests part 1

* FIX: Variable name made consistent.
diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py
@@ -1158,7 +1158,7 @@ def test_td64arr_add_sub_int(self, box_with_array, one):
         msg = "Addition/subtraction of integers"
         assert_invalid_addsub_type(tdarr, one, msg)
 
-        # TOOD: get inplace ops into assert_invalid_addsub_type
+        # TODO: get inplace ops into assert_invalid_addsub_type
         with pytest.raises(TypeError, match=msg):
             tdarr += one
         with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py
@@ -107,13 +107,13 @@ def test_fillna_array(self):
         other = cat.fillna("C")
         result = cat.fillna(other)
         tm.assert_categorical_equal(result, other)
-        assert isna(cat[-1])  # didnt modify original inplace
+        assert isna(cat[-1])  # didn't modify original inplace
 
         other = np.array(["A", "B", "C", "B", "A"])
         result = cat.fillna(other)
         expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
         tm.assert_categorical_equal(result, expected)
-        assert isna(cat[-1])  # didnt modify original inplace
+        assert isna(cat[-1])  # didn't modify original inplace
 
     @pytest.mark.parametrize(
         "values, expected",
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -95,7 +95,7 @@ def test_constructor_na_dtype(self, dtype):
             SparseArray([0, 1, np.nan], dtype=dtype)
 
     def test_constructor_warns_when_losing_timezone(self):
-        # GH#32501 warn when losing timezone inforamtion
+        # GH#32501 warn when losing timezone information
         dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
 
         expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -1144,11 +1144,11 @@ def test_performance_warning_for_poor_alignment(self, engine, parser):
             if not is_python_engine:
                 assert len(w) == 1
                 msg = str(w[0].message)
-                loged = np.log10(s.size - df.shape[1])
+                logged = np.log10(s.size - df.shape[1])
                 expected = (
                     f"Alignment difference on axis 1 is larger "
                     f"than an order of magnitude on term 'df', "
-                    f"by more than {loged:.4g}; performance may suffer"
+                    f"by more than {logged:.4g}; performance may suffer"
                 )
                 assert msg == expected
 
@@ -1404,25 +1404,25 @@ def test_multi_line_expression(self):
 
         expected["c"] = expected["a"] + expected["b"]
         expected["d"] = expected["c"] + expected["b"]
-        ans = df.eval(
+        answer = df.eval(
             """
         c = a + b
         d = c + b""",
             inplace=True,
         )
         tm.assert_frame_equal(expected, df)
-        assert ans is None
+        assert answer is None
 
         expected["a"] = expected["a"] - 1
         expected["e"] = expected["a"] + 2
-        ans = df.eval(
+        answer = df.eval(
             """
         a = a - 1
         e = a + 2""",
             inplace=True,
         )
         tm.assert_frame_equal(expected, df)
-        assert ans is None
+        assert answer is None
 
         # multi-line not valid if not all assignments
         msg = "Multi-line expressions are only valid if all expressions contain"
@@ -1467,15 +1467,15 @@ def test_multi_line_expression_local_variable(self):
         local_var = 7
         expected["c"] = expected["a"] * local_var
         expected["d"] = expected["c"] + local_var
-        ans = df.eval(
+        answer = df.eval(
             """
         c = a * @local_var
         d = c + @local_var
         """,
             inplace=True,
         )
         tm.assert_frame_equal(expected, df)
-        assert ans is None
+        assert answer is None
 
     def test_multi_line_expression_callable_local_variable(self):
         # 26426
@@ -1487,15 +1487,15 @@ def local_func(a, b):
         expected = df.copy()
         expected["c"] = expected["a"] * local_func(1, 7)
         expected["d"] = expected["c"] + local_func(1, 7)
-        ans = df.eval(
+        answer = df.eval(
             """
         c = a * @local_func(1, 7)
         d = c + @local_func(1, 7)
         """,
             inplace=True,
         )
         tm.assert_frame_equal(expected, df)
-        assert ans is None
+        assert answer is None
 
     def test_multi_line_expression_callable_local_variable_with_kwargs(self):
         # 26426
@@ -1507,15 +1507,15 @@ def local_func(a, b):
         expected = df.copy()
         expected["c"] = expected["a"] * local_func(b=7, a=1)
         expected["d"] = expected["c"] + local_func(b=7, a=1)
-        ans = df.eval(
+        answer = df.eval(
             """
         c = a * @local_func(b=7, a=1)
         d = c + @local_func(b=7, a=1)
         """,
             inplace=True,
         )
         tm.assert_frame_equal(expected, df)
-        assert ans is None
+        assert answer is None
 
     def test_assignment_in_query(self):
         # GH 8664
diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py
@@ -112,8 +112,8 @@ def test_describe_option(self):
 
         # if no doc is specified we get a default message
         # saying "description not available"
-        assert "vailable" in self.cf.describe_option("f", _print_desc=False)
-        assert "vailable" in self.cf.describe_option("g.h", _print_desc=False)
+        assert "available" in self.cf.describe_option("f", _print_desc=False)
+        assert "available" in self.cf.describe_option("g.h", _print_desc=False)
         assert "precated" in self.cf.describe_option("g.h", _print_desc=False)
         assert "k" in self.cf.describe_option("g.h", _print_desc=False)
 
diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py
@@ -103,7 +103,7 @@ def _assert_match(result_fill_value, expected_fill_value):
 
     if hasattr(result_fill_value, "dtype"):
         # Compare types in a way that is robust to platform-specific
-        #  idiosyncracies where e.g. sometimes we get "ulonglong" as an alias
+        #  idiosyncrasies where e.g. sometimes we get "ulonglong" as an alias
         #  for "uint64" or "intc" as an alias for "int32"
         assert result_fill_value.dtype.kind == expected_fill_value.dtype.kind
         assert result_fill_value.dtype.itemsize == expected_fill_value.dtype.itemsize
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
@@ -553,7 +553,7 @@ def test_array_equivalent_nested():
 )
 def test_na_value_for_dtype(dtype, na_value):
     result = na_value_for_dtype(dtype)
-    # identify check doesnt work for datetime64/timedelta64("NaT") bc they
+    # identity check doesn't work for datetime64/timedelta64("NaT") bc they
     #  are not singletons
     assert result is na_value or (
         isna(result) and isna(na_value) and type(result) is type(na_value)
diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
@@ -238,7 +238,7 @@ def test_reductions_2d_axis1(self, data, method, request):
             else:
                 raise AssertionError("Both reductions should raise or neither")
 
-        # not necesarrily type/dtype-preserving, so weaker assertions
+        # not necessarily type/dtype-preserving, so weaker assertions
         assert result.shape == (1,)
         expected_scalar = getattr(data, method)()
         if pd.isna(result[0]):
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -90,7 +90,7 @@ def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_valu
         assert data_for_sorting.argmax() == 1
         assert data_for_sorting.argmin() == 2
 
-        # with repeated values -> first occurence
+        # with repeated values -> first occurrence
         data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
         assert data.argmax() == 3
         assert data.argmin() == 0
@@ -109,7 +109,7 @@ def test_argmin_argmax_empty_array(self, method, data):
 
     @pytest.mark.parametrize("method", ["argmax", "argmin"])
     def test_argmin_argmax_all_na(self, method, data, na_value):
-        # all missing with skipna=True is the same as emtpy
+        # all missing with skipna=True is the same as empty
         err_msg = "attempt to get"
         data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype)
         with pytest.raises(ValueError, match=err_msg):
@@ -530,7 +530,7 @@ def test_equals(self, data, na_value, as_series, box):
         # different length
         assert data[:2].equals(data[:3]) is False
 
-        # emtpy are equal
+        # empty are equal
         assert data[:0].equals(data[:0]) is True
 
         # other types
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -234,7 +234,7 @@ def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
         assert data_for_sorting.argmax() == 0
         assert data_for_sorting.argmin() == 2
 
-        # with repeated values -> first occurence
+        # with repeated values -> first occurrence
         data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
         assert data.argmax() == 1
         assert data.argmin() == 0
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -388,7 +388,7 @@ def test_update_inplace_sets_valid_block_values():
     # inplace update of a single column
     df["a"].fillna(1, inplace=True)
 
-    # check we havent put a Series into any block.values
+    # check we haven't put a Series into any block.values
     assert isinstance(df._mgr.blocks[0].values, Categorical)
 
     # smoketest for OP bug from GH#35731
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -1195,7 +1195,7 @@ def test_constructor_unequal_length_nested_list_column(self):
 
     def test_constructor_sequence_like(self):
         # GH 3783
-        # collections.Squence like
+        # collections.Sequence like
 
         class DummyContainer(abc.Sequence):
             def __init__(self, lst):
@@ -1426,9 +1426,9 @@ def test_constructor_list_of_dataclasses(self):
 
         Point = make_dataclass("Point", [("x", int), ("y", int)])
 
-        datas = [Point(0, 3), Point(1, 3)]
+        data = [Point(0, 3), Point(1, 3)]
         expected = DataFrame({"x": [0, 1], "y": [3, 3]})
-        result = DataFrame(datas)
+        result = DataFrame(data)
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_list_of_dataclasses_with_varying_types(self):
@@ -1439,12 +1439,12 @@ def test_constructor_list_of_dataclasses_with_varying_types(self):
         Point = make_dataclass("Point", [("x", int), ("y", int)])
         HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)])
 
-        datas = [Point(0, 3), HLine(1, 3, 3)]
+        data = [Point(0, 3), HLine(1, 3, 3)]
 
         expected = DataFrame(
             {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]}
         )
-        result = DataFrame(datas)
+        result = DataFrame(data)
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_list_of_dataclasses_error_thrown(self):
@@ -1912,7 +1912,7 @@ def test_constructor_for_list_with_dtypes(self):
         expected = Series([np.dtype("int64")] * 5)
         tm.assert_series_equal(result, expected)
 
-        # overflow issue? (we always expecte int64 upcasting here)
+        # overflow issue? (we always expect int64 upcasting here)
         df = DataFrame({"a": [2 ** 31, 2 ** 31 + 1]})
         assert df.dtypes.iloc[0] == np.dtype("int64")
 
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1462,9 +1462,9 @@ def test_unstack_odd_failure(self):
 Sat,Dinner,Yes,120.77,42
 Sun,Dinner,No,180.57,57
 Sun,Dinner,Yes,66.82,19
-Thur,Dinner,No,3.0,1
-Thur,Lunch,No,117.32,44
-Thur,Lunch,Yes,51.51,17"""
+Thu,Dinner,No,3.0,1
+Thu,Lunch,No,117.32,44
+Thu,Lunch,Yes,51.51,17"""
 
         df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"])
 
@@ -1490,7 +1490,7 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data):
     def test_unstack_bug(self):
         df = DataFrame(
             {
-                "state": ["naive", "naive", "naive", "activ", "activ", "activ"],
+                "state": ["naive", "naive", "naive", "active", "active", "active"],
                 "exp": ["a", "b", "b", "b", "a", "a"],
                 "barcode": [1, 2, 3, 4, 1, 3],
                 "v": ["hi", "hi", "bye", "bye", "bye", "peace"],
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -79,7 +79,7 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
 
 def _transfer_marks(engine, read_ext):
     """
-    engine gives us a pytest.param objec with some marks, read_ext is just
+    engine gives us a pytest.param object with some marks, read_ext is just
     a string.  We need to generate a new pytest.param inheriting the marks.
     """
     values = engine.values + (read_ext,)
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -436,7 +436,7 @@ def test_mixed(self, frame, path):
     def test_ts_frame(self, tsframe, path):
         df = tsframe
 
-        # freq doesnt round-trip
+        # freq doesn't round-trip
         index = pd.DatetimeIndex(np.asarray(df.index), freq=None)
         df.index = index
 
@@ -515,7 +515,7 @@ def test_inf_roundtrip(self, path):
 
     def test_sheets(self, frame, tsframe, path):
 
-        # freq doesnt round-trip
+        # freq doesn't round-trip
         index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
         tsframe.index = index
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -2277,7 +2277,7 @@ def test_east_asian_unicode_series(self):
             )
             assert repr(s) == expected
 
-        # Emable Unicode option -----------------------------------------
+        # Enable Unicode option -----------------------------------------
         with option_context("display.unicode.east_asian_width", True):
 
             # unicode index
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -63,7 +63,7 @@ def setup(self):
     @pytest.fixture
     def datetime_series(self):
         # Same as usual datetime_series, but with index freq set to None,
-        #  since that doesnt round-trip, see GH#33711
+        #  since that doesn't round-trip, see GH#33711
         ser = tm.makeTimeSeries()
         ser.name = "ts"
         ser.index = ser.index._with_freq(None)
@@ -72,7 +72,7 @@ def datetime_series(self):
     @pytest.fixture
     def datetime_frame(self):
         # Same as usual datetime_frame, but with index freq set to None,
-        #  since that doesnt round-trip, see GH#33711
+        #  since that doesn't round-trip, see GH#33711
         df = DataFrame(tm.getTimeSeriesData())
         df.index = df.index._with_freq(None)
         return df
@@ -459,7 +459,7 @@ def test_frame_mixedtype_orient(self):  # GH10289
 
     def test_v12_compat(self, datapath):
         dti = pd.date_range("2000-01-03", "2000-01-07")
-        # freq doesnt roundtrip
+        # freq doesn't round-trip
         dti = DatetimeIndex(np.asarray(dti), freq=None)
         df = DataFrame(
             [
@@ -489,7 +489,7 @@ def test_v12_compat(self, datapath):
 
     def test_blocks_compat_GH9037(self):
         index = pd.date_range("20000101", periods=10, freq="H")
-        # freq doesnt round-trip
+        # freq doesn't round-trip
         index = DatetimeIndex(list(index), freq=None)
 
         df_mixed = DataFrame(
@@ -637,8 +637,10 @@ def test_series_non_unique_index(self):
         tm.assert_series_equal(
             s, read_json(s.to_json(orient="split"), orient="split", typ="series")
         )
-        unser = read_json(s.to_json(orient="records"), orient="records", typ="series")
-        tm.assert_numpy_array_equal(s.values, unser.values)
+        unserialized = read_json(
+            s.to_json(orient="records"), orient="records", typ="series"
+        )
+        tm.assert_numpy_array_equal(s.values, unserialized.values)
 
     def test_series_default_orient(self, string_series):
         assert string_series.to_json() == string_series.to_json(orient="index")
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
@@ -1096,7 +1096,7 @@ def test_index(self):
     def test_datetime_index(self):
         date_unit = "ns"
 
-        # freq doesnt round-trip
+        # freq doesn't round-trip
         rng = DatetimeIndex(list(date_range("1/1/2000", periods=20)), freq=None)
         encoded = ujson.encode(rng, date_unit=date_unit)
 
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
@@ -356,7 +356,7 @@ def test_escapechar(all_parsers):
     # https://stackoverflow.com/questions/13824840/feature-request-for-
     # pandas-read-csv
     data = '''SEARCH_TERM,ACTUAL_URL
-"bra tv bord","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
+"bra tv board","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
 "tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
 "SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals series","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"'''  # noqa
 
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
@@ -262,7 +262,7 @@ def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
         tm.assert_frame_equal(result, expected)
 
     def test_read_csv_chunked_download(self, s3_resource, caplog, s3so):
-        # 8 MB, S3FS usees 5MB chunks
+        # 8 MB, S3FS uses 5MB chunks
         import s3fs
 
         df = DataFrame(np.random.randn(100000, 4), columns=list("abcd"))
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py

Original file line number	Diff line number	Diff line change
`@@ -553,7 +553,7 @@ def test_array_equivalent_nested():`
`553`	`553`	`)`
`554`	`554`	`def test_na_value_for_dtype(dtype, na_value):`
`555`	`555`	`result = na_value_for_dtype(dtype)`
`556`		`- # identify check doesnt work for datetime64/timedelta64("NaT") bc they`
	`556`	`+ # identity check doesn't work for datetime64/timedelta64("NaT") bc they`
`557`	`557`	`# are not singletons`
`558`	`558`	`assert result is na_value or (`
`559`	`559`	`isna(result) and isna(na_value) and type(result) is type(na_value)`
Original file line number	Diff line number	Diff line change
`@@ -2277,7 +2277,7 @@ def test_east_asian_unicode_series(self):`
`2277`	`2277`	`)`
`2278`	`2278`	`assert repr(s) == expected`
`2279`	`2279`
`2280`		`- # Emable Unicode option -----------------------------------------`
	`2280`	`+ # Enable Unicode option -----------------------------------------`
`2281`	`2281`	`with option_context("display.unicode.east_asian_width", True):`
`2282`	`2282`
`2283`	`2283`	`# unicode index`