Skip to content

Commit d201fcc

Browse files
authored
CI,STYLE: add spell check? (#38776)
* run codespell on pandas/core * fix underline
1 parent e752928 commit d201fcc

20 files changed

+46
-37
lines changed

.pre-commit-config.yaml

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
minimum_pre_commit_version: '2.9.2'
1+
minimum_pre_commit_version: 2.9.2
22
repos:
33
- repo: https://github.com/python/black
44
rev: 20.8b1
@@ -168,3 +168,9 @@ repos:
168168
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
169169
- id: trailing-whitespace
170170
exclude: \.(html|svg)$
171+
- repo: https://github.com/codespell-project/codespell
172+
rev: v2.0.0
173+
hooks:
174+
- id: codespell
175+
types_or: [python, rst, markdown]
176+
files: ^pandas/core/

pandas/core/arrays/_mixins.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def fillna(
275275
if method is not None:
276276
func = missing.get_fill_func(method)
277277
new_values = func(self._ndarray.copy(), limit=limit, mask=mask)
278-
# TODO: PandasArray didnt used to copy, need tests for this
278+
# TODO: PandasArray didn't used to copy, need tests for this
279279
new_values = self._from_backing_data(new_values)
280280
else:
281281
# fill with value

pandas/core/arrays/datetimelike.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ def isin(self, values) -> np.ndarray:
741741
return np.zeros(self.shape, dtype=bool)
742742

743743
if not isinstance(values, type(self)):
744-
inferrable = [
744+
inferable = [
745745
"timedelta",
746746
"timedelta64",
747747
"datetime",
@@ -751,7 +751,7 @@ def isin(self, values) -> np.ndarray:
751751
]
752752
if values.dtype == object:
753753
inferred = lib.infer_dtype(values, skipna=False)
754-
if inferred not in inferrable:
754+
if inferred not in inferable:
755755
if inferred == "string":
756756
pass
757757

pandas/core/arrays/floating.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class FloatingArray(NumericArray):
175175
.. warning::
176176
177177
FloatingArray is currently experimental, and its API or internal
178-
implementation may change without warning. Expecially the behaviour
178+
implementation may change without warning. Especially the behaviour
179179
regarding NaN (distinct from NA missing values) is subject to change.
180180
181181
We represent a FloatingArray with 2 numpy arrays:

pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -975,7 +975,7 @@ def _concat_same_type(
975975

976976
else:
977977
# when concatenating block indices, we don't claim that you'll
978-
# get an identical index as concating the values and then
978+
# get an identical index as concatenating the values and then
979979
# creating a new index. We don't want to spend the time trying
980980
# to merge blocks across arrays in `to_concat`, so the resulting
981981
# BlockIndex may have more blocks.

pandas/core/arrays/sparse/dtype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
371371
fill_value = fill_values[0]
372372

373373
# np.nan isn't a singleton, so we may end up with multiple
374-
# NaNs here, so we ignore tha all NA case too.
374+
# NaNs here, so we ignore the all NA case too.
375375
if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
376376
warnings.warn(
377377
"Concatenating sparse arrays with multiple fill "

pandas/core/arrays/string_arrow.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
467467
elif not isinstance(value, str):
468468
raise ValueError("Scalar must be NA or str")
469469

470-
# Slice data and insert inbetween
470+
# Slice data and insert in-between
471471
new_data = [
472472
*self._data[0:key].chunks,
473473
pa.array([value], type=pa.string()),
@@ -616,7 +616,7 @@ def value_counts(self, dropna: bool = True) -> Series:
616616

617617
# Index cannot hold ExtensionArrays yet
618618
index = Index(type(self)(vc.field(0)).astype(object))
619-
# No missings, so we can adhere to the interface and return a numpy array.
619+
# No missing values so we can adhere to the interface and return a numpy array.
620620
counts = np.array(vc.field(1))
621621

622622
if dropna and self._data.null_count > 0:

pandas/core/computation/parsing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def create_valid_python_identifier(name: str) -> str:
3535

3636
# Create a dict with the special characters and their replacement string.
3737
# EXACT_TOKEN_TYPES contains these special characters
38-
# toke.tok_name contains a readable description of the replacement string.
38+
# token.tok_name contains a readable description of the replacement string.
3939
special_characters_replacements = {
4040
char: f"_{token.tok_name[tokval]}_"
4141
# The ignore here is because of a bug in mypy that is resolved in 0.740

pandas/core/generic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -5560,7 +5560,7 @@ def _is_mixed_type(self) -> bool_t:
55605560
return False
55615561

55625562
if self._mgr.any_extension_types:
5563-
# Even if they have the same dtype, we cant consolidate them,
5563+
# Even if they have the same dtype, we can't consolidate them,
55645564
# so we pretend this is "mixed'"
55655565
return True
55665566

@@ -10647,7 +10647,7 @@ def _add_numeric_operations(cls):
1064710647
"""
1064810648
Add the operations to the cls; evaluate the doc strings again
1064910649
"""
10650-
axis_descr, name1, name2 = _doc_parms(cls)
10650+
axis_descr, name1, name2 = _doc_params(cls)
1065110651

1065210652
@doc(
1065310653
_bool_doc,
@@ -11207,8 +11207,8 @@ def last_valid_index(self):
1120711207
return self._find_valid_index("last")
1120811208

1120911209

11210-
def _doc_parms(cls):
11211-
"""Return a tuple of the doc parms."""
11210+
def _doc_params(cls):
11211+
"""Return a tuple of the doc params."""
1121211212
axis_descr = (
1121311213
f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}"
1121411214
)

pandas/core/groupby/ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ def _ea_wrap_cython_operation(
495495
If we have an ExtensionArray, unwrap, call _cython_operation, and
496496
re-wrap if appropriate.
497497
"""
498-
# TODO: general case implementation overrideable by EAs.
498+
# TODO: general case implementation overridable by EAs.
499499
orig_values = values
500500

501501
if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):

pandas/core/indexes/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4842,7 +4842,7 @@ def argsort(self, *args, **kwargs) -> np.ndarray:
48424842
>>> idx[order]
48434843
Index(['a', 'b', 'c', 'd'], dtype='object')
48444844
"""
4845-
# This works for either ndarray or EA, is overriden
4845+
# This works for either ndarray or EA, is overridden
48464846
# by RangeIndex, MultIIndex
48474847
return self._data.argsort(*args, **kwargs)
48484848

@@ -4974,7 +4974,7 @@ def get_indexer_non_unique(self, target):
49744974
return self._get_indexer_non_comparable(target, method=None, unique=False)
49754975

49764976
if not is_dtype_equal(self.dtype, target.dtype):
4977-
# TODO: if object, could use infer_dtype to pre-empt costly
4977+
# TODO: if object, could use infer_dtype to preempt costly
49784978
# conversion if still non-comparable?
49794979
dtype = find_common_type([self.dtype, target.dtype])
49804980
if (

pandas/core/indexes/datetimelike.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -164,12 +164,12 @@ def equals(self, other: object) -> bool:
164164
return False
165165
elif not isinstance(other, type(self)):
166166
should_try = False
167-
inferrable = self._data._infer_matches
167+
inferable = self._data._infer_matches
168168
if other.dtype == object:
169-
should_try = other.inferred_type in inferrable
169+
should_try = other.inferred_type in inferable
170170
elif is_categorical_dtype(other.dtype):
171171
other = cast("CategoricalIndex", other)
172-
should_try = other.categories.inferred_type in inferrable
172+
should_try = other.categories.inferred_type in inferable
173173

174174
if should_try:
175175
try:

pandas/core/indexes/range.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,7 @@ def difference(self, other, sort=None):
643643
if len(overlap) == len(self):
644644
return self[:0].rename(res_name)
645645
if not isinstance(overlap, RangeIndex):
646-
# We wont end up with RangeIndex, so fall back
646+
# We won't end up with RangeIndex, so fall back
647647
return super().difference(other, sort=sort)
648648
if overlap.step != first.step:
649649
# In some cases we might be able to get a RangeIndex back,

pandas/core/internals/blocks.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1807,7 +1807,7 @@ def _slice(self, slicer):
18071807
# return same dims as we currently have
18081808
if not isinstance(slicer, tuple) and self.ndim == 2:
18091809
# reached via getitem_block via _slice_take_blocks_ax0
1810-
# TODO(EA2D): wont be necessary with 2D EAs
1810+
# TODO(EA2D): won't be necessary with 2D EAs
18111811
slicer = (slicer, slice(None))
18121812

18131813
if isinstance(slicer, tuple) and len(slicer) == 2:
@@ -1817,7 +1817,7 @@ def _slice(self, slicer):
18171817
"invalid slicing for a 1-ndim ExtensionArray", first
18181818
)
18191819
# GH#32959 only full-slicers along fake-dim0 are valid
1820-
# TODO(EA2D): wont be necessary with 2D EAs
1820+
# TODO(EA2D): won't be necessary with 2D EAs
18211821
new_locs = self.mgr_locs[first]
18221822
if len(new_locs):
18231823
# effectively slice(None)
@@ -2280,7 +2280,7 @@ def _check_ndim(self, values, ndim):
22802280
"""
22812281
ndim inference and validation.
22822282
2283-
This is overriden by the DatetimeTZBlock to check the case of 2D
2283+
This is overridden by the DatetimeTZBlock to check the case of 2D
22842284
data (values.ndim == 2), which should only be allowed if ndim is
22852285
also 2.
22862286
The case of 1D array is still allowed with both ndim of 1 or 2, as

pandas/core/internals/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ def _convert_object_array(
746746
content: List[Scalar], dtype: Optional[DtypeObj] = None
747747
) -> List[Scalar]:
748748
"""
749-
Internal function ot convert object array.
749+
Internal function to convert object array.
750750
751751
Parameters
752752
----------

pandas/core/internals/managers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1355,7 +1355,7 @@ def _slice_take_blocks_ax0(
13551355
blk = self.blocks[0]
13561356

13571357
if sl_type in ("slice", "mask"):
1358-
# GH#32959 EABlock would fail since we cant make 0-width
1358+
# GH#32959 EABlock would fail since we can't make 0-width
13591359
# TODO(EA2D): special casing unnecessary with 2D EAs
13601360
if sllen == 0:
13611361
return []

pandas/core/nanops.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -1221,33 +1221,33 @@ def nankurt(
12211221

12221222
with np.errstate(invalid="ignore", divide="ignore"):
12231223
adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
1224-
numer = count * (count + 1) * (count - 1) * m4
1225-
denom = (count - 2) * (count - 3) * m2 ** 2
1224+
numerator = count * (count + 1) * (count - 1) * m4
1225+
denominator = (count - 2) * (count - 3) * m2 ** 2
12261226

12271227
# floating point error
12281228
#
12291229
# #18044 in _libs/windows.pyx calc_kurt follow this behavior
12301230
# to fix the fperr to treat denom <1e-14 as zero
1231-
numer = _zero_out_fperr(numer)
1232-
denom = _zero_out_fperr(denom)
1231+
numerator = _zero_out_fperr(numerator)
1232+
denominator = _zero_out_fperr(denominator)
12331233

1234-
if not isinstance(denom, np.ndarray):
1234+
if not isinstance(denominator, np.ndarray):
12351235
# if ``denom`` is a scalar, check these corner cases first before
12361236
# doing division
12371237
if count < 4:
12381238
return np.nan
1239-
if denom == 0:
1239+
if denominator == 0:
12401240
return 0
12411241

12421242
with np.errstate(invalid="ignore", divide="ignore"):
1243-
result = numer / denom - adj
1243+
result = numerator / denominator - adj
12441244

12451245
dtype = values.dtype
12461246
if is_float_dtype(dtype):
12471247
result = result.astype(dtype)
12481248

12491249
if isinstance(result, np.ndarray):
1250-
result = np.where(denom == 0, 0, result)
1250+
result = np.where(denominator == 0, 0, result)
12511251
result[count < 4] = np.nan
12521252

12531253
return result

pandas/core/reshape/pivot.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -740,8 +740,8 @@ def _build_names_mapper(
740740
A row or column name is replaced if it is duplicate among the rows of the inputs,
741741
among the columns of the inputs or between the rows and the columns.
742742
743-
Paramters
744-
---------
743+
Parameters
744+
----------
745745
rownames: list[str]
746746
colnames: list[str]
747747

pandas/core/window/rolling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1858,7 +1858,7 @@ def _get_corr(a, b):
18581858
window=window, min_periods=self.min_periods, center=self.center
18591859
)
18601860
# GH 31286: Through using var instead of std we can avoid numerical
1861-
# issues when the result of var is withing floating proint precision
1861+
# issues when the result of var is within floating point precision
18621862
# while std is not.
18631863
return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5
18641864

setup.cfg

+3
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ filterwarnings =
6363
error:The SparseArray:FutureWarning
6464
junit_family=xunit2
6565

66+
[codespell]
67+
ignore-words-list=ba,blocs,coo,datas,fo,hist,nd,ser
68+
6669
[coverage:run]
6770
branch = False
6871
omit =

0 commit comments

Comments
 (0)