Skip to content

Commit 5b850ac

Browse files
committed
Merge branch 'main' into 37715-remove-mypy-ignore
2 parents c861a74 + 2e0c8a4 commit 5b850ac

35 files changed

+81
-40
lines changed

ci/code_checks.sh

+1
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
524524
pandas.api.extensions.ExtensionArray.insert \
525525
pandas.api.extensions.ExtensionArray.isin \
526526
pandas.api.extensions.ExtensionArray.isna \
527+
pandas.api.extensions.ExtensionArray.map \
527528
pandas.api.extensions.ExtensionArray.ravel \
528529
pandas.api.extensions.ExtensionArray.searchsorted \
529530
pandas.api.extensions.ExtensionArray.shift \

doc/source/getting_started/index.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ Data sets do not only contain numerical data. pandas provides a wide range of fu
533533
Coming from...
534534
--------------
535535

536-
Are you familiar with other software for manipulating tablular data? Learn
536+
Are you familiar with other software for manipulating tabular data? Learn
537537
the pandas-equivalent operations compared to software you already know:
538538

539539
.. panels::

doc/source/reference/extensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ objects.
5353
api.extensions.ExtensionArray.insert
5454
api.extensions.ExtensionArray.isin
5555
api.extensions.ExtensionArray.isna
56+
api.extensions.ExtensionArray.map
5657
api.extensions.ExtensionArray.ravel
5758
api.extensions.ExtensionArray.repeat
5859
api.extensions.ExtensionArray.searchsorted

doc/source/user_guide/advanced.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
322322
.. warning::
323323

324324
You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and
325-
for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted
325+
for the **columns**. There are some ambiguous cases where the passed indexer could be misinterpreted
326326
  as indexing *both* axes, rather than into, say, the ``MultiIndex`` for the rows.
327327

328328
You should do this:

doc/source/user_guide/groupby.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ the columns except the one we specify:
149149
grouped.sum()
150150
151151
The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do
152-
a tranpose:
152+
a transpose:
153153

154154
.. ipython::
155155

doc/source/user_guide/timeseries.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -507,14 +507,18 @@ used if a custom frequency string is passed.
507507
Timestamp limitations
508508
---------------------
509509

510-
Since pandas represents timestamps in nanosecond resolution, the time span that
510+
The limits of timestamp representation depend on the chosen resolution. For
511+
nanosecond resolution, the time span that
511512
can be represented using a 64-bit integer is limited to approximately 584 years:
512513

513514
.. ipython:: python
514515
515516
pd.Timestamp.min
516517
pd.Timestamp.max
517518
519+
With second resolution, the representable range grows to approximately ``+/- 2.9e11 years``.
520+
Timestamps can be converted from one resolution to another with ``as_unit``.
521+
518522
.. seealso::
519523

520524
:ref:`timeseries.oob`

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ I/O
209209
^^^
210210
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
211211
- :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
212+
- Bug in :func:`read_html` where ``<style>`` elements were read into DataFrames (:issue:`52197`)
212213
-
213214

214215
Period

pandas/core/apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1491,7 +1491,7 @@ def validate_func_kwargs(
14911491
Returns
14921492
-------
14931493
columns : List[str]
1494-
List of user-provied keys.
1494+
List of user-provided keys.
14951495
func : List[Union[str, callable[...,Any]]]
14961496
List of user-provided aggfuncs
14971497

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
251251
except pa.ArrowInvalid:
252252
# GH50430: let pyarrow infer type, then cast
253253
scalars = pa.array(scalars, from_pandas=True)
254-
if pa_dtype:
254+
if pa_dtype and scalars.type != pa_dtype:
255255
scalars = scalars.cast(pa_dtype)
256256
return cls(scalars)
257257

pandas/core/arrays/base.py

+6
Original file line numberDiff line numberDiff line change
@@ -1719,6 +1719,12 @@ def map(self, mapper, na_action=None):
17191719
The output of the mapping function applied to the array.
17201720
If the function returns a tuple with more than one element
17211721
a MultiIndex will be returned.
1722+
1723+
Examples
1724+
--------
1725+
>>> ext_arr = pd.array([1, 2, 3])
1726+
>>> ext_arr.map(str)
1727+
array(['1', '2', '3'], dtype=object)
17221728
"""
17231729
return map_array(self, mapper, na_action=na_action)
17241730

pandas/core/config_init.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -711,13 +711,13 @@ def register_converter_cb(key) -> None:
711711
styler_max_rows = """
712712
: int, optional
713713
The maximum number of rows that will be rendered. May still be reduced to
714-
satsify ``max_elements``, which takes precedence.
714+
satisfy ``max_elements``, which takes precedence.
715715
"""
716716

717717
styler_max_columns = """
718718
: int, optional
719719
The maximum number of columns that will be rendered. May still be reduced to
720-
satsify ``max_elements``, which takes precedence.
720+
satisfy ``max_elements``, which takes precedence.
721721
"""
722722

723723
styler_precision = """

pandas/core/dtypes/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1697,7 +1697,7 @@ def pandas_dtype(dtype) -> DtypeObj:
16971697
try:
16981698
with warnings.catch_warnings():
16991699
# GH#51523 - Series.astype(np.integer) doesn't show
1700-
# numpy deprication warning of np.integer
1700+
# numpy deprecation warning of np.integer
17011701
# Hence enabling DeprecationWarning
17021702
warnings.simplefilter("always", DeprecationWarning)
17031703
npdtype = np.dtype(dtype)

pandas/core/dtypes/dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -901,7 +901,7 @@ def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset:
901901
return freq_offset
902902

903903
raise TypeError(
904-
"PeriodDtype argument should be string or BaseOffet, "
904+
"PeriodDtype argument should be string or BaseOffset, "
905905
f"got {type(freq).__name__}"
906906
)
907907

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6559,7 +6559,7 @@ def infer_objects(self, copy: bool_t | None = None) -> Self:
65596559
Parameters
65606560
----------
65616561
copy : bool, default True
6562-
Whether to make a copy for non-object or non-inferrable columns
6562+
Whether to make a copy for non-object or non-inferable columns
65636563
or Series.
65646564
65656565
Returns

pandas/io/excel/_pyxlsb.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def get_sheet_data(
8989
file_rows_needed: int | None = None,
9090
) -> list[list[Scalar]]:
9191
data: list[list[Scalar]] = []
92-
prevous_row_number = -1
92+
previous_row_number = -1
9393
# When sparse=True the rows can have different lengths and empty rows are
9494
# not returned. The cells are namedtuples of row, col, value (r, c, v).
9595
for row in sheet.rows(sparse=True):
@@ -99,9 +99,9 @@ def get_sheet_data(
9999
# trim trailing empty elements
100100
converted_row.pop()
101101
if converted_row:
102-
data.extend([[]] * (row_number - prevous_row_number - 1))
102+
data.extend([[]] * (row_number - previous_row_number - 1))
103103
data.append(converted_row)
104-
prevous_row_number = row_number
104+
previous_row_number = row_number
105105
if file_rows_needed is not None and len(data) >= file_rows_needed:
106106
break
107107
if data:

pandas/io/formats/format.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ def _calc_max_rows_fitted(self) -> int | None:
740740
_, height = get_terminal_size()
741741
if self.max_rows == 0:
742742
# rows available to fill with actual data
743-
return height - self._get_number_of_auxillary_rows()
743+
return height - self._get_number_of_auxiliary_rows()
744744

745745
if self._is_screen_short(height):
746746
max_rows = height
@@ -775,7 +775,7 @@ def _is_screen_narrow(self, max_width) -> bool:
775775
def _is_screen_short(self, max_height) -> bool:
776776
return bool(self.max_rows == 0 and len(self.frame) > max_height)
777777

778-
def _get_number_of_auxillary_rows(self) -> int:
778+
def _get_number_of_auxiliary_rows(self) -> int:
779779
"""Get number of rows occupied by prompt, dots and dimension info."""
780780
dot_row = 1
781781
prompt_row = 1

pandas/io/html.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,6 @@ def __init__(self, *args, **kwargs) -> None:
582582
def _parse_tables(self, doc, match, attrs):
583583
element_name = self._strainer.name
584584
tables = doc.find_all(element_name, attrs=attrs)
585-
586585
if not tables:
587586
raise ValueError("No tables found")
588587

@@ -592,13 +591,15 @@ def _parse_tables(self, doc, match, attrs):
592591

593592
for table in tables:
594593
if self.displayed_only:
594+
for elem in table.find_all("style"):
595+
elem.decompose()
596+
595597
for elem in table.find_all(style=re.compile(r"display:\s*none")):
596598
elem.decompose()
597599

598600
if table not in unique_tables and table.find(string=match) is not None:
599601
result.append(table)
600602
unique_tables.add(table)
601-
602603
if not result:
603604
raise ValueError(f"No tables found matching pattern {repr(match.pattern)}")
604605
return result
@@ -730,10 +731,11 @@ def _parse_tables(self, doc, match, kwargs):
730731
# lxml utilizes XPATH 1.0 which does not have regex
731732
# support. As a result, we find all elements with a style
732733
# attribute and iterate them to check for display:none
734+
for elem in table.xpath(".//style"):
735+
elem.drop_tree()
733736
for elem in table.xpath(".//*[@style]"):
734737
if "display:none" in elem.attrib.get("style", "").replace(" ", ""):
735738
elem.drop_tree()
736-
737739
if not tables:
738740
raise ValueError(f"No tables found matching regex {repr(pattern)}")
739741
return tables
@@ -1170,6 +1172,7 @@ def read_html(
11701172
'{None, "header", "footer", "body", "all"}, got '
11711173
f'"{extract_links}"'
11721174
)
1175+
11731176
validate_header_arg(header)
11741177
check_dtype_backend(dtype_backend)
11751178

pandas/tests/arrays/categorical/test_operators.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ def test_comparisons(self, factor):
8686
cat_rev > cat_rev_base2
8787

8888
# Only categories with same ordering information can be compared
89-
cat_unorderd = cat.set_ordered(False)
89+
cat_unordered = cat.set_ordered(False)
9090
assert not (cat > cat).any()
9191

9292
with pytest.raises(TypeError, match=msg):
93-
cat > cat_unorderd
93+
cat > cat_unordered
9494

9595
# comparison (in both directions) with Series will raise
9696
s = Series(["b", "b", "b"])

pandas/tests/dtypes/test_dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ def test_freq_argument_required(self):
523523
with pytest.raises(TypeError, match=msg):
524524
PeriodDtype()
525525

526-
msg = "PeriodDtype argument should be string or BaseOffet, got NoneType"
526+
msg = "PeriodDtype argument should be string or BaseOffset, got NoneType"
527527
with pytest.raises(TypeError, match=msg):
528528
# GH#51790
529529
PeriodDtype(None)

pandas/tests/frame/methods/test_isetitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_isetitem_ea_df_scalar_indexer(self):
3838
)
3939
tm.assert_frame_equal(df, expected)
4040

41-
def test_isetitem_dimension_missmatch(self):
41+
def test_isetitem_dimension_mismatch(self):
4242
# GH#51701
4343
df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
4444
value = df.copy()

pandas/tests/frame/test_arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2006,7 +2006,7 @@ def test_inplace_arithmetic_series_update(using_copy_on_write):
20062006
tm.assert_frame_equal(df, expected)
20072007

20082008

2009-
def test_arithemetic_multiindex_align():
2009+
def test_arithmetic_multiindex_align():
20102010
"""
20112011
Regression test for: https://github.com/pandas-dev/pandas/issues/33765
20122012
"""

pandas/tests/frame/test_npfuncs.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212

1313
class TestAsArray:
14-
def test_asarray_homogenous(self):
14+
def test_asarray_homogeneous(self):
1515
df = DataFrame({"A": Categorical([1, 2]), "B": Categorical([1, 2])})
1616
result = np.asarray(df)
1717
# may change from object in the future

pandas/tests/frame/test_unary.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def test_invert_mixed(self):
8484
)
8585
tm.assert_frame_equal(result, expected)
8686

87-
def test_invert_empy_not_input(self):
87+
def test_invert_empty_not_input(self):
8888
# GH#51032
8989
df = pd.DataFrame()
9090
result = ~df

pandas/tests/groupby/test_filters.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -603,12 +603,12 @@ def test_filter_non_bool_raises():
603603
def test_filter_dropna_with_empty_groups():
604604
# GH 10780
605605
data = Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3))
606-
groupped = data.groupby(level=0)
607-
result_false = groupped.filter(lambda x: x.mean() > 1, dropna=False)
606+
grouped = data.groupby(level=0)
607+
result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False)
608608
expected_false = Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3))
609609
tm.assert_series_equal(result_false, expected_false)
610610

611-
result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True)
611+
result_true = grouped.filter(lambda x: x.mean() > 1, dropna=True)
612612
expected_true = Series(index=pd.Index([], dtype=int), dtype=np.float64)
613613
tm.assert_series_equal(result_true, expected_true)
614614

pandas/tests/io/json/test_readlines.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine):
320320

321321
def test_readjson_nrows_requires_lines(engine):
322322
# GH 33916
323-
# Test ValuError raised if nrows is set without setting lines in read_json
323+
# Test ValueError raised if nrows is set without setting lines in read_json
324324
jsonl = """{"a": 1, "b": 2}
325325
{"a": 3, "b": 4}
326326
{"a": 5, "b": 6}

pandas/tests/io/test_html.py

+25
Original file line numberDiff line numberDiff line change
@@ -1495,3 +1495,28 @@ def test_invalid_dtype_backend(self):
14951495
)
14961496
with pytest.raises(ValueError, match=msg):
14971497
read_html("test", dtype_backend="numpy")
1498+
1499+
def test_style_tag(self):
1500+
# GH 48316
1501+
data = """
1502+
<table>
1503+
<tr>
1504+
<th>
1505+
<style>.style</style>
1506+
A
1507+
</th>
1508+
<th>B</th>
1509+
</tr>
1510+
<tr>
1511+
<td>A1</td>
1512+
<td>B1</td>
1513+
</tr>
1514+
<tr>
1515+
<td>A2</td>
1516+
<td>B2</td>
1517+
</tr>
1518+
</table>
1519+
"""
1520+
result = self.read_html(data)[0]
1521+
expected = DataFrame(data=[["A1", "B1"], ["A2", "B2"]], columns=["A", "B"])
1522+
tm.assert_frame_equal(result, expected)

pandas/tests/resample/test_resample_api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -983,12 +983,12 @@ def test_df_axis_param_depr():
983983
index.name = "date"
984984
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index).T
985985

986-
# Deprication error when axis=1 is explicitly passed
986+
# Deprecation error when axis=1 is explicitly passed
987987
warning_msg = "DataFrame.resample with axis=1 is deprecated."
988988
with tm.assert_produces_warning(FutureWarning, match=warning_msg):
989989
df.resample("M", axis=1)
990990

991-
# Deprication error when axis=0 is explicitly passed
991+
# Deprecation error when axis=0 is explicitly passed
992992
df = df.T
993993
warning_msg = (
994994
"The 'axis' keyword in DataFrame.resample is deprecated and "

pandas/tests/reshape/concat/test_concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ def test_concat_mixed_objs(self):
338338
result = concat([s1, df, s2], ignore_index=True)
339339
tm.assert_frame_equal(result, expected)
340340

341-
def test_dtype_coerceion(self):
341+
def test_dtype_coercion(self):
342342
# 12411
343343
df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]})
344344

pandas/tests/scalar/timestamp/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ def test_constructor_fromisocalendar(self):
730730
assert isinstance(result, Timestamp)
731731

732732

733-
def test_constructor_ambigous_dst():
733+
def test_constructor_ambiguous_dst():
734734
# GH 24329
735735
# Make sure that calling Timestamp constructor
736736
# on Timestamp created from ambiguous time

pandas/tests/scalar/timestamp/test_unary_ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
class TestTimestampUnaryOps:
3131
# --------------------------------------------------------------
32-
def test_round_divison_by_zero_raises(self):
32+
def test_round_division_by_zero_raises(self):
3333
ts = Timestamp("2016-01-01")
3434

3535
msg = "Division by zero in rounding"

pandas/tests/series/indexing/test_getitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ def test_getitem_dataframe_raises():
576576
ser[df > 5]
577577

578578

579-
def test_getitem_assignment_series_aligment():
579+
def test_getitem_assignment_series_alignment():
580580
# https://github.com/pandas-dev/pandas/issues/37427
581581
# with getitem, when assigning with a Series, it is not first aligned
582582
ser = Series(range(10))

0 commit comments

Comments
 (0)