Skip to content

Commit dd59499

Browse files
committed
Merge branch 'main' of github.com:Wong2333/pandas
2 parents d81e278 + ca91dd4 commit dd59499

File tree

14 files changed

+152
-51
lines changed

14 files changed

+152
-51
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8484
-i "pandas.arrays.IntervalArray.length SA01" \
8585
-i "pandas.arrays.NumpyExtensionArray SA01" \
8686
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
87-
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
8887
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
8988
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
9089
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
@@ -95,9 +94,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9594
-i "pandas.core.resample.Resampler.std SA01" \
9695
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
9796
-i "pandas.core.resample.Resampler.var SA01" \
98-
-i "pandas.errors.NullFrequencyError SA01" \
99-
-i "pandas.errors.NumbaUtilError SA01" \
100-
-i "pandas.errors.PerformanceWarning SA01" \
10197
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
10298
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
10399
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \

doc/source/reference/frame.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ Reindexing / selection / label manipulation
185185
DataFrame.duplicated
186186
DataFrame.equals
187187
DataFrame.filter
188-
DataFrame.head
189188
DataFrame.idxmax
190189
DataFrame.idxmin
191190
DataFrame.reindex
@@ -196,7 +195,6 @@ Reindexing / selection / label manipulation
196195
DataFrame.sample
197196
DataFrame.set_axis
198197
DataFrame.set_index
199-
DataFrame.tail
200198
DataFrame.take
201199
DataFrame.truncate
202200

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ Datetimelike
626626
- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`)
627627
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
628628
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
629+
- Bug in :meth:`to_datetime` where assembling datetimes from a float32 :class:`DataFrame` with year, month, day, etc. columns caused precision issues and incorrect results. (:issue:`60506`)
629630
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
630631
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
631632
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
@@ -798,6 +799,7 @@ Other
798799
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
799800
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
800801
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
802+
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
801803
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
802804

803805
.. ***DO NOT USE THIS SECTION***

pandas/core/arrays/arrow/accessors.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@ def len(self) -> Series:
117117

118118
value_lengths = pc.list_value_length(self._pa_array)
119119
return Series(
120-
value_lengths, dtype=ArrowDtype(value_lengths.type), index=self._data.index
120+
value_lengths,
121+
dtype=ArrowDtype(value_lengths.type),
122+
index=self._data.index,
123+
name=self._data.name,
121124
)
122125

123126
def __getitem__(self, key: int | slice) -> Series:
@@ -162,7 +165,10 @@ def __getitem__(self, key: int | slice) -> Series:
162165
# key = pc.add(key, pc.list_value_length(self._pa_array))
163166
element = pc.list_element(self._pa_array, key)
164167
return Series(
165-
element, dtype=ArrowDtype(element.type), index=self._data.index
168+
element,
169+
dtype=ArrowDtype(element.type),
170+
index=self._data.index,
171+
name=self._data.name,
166172
)
167173
elif isinstance(key, slice):
168174
if pa_version_under11p0:
@@ -181,7 +187,12 @@ def __getitem__(self, key: int | slice) -> Series:
181187
if step is None:
182188
step = 1
183189
sliced = pc.list_slice(self._pa_array, start, stop, step)
184-
return Series(sliced, dtype=ArrowDtype(sliced.type), index=self._data.index)
190+
return Series(
191+
sliced,
192+
dtype=ArrowDtype(sliced.type),
193+
index=self._data.index,
194+
name=self._data.name,
195+
)
185196
else:
186197
raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
187198

@@ -223,7 +234,12 @@ def flatten(self) -> Series:
223234
counts = pa.compute.list_value_length(self._pa_array)
224235
flattened = pa.compute.list_flatten(self._pa_array)
225236
index = self._data.index.repeat(counts.fill_null(pa.scalar(0, counts.type)))
226-
return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
237+
return Series(
238+
flattened,
239+
dtype=ArrowDtype(flattened.type),
240+
index=index,
241+
name=self._data.name,
242+
)
227243

228244

229245
class StructAccessor(ArrowAccessor):

pandas/core/computation/pytables.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def generate(self, v) -> str:
205205
val = v.tostring(self.encoding)
206206
return f"({self.lhs} {self.op} {val})"
207207

208-
def convert_value(self, v) -> TermValue:
208+
def convert_value(self, conv_val) -> TermValue:
209209
"""
210210
convert the expression that is in the term to something that is
211211
accepted by pytables
@@ -219,44 +219,44 @@ def stringify(value):
219219
kind = ensure_decoded(self.kind)
220220
meta = ensure_decoded(self.meta)
221221
if kind == "datetime" or (kind and kind.startswith("datetime64")):
222-
if isinstance(v, (int, float)):
223-
v = stringify(v)
224-
v = ensure_decoded(v)
225-
v = Timestamp(v).as_unit("ns")
226-
if v.tz is not None:
227-
v = v.tz_convert("UTC")
228-
return TermValue(v, v._value, kind)
222+
if isinstance(conv_val, (int, float)):
223+
conv_val = stringify(conv_val)
224+
conv_val = ensure_decoded(conv_val)
225+
conv_val = Timestamp(conv_val).as_unit("ns")
226+
if conv_val.tz is not None:
227+
conv_val = conv_val.tz_convert("UTC")
228+
return TermValue(conv_val, conv_val._value, kind)
229229
elif kind in ("timedelta64", "timedelta"):
230-
if isinstance(v, str):
231-
v = Timedelta(v)
230+
if isinstance(conv_val, str):
231+
conv_val = Timedelta(conv_val)
232232
else:
233-
v = Timedelta(v, unit="s")
234-
v = v.as_unit("ns")._value
235-
return TermValue(int(v), v, kind)
233+
conv_val = Timedelta(conv_val, unit="s")
234+
conv_val = conv_val.as_unit("ns")._value
235+
return TermValue(int(conv_val), conv_val, kind)
236236
elif meta == "category":
237237
metadata = extract_array(self.metadata, extract_numpy=True)
238238
result: npt.NDArray[np.intp] | np.intp | int
239-
if v not in metadata:
239+
if conv_val not in metadata:
240240
result = -1
241241
else:
242-
result = metadata.searchsorted(v, side="left")
242+
result = metadata.searchsorted(conv_val, side="left")
243243
return TermValue(result, result, "integer")
244244
elif kind == "integer":
245245
try:
246-
v_dec = Decimal(v)
246+
v_dec = Decimal(conv_val)
247247
except InvalidOperation:
248248
# GH 54186
249249
# convert conv_val to float to raise float's ValueError
250-
float(v)
250+
float(conv_val)
251251
else:
252-
v = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN"))
253-
return TermValue(v, v, kind)
252+
conv_val = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN"))
253+
return TermValue(conv_val, conv_val, kind)
254254
elif kind == "float":
255-
v = float(v)
256-
return TermValue(v, v, kind)
255+
conv_val = float(conv_val)
256+
return TermValue(conv_val, conv_val, kind)
257257
elif kind == "bool":
258-
if isinstance(v, str):
259-
v = v.strip().lower() not in [
258+
if isinstance(conv_val, str):
259+
conv_val = conv_val.strip().lower() not in [
260260
"false",
261261
"f",
262262
"no",
@@ -268,13 +268,13 @@ def stringify(value):
268268
"",
269269
]
270270
else:
271-
v = bool(v)
272-
return TermValue(v, v, kind)
273-
elif isinstance(v, str):
271+
conv_val = bool(conv_val)
272+
return TermValue(conv_val, conv_val, kind)
273+
elif isinstance(conv_val, str):
274274
# string quoting
275-
return TermValue(v, stringify(v), "string")
275+
return TermValue(conv_val, stringify(conv_val), "string")
276276
else:
277-
raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")
277+
raise TypeError(f"Cannot compare {conv_val} of type {type(conv_val)} to {kind} column")
278278

279279
def convert_values(self) -> None:
280280
pass

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1018,7 +1018,7 @@ def shape(self) -> tuple[int, int]:
10181018
10191019
See Also
10201020
--------
1021-
ndarray.shape : Tuple of array dimensions.
1021+
numpy.ndarray.shape : Tuple of array dimensions.
10221022
10231023
Examples
10241024
--------

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,7 @@ def ndim(self) -> int:
640640
641641
See Also
642642
--------
643-
ndarray.ndim : Number of array dimensions.
643+
numpy.ndarray.ndim : Number of array dimensions.
644644
645645
Examples
646646
--------

pandas/core/resample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ def bfill(self, limit: int | None = None):
694694
695695
References
696696
----------
697-
.. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)
697+
.. [1] https://en.wikipedia.org/wiki/Imputation_%28statistics%29
698698
699699
Examples
700700
--------

pandas/core/strings/accessor.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,11 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=lib.no_default):
13741374
"""
13751375
Determine if each string starts with a match of a regular expression.
13761376
1377+
Determines whether each string in the Series or Index starts with a
1378+
match to a specified regular expression. This function is especially
1379+
useful for validating prefixes, such as ensuring that codes, tags, or
1380+
identifiers begin with a specific pattern.
1381+
13771382
Parameters
13781383
----------
13791384
pat : str
@@ -1419,6 +1424,11 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=lib.no_default):
14191424
"""
14201425
Determine if each string entirely matches a regular expression.
14211426
1427+
Checks if each string in the Series or Index fully matches the
1428+
specified regular expression pattern. This function is useful when the
1429+
requirement is for an entire string to conform to a pattern, such as
1430+
validating formats like phone numbers or email addresses.
1431+
14221432
Parameters
14231433
----------
14241434
pat : str
@@ -1647,6 +1657,10 @@ def repeat(self, repeats):
16471657
"""
16481658
Duplicate each string in the Series or Index.
16491659
1660+
Duplicates each string in the Series or Index, either by applying the
1661+
same repeat count to all elements or by using different repeat values
1662+
for each element.
1663+
16501664
Parameters
16511665
----------
16521666
repeats : int or sequence of int
@@ -1710,6 +1724,12 @@ def pad(
17101724
"""
17111725
Pad strings in the Series/Index up to width.
17121726
1727+
This function pads strings in a Series or Index to a specified width,
1728+
filling the extra space with a character of your choice. It provides
1729+
flexibility in positioning the padding, allowing it to be added to the
1730+
left, right, or both sides. This is useful for formatting strings to
1731+
align text or ensure consistent string lengths in data processing.
1732+
17131733
Parameters
17141734
----------
17151735
width : int
@@ -1920,6 +1940,11 @@ def slice(self, start=None, stop=None, step=None):
19201940
"""
19211941
Slice substrings from each element in the Series or Index.
19221942
1943+
Slicing substrings from strings in a Series or Index helps extract
1944+
specific portions of data, making it easier to analyze or manipulate
1945+
text. This is useful for tasks like parsing structured text fields or
1946+
isolating parts of strings with a consistent format.
1947+
19231948
Parameters
19241949
----------
19251950
start : int, optional
@@ -1996,6 +2021,11 @@ def slice_replace(self, start=None, stop=None, repl=None):
19962021
"""
19972022
Replace a positional slice of a string with another value.
19982023
2024+
This function allows replacing specific parts of a string in a Series
2025+
or Index by specifying start and stop positions. It is useful for
2026+
modifying substrings in a controlled way, such as updating sections of
2027+
text based on their positions or patterns.
2028+
19992029
Parameters
20002030
----------
20012031
start : int, optional

pandas/core/tools/datetimes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from pandas.core.dtypes.common import (
4545
ensure_object,
4646
is_float,
47+
is_float_dtype,
4748
is_integer,
4849
is_integer_dtype,
4950
is_list_like,
@@ -1153,6 +1154,10 @@ def coerce(values):
11531154
# we allow coercion if errors allows it
11541155
values = to_numeric(values, errors=errors)
11551156

1157+
# prevent precision issues in case of float32 # GH#60506
1158+
if is_float_dtype(values.dtype):
1159+
values = values.astype("float64")
1160+
11561161
# prevent overflow in case of int8 or int16
11571162
if is_integer_dtype(values.dtype):
11581163
values = values.astype("int64")

pandas/errors/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ class NullFrequencyError(ValueError):
4545
Particularly ``DatetimeIndex.shift``, ``TimedeltaIndex.shift``,
4646
``PeriodIndex.shift``.
4747
48+
See Also
49+
--------
50+
Index.shift : Shift values of Index.
51+
Series.shift : Shift values of Series.
52+
4853
Examples
4954
--------
5055
>>> df = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
@@ -58,6 +63,12 @@ class PerformanceWarning(Warning):
5863
"""
5964
Warning raised when there is a possible performance impact.
6065
66+
See Also
67+
--------
68+
DataFrame.set_index : Set the DataFrame index using existing columns.
69+
DataFrame.loc : Access a group of rows and columns by label(s) \
70+
or a boolean array.
71+
6172
Examples
6273
--------
6374
>>> df = pd.DataFrame(
@@ -385,6 +396,13 @@ class NumbaUtilError(Exception):
385396
"""
386397
Error raised for unsupported Numba engine routines.
387398
399+
See Also
400+
--------
401+
DataFrame.groupby : Group DataFrame using a mapper or by a Series of columns.
402+
Series.groupby : Group Series using a mapper or by a Series of columns.
403+
DataFrame.agg : Aggregate using one or more operations over the specified axis.
404+
Series.agg : Aggregate using one or more operations over the specified axis.
405+
388406
Examples
389407
--------
390408
>>> df = pd.DataFrame(

0 commit comments

Comments
 (0)