Skip to content

Commit 6fd8986

Browse files
committed
Merge branch 'main' into add-mask-fillna
2 parents e2f32cb + e3b2de8 commit 6fd8986

File tree

10 files changed

+46
-48
lines changed

10 files changed

+46
-48
lines changed

ci/code_checks.sh

-3
Original file line number | Diff line number | Diff line change
@@ -146,15 +146,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
146146
-i "pandas.tseries.offsets.CustomBusinessMonthBegin PR02" \
147147
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.calendar GL08" \
148148
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.holidays GL08" \
149-
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.is_on_offset SA01" \
150149
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.m_offset GL08" \
151150
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.n GL08" \
152151
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.normalize GL08" \
153152
-i "pandas.tseries.offsets.CustomBusinessMonthBegin.weekmask GL08" \
154153
-i "pandas.tseries.offsets.CustomBusinessMonthEnd PR02" \
155154
-i "pandas.tseries.offsets.CustomBusinessMonthEnd.calendar GL08" \
156155
-i "pandas.tseries.offsets.CustomBusinessMonthEnd.holidays GL08" \
157-
-i "pandas.tseries.offsets.CustomBusinessMonthEnd.is_on_offset SA01" \
158156
-i "pandas.tseries.offsets.CustomBusinessMonthEnd.m_offset GL08" \
159157
-i "pandas.tseries.offsets.CustomBusinessMonthEnd.n GL08" \
160158
-i "pandas.tseries.offsets.CustomBusinessMonthEnd.normalize GL08" \
@@ -191,7 +189,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
191189
-i "pandas.tseries.offsets.Hour.is_on_offset GL08" \
192190
-i "pandas.tseries.offsets.Hour.n GL08" \
193191
-i "pandas.tseries.offsets.Hour.normalize GL08" \
194-
-i "pandas.tseries.offsets.LastWeekOfMonth SA01" \
195192
-i "pandas.tseries.offsets.LastWeekOfMonth.is_on_offset GL08" \
196193
-i "pandas.tseries.offsets.LastWeekOfMonth.n GL08" \
197194
-i "pandas.tseries.offsets.LastWeekOfMonth.normalize GL08" \

pandas/_libs/tslibs/offsets.pyx

+22
Original file line number | Diff line number | Diff line change
@@ -720,11 +720,24 @@ cdef class BaseOffset:
720720
"""
721721
Return boolean whether a timestamp intersects with this frequency.
722722

723+
This method determines if a given timestamp aligns with the frequency
724+
defined by this offset. For a custom business month offset, it accounts
725+
for custom rules, such as skipping weekends or other non-business days,
726+
and checks whether the provided datetime falls on a valid business day
727+
that marks the beginning or end of the custom business month.
728+
723729
Parameters
724730
----------
725731
dt : datetime.datetime
726732
Timestamp to check intersections with frequency.
727733

734+
See Also
735+
--------
736+
tseries.offsets.CustomBusinessMonthBegin : Represents the start of a custom
737+
business month.
738+
tseries.offsets.CustomBusinessMonthEnd : Represents the end of a custom
739+
business month.
740+
728741
Examples
729742
--------
730743
>>> ts = pd.Timestamp(2022, 1, 1)
@@ -3710,6 +3723,15 @@ cdef class LastWeekOfMonth(WeekOfMonthMixin):
37103723
- 5 is Saturday
37113724
- 6 is Sunday.
37123725
3726+
See Also
3727+
--------
3728+
tseries.offsets.WeekOfMonth :
3729+
Date offset for a specific weekday in a month.
3730+
tseries.offsets.MonthEnd :
3731+
Date offset for the end of the month.
3732+
tseries.offsets.BMonthEnd :
3733+
Date offset for the last business day of the month.
3734+
37133735
Examples
37143736
--------
37153737
>>> ts = pd.Timestamp(2022, 1, 1)

pandas/core/arrays/arrow/array.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1398,7 +1398,7 @@ def _to_datetimearray(self) -> DatetimeArray:
13981398
np_dtype = np.dtype(f"M8[{pa_type.unit}]")
13991399
dtype = tz_to_dtype(pa_type.tz, pa_type.unit)
14001400
np_array = self._pa_array.to_numpy()
1401-
np_array = np_array.astype(np_dtype)
1401+
np_array = np_array.astype(np_dtype, copy=False)
14021402
return DatetimeArray._simple_new(np_array, dtype=dtype)
14031403

14041404
def _to_timedeltaarray(self) -> TimedeltaArray:
@@ -1409,7 +1409,7 @@ def _to_timedeltaarray(self) -> TimedeltaArray:
14091409
assert pa.types.is_duration(pa_type)
14101410
np_dtype = np.dtype(f"m8[{pa_type.unit}]")
14111411
np_array = self._pa_array.to_numpy()
1412-
np_array = np_array.astype(np_dtype)
1412+
np_array = np_array.astype(np_dtype, copy=False)
14131413
return TimedeltaArray._simple_new(np_array, dtype=np_dtype)
14141414

14151415
def _values_for_json(self) -> np.ndarray:

pandas/io/parsers/arrow_parser_wrapper.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,8 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
165165
# The only way self.names is not the same length as the number of cols is
166166
# if we have an int index_col. We should just pad the names (they will get
167167
# removed anyway) to the expected length then.
168-
self.names = list(range(num_cols - len(self.names))) + self.names
168+
columns_prefix = [str(x) for x in range(num_cols - len(self.names))]
169+
self.names = columns_prefix + self.names
169170
multi_index_named = False
170171
frame.columns = self.names
171172

pandas/tests/frame/conftest.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def float_string_frame():
3333
df = DataFrame(
3434
np.random.default_rng(2).standard_normal((30, 4)),
3535
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
36-
columns=Index(list("ABCD"), dtype=object),
36+
columns=Index(list("ABCD")),
3737
)
3838
df["foo"] = "bar"
3939
return df

pandas/tests/frame/constructors/test_from_dict.py

-1
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@ def test_constructor_list_of_series(self):
108108
expected = DataFrame.from_dict(sdict, orient="index")
109109
tm.assert_frame_equal(result, expected)
110110

111-
@pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
112111
def test_constructor_orient(self, float_string_frame):
113112
data_dict = float_string_frame.T._series
114113
recons = DataFrame.from_dict(data_dict, orient="index")

pandas/tests/frame/test_block_internals.py

+10-25
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
1210
import pandas as pd
1311
from pandas import (
1412
Categorical,
@@ -162,21 +160,7 @@ def test_constructor_with_convert(self):
162160
)
163161
tm.assert_series_equal(result, expected)
164162

165-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
166163
def test_construction_with_mixed(self, float_string_frame, using_infer_string):
167-
# test construction edge cases with mixed types
168-
169-
# f7u12, this does not work without extensive workaround
170-
data = [
171-
[datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)],
172-
[datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)],
173-
]
174-
df = DataFrame(data)
175-
176-
# check dtypes
177-
result = df.dtypes
178-
expected = Series({"datetime64[us]": 3})
179-
180164
# mixed-type frames
181165
float_string_frame["datetime"] = datetime.now()
182166
float_string_frame["timedelta"] = timedelta(days=1, seconds=1)
@@ -196,13 +180,11 @@ def test_construction_with_mixed(self, float_string_frame, using_infer_string):
196180
)
197181
tm.assert_series_equal(result, expected)
198182

199-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
200183
def test_construction_with_conversions(self):
201184
# convert from a numpy array of non-ns timedelta64; as of 2.0 this does
202185
# *not* convert
203186
arr = np.array([1, 2, 3], dtype="timedelta64[s]")
204-
df = DataFrame(index=range(3))
205-
df["A"] = arr
187+
df = DataFrame({"A": arr})
206188
expected = DataFrame(
207189
{"A": pd.timedelta_range("00:00:01", periods=3, freq="s")}, index=range(3)
208190
)
@@ -220,11 +202,11 @@ def test_construction_with_conversions(self):
220202
assert expected.dtypes["dt1"] == "M8[s]"
221203
assert expected.dtypes["dt2"] == "M8[s]"
222204

223-
df = DataFrame(index=range(3))
224-
df["dt1"] = np.datetime64("2013-01-01")
225-
df["dt2"] = np.array(
205+
dt1 = np.datetime64("2013-01-01")
206+
dt2 = np.array(
226207
["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]"
227208
)
209+
df = DataFrame({"dt1": dt1, "dt2": dt2})
228210

229211
# df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01
230212
# 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')
@@ -401,14 +383,17 @@ def test_update_inplace_sets_valid_block_values():
401383
assert isinstance(df._mgr.blocks[0].values, Categorical)
402384

403385

404-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
405386
def test_nonconsolidated_item_cache_take():
406387
# https://github.com/pandas-dev/pandas/issues/35521
407388

408389
# create non-consolidated dataframe with object dtype columns
409-
df = DataFrame()
410-
df["col1"] = Series(["a"], dtype=object)
390+
df = DataFrame(
391+
{
392+
"col1": Series(["a"], dtype=object),
393+
}
394+
)
411395
df["col2"] = Series([0], dtype=object)
396+
assert not df._mgr.is_consolidated()
412397

413398
# access column (item cache)
414399
df["col1"] == "A"

pandas/tests/groupby/methods/test_size.py

+3-7
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
Index,
@@ -76,18 +74,16 @@ def test_size_series_masked_type_returns_Int64(dtype):
7674
tm.assert_series_equal(result, expected)
7775

7876

79-
# TODO(infer_string) in case the column is object dtype, it should preserve that dtype
80-
# for the result's index
81-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
82-
def test_size_strings(any_string_dtype):
77+
def test_size_strings(any_string_dtype, using_infer_string):
8378
# GH#55627
8479
dtype = any_string_dtype
8580
df = DataFrame({"a": ["a", "a", "b"], "b": "a"}, dtype=dtype)
8681
result = df.groupby("a")["b"].size()
8782
exp_dtype = "Int64" if dtype == "string[pyarrow]" else "int64"
83+
exp_index_dtype = "str" if using_infer_string and dtype == "object" else dtype
8884
expected = Series(
8985
[2, 1],
90-
index=Index(["a", "b"], name="a", dtype=dtype),
86+
index=Index(["a", "b"], name="a", dtype=exp_index_dtype),
9187
name="b",
9288
dtype=exp_dtype,
9389
)

pandas/tests/io/parser/common/test_index.py

-3
Original file line number | Diff line number | Diff line change
@@ -90,9 +90,6 @@ def test_pass_names_with_index(all_parsers, data, kwargs, expected):
9090
def test_multi_index_no_level_names(
9191
request, all_parsers, index_col, using_infer_string
9292
):
93-
if using_infer_string and all_parsers.engine == "pyarrow":
94-
# result should have string columns instead of object dtype
95-
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
9693
data = """index1,index2,A,B,C,D
9794
foo,one,2,3,4,5
9895
foo,two,7,8,9,10

pandas/tests/io/test_parquet.py

+6-5
Original file line number | Diff line number | Diff line change
@@ -104,10 +104,7 @@ def fp(request):
104104

105105
@pytest.fixture
106106
def df_compat():
107-
# TODO(infer_string) should this give str columns?
108-
return pd.DataFrame(
109-
{"A": [1, 2, 3], "B": "foo"}, columns=pd.Index(["A", "B"], dtype=object)
110-
)
107+
return pd.DataFrame({"A": [1, 2, 3], "B": "foo"}, columns=pd.Index(["A", "B"]))
111108

112109

113110
@pytest.fixture
@@ -686,7 +683,11 @@ def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine):
686683
with open(datapath("io", "data", "parquet", "simple.parquet"), mode="rb") as f:
687684
httpserver.serve_content(content=f.read())
688685
df = read_parquet(httpserver.url, engine=engine)
689-
tm.assert_frame_equal(df, df_compat)
686+
687+
expected = df_compat
688+
if pa_version_under19p0:
689+
expected.columns = expected.columns.astype(object)
690+
tm.assert_frame_equal(df, expected)
690691

691692

692693
class TestParquetPyArrow(Base):

0 commit comments

Comments
 (0)