Skip to content

Commit 2249b4b

Browse files
authored
Merge branch 'pandas-dev:main' into offsets_docs_h_s_d
2 parents c881fdc + a68b97d commit 2249b4b

File tree

495 files changed

+8323
-6966
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

495 files changed

+8323
-6966
lines changed

.circleci/setup_env.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@ source activate pandas-dev
4848
# downstream CI jobs that may also build pandas from source.
4949
export PANDAS_CI=1
5050

51-
if pip list | grep -q ^pandas; then
51+
if pip show pandas 1>/dev/null; then
5252
echo
5353
echo "remove any installed pandas package w/o removing anything else"
54-
pip uninstall -y pandas || true
54+
pip uninstall -y pandas
5555
fi
5656

5757
echo "Install pandas"

.github/workflows/package-checks.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
runs-on: ubuntu-22.04
2525
strategy:
2626
matrix:
27-
extra: ["test", "performance", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "all"]
27+
extra: ["test", "performance", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output_formatting", "clipboard", "compression", "consortium-standard", "all"]
2828
fail-fast: false
2929
name: Install Extras - ${{ matrix.extra }}
3030
concurrency:

.github/workflows/unit-tests.yml

-2
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,6 @@ jobs:
333333
PYTEST_WORKERS: "auto"
334334
PANDAS_CI: 1
335335
PATTERN: "not slow and not network and not clipboard and not single_cpu"
336-
COVERAGE: true
337336
PYTEST_TARGET: pandas
338337

339338
steps:
@@ -351,7 +350,6 @@ jobs:
351350
python --version
352351
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1
353352
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
354-
python -m pip install git+https://github.com/nedbat/coveragepy.git
355353
python -m pip install versioneer[toml]
356354
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
357355
python -m pip list

.github/workflows/wheels.yml

+35-5
Original file line numberDiff line numberDiff line change
@@ -104,21 +104,51 @@ jobs:
104104
with:
105105
fetch-depth: 0
106106

107+
# TODO: Build wheels from sdist again
108+
# There's some sort of weird race condition?
109+
# within Github that makes the sdist be missing files
110+
107111
# We need to build wheels from the sdist since the sdist
108112
# removes unnecessary files from the release
109-
- name: Download sdist
113+
- name: Download sdist (not macOS)
114+
if: ${{ matrix.buildplat[1] != 'macosx_*' }}
110115
uses: actions/download-artifact@v3
111116
with:
112117
name: sdist
113118
path: ./dist
114119

120+
- name: Set up Python (macOS)
121+
if: ${{ matrix.buildplat[1] == 'macosx_*' }}
122+
uses: actions/setup-python@v4
123+
with:
124+
python-version: '3.11'
125+
126+
# Work around https://github.com/actions/cache/issues/403 by using GNU tar
127+
# instead of BSD tar.
128+
# borrowed from https://github.com/rust-lang/rust-analyzer/pull/6208/files
129+
- name: Install GNU tar
130+
if: ${{ matrix.buildplat[1] == 'macosx_*' }}
131+
run: |
132+
brew install gnu-tar
133+
echo PATH="/usr/local/opt/gnu-tar/libexec/gnubin:$PATH" >> $GITHUB_ENV
134+
135+
# Python version used to build sdist doesn't matter
136+
# wheel will be built from sdist with the correct version
137+
- name: Build sdist (macOS)
138+
if: ${{ matrix.buildplat[1] == 'macosx_*' }}
139+
run: |
140+
python -m pip install build
141+
python -m build --sdist
142+
143+
- name: Output sdist name (macOS)
144+
id: save-path
145+
shell: bash -el {0}
146+
run: echo "sdist_name=$(ls ./dist)" >> "$GITHUB_ENV"
147+
115148
- name: Build wheels
116149
uses: pypa/[email protected]
117-
# TODO: Build wheels from sdist again
118-
# There's some sort of weird race condition?
119-
# within Github that makes the sdist be missing files
120150
with:
121-
package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
151+
package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
122152
env:
123153
CIBW_PRERELEASE_PYTHONS: True
124154
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

.pre-commit-config.yaml

+7-13
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ ci:
2020
repos:
2121
- repo: https://github.com/hauntsaninja/black-pre-commit-mirror
2222
# black compiled with mypyc
23-
rev: 23.3.0
23+
rev: 23.7.0
2424
hooks:
2525
- id: black
2626
- repo: https://github.com/astral-sh/ruff-pre-commit
27-
rev: v0.0.277
27+
rev: v0.0.282
2828
hooks:
2929
- id: ruff
3030
args: [--exit-non-zero-on-fix]
@@ -107,7 +107,7 @@ repos:
107107
hooks:
108108
- id: isort
109109
- repo: https://github.com/asottile/pyupgrade
110-
rev: v3.7.0
110+
rev: v3.10.1
111111
hooks:
112112
- id: pyupgrade
113113
args: [--py39-plus]
@@ -138,7 +138,7 @@ repos:
138138
types: [python]
139139
stages: [manual]
140140
additional_dependencies: &pyright_dependencies
141-
141+
142142
- id: pyright
143143
# note: assumes python env is setup and activated
144144
name: pyright reportGeneralTypeIssues
@@ -251,17 +251,11 @@ repos:
251251
252252
# os.remove
253253
|os\.remove
254+
255+
# Unseeded numpy default_rng
256+
|default_rng\(\)
254257
files: ^pandas/tests/
255258
types_or: [python, cython, rst]
256-
- id: unwanted-patterns-in-ea-tests
257-
name: Unwanted patterns in EA tests
258-
language: pygrep
259-
entry: |
260-
(?x)
261-
tm.assert_(series|frame)_equal
262-
files: ^pandas/tests/extension/base/
263-
exclude: ^pandas/tests/extension/base/base\.py$
264-
types_or: [python, cython, rst]
265259
- id: unwanted-patterns-in-cython
266260
name: Unwanted patterns in Cython code
267261
language: pygrep

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44-
"Cython": ["0.29.33"],
44+
"Cython": ["3.0.0"],
4545
"matplotlib": [],
4646
"sqlalchemy": [],
4747
"scipy": [],

asv_bench/benchmarks/array.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import pandas as pd
44

5-
from .pandas_vb_common import tm
6-
75

86
class BooleanArray:
97
def setup(self):
@@ -56,7 +54,7 @@ def time_from_tuples(self):
5654
class StringArray:
5755
def setup(self):
5856
N = 100_000
59-
values = tm.rands_array(3, N)
57+
values = np.array([str(i) for i in range(N)], dtype=object)
6058
self.values_obj = np.array(values, dtype="object")
6159
self.values_str = np.array(values, dtype="U")
6260
self.values_list = values.tolist()
@@ -80,7 +78,7 @@ def setup(self, multiple_chunks):
8078
import pyarrow as pa
8179
except ImportError:
8280
raise NotImplementedError
83-
strings = tm.rands_array(3, 10_000)
81+
strings = np.array([str(i) for i in range(10_000)], dtype=object)
8482
if multiple_chunks:
8583
chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
8684
self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
@@ -127,7 +125,7 @@ def setup(self, dtype, hasna):
127125
elif dtype == "int64[pyarrow]":
128126
data = np.arange(N)
129127
elif dtype == "string[pyarrow]":
130-
data = tm.rands_array(10, N)
128+
data = np.array([str(i) for i in range(N)], dtype=object)
131129
elif dtype == "timestamp[ns][pyarrow]":
132130
data = pd.date_range("2000-01-01", freq="s", periods=N)
133131
else:

asv_bench/benchmarks/frame_methods.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -512,19 +512,10 @@ def setup(self, axis):
512512
self.df_mixed = self.df.copy()
513513
self.df_mixed["foo"] = "bar"
514514

515-
self.df.index = MultiIndex.from_arrays([self.df.index, self.df.index])
516-
self.df.columns = MultiIndex.from_arrays([self.df.columns, self.df.columns])
517-
self.df_mixed.index = MultiIndex.from_arrays(
518-
[self.df_mixed.index, self.df_mixed.index]
519-
)
520-
self.df_mixed.columns = MultiIndex.from_arrays(
521-
[self.df_mixed.columns, self.df_mixed.columns]
522-
)
523-
524-
def time_count_level_multi(self, axis):
515+
def time_count(self, axis):
525516
self.df.count(axis=axis)
526517

527-
def time_count_level_mixed_dtypes_multi(self, axis):
518+
def time_count_mixed_dtypes(self, axis):
528519
self.df_mixed.count(axis=axis)
529520

530521

asv_bench/benchmarks/series_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def setup(self, dtype):
104104
data = np.arange(N)
105105
na_value = NA
106106
elif dtype in ("string", "string[pyarrow]"):
107-
data = tm.rands_array(5, N)
107+
data = np.array([str(i) * 5 for i in range(N)], dtype=object)
108108
na_value = NA
109109
else:
110110
raise NotImplementedError

asv_bench/benchmarks/strings.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class Construction:
3434
dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
3535

3636
def setup(self, pd_type, dtype):
37-
series_arr = tm.rands_array(
38-
nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
37+
series_arr = np.array(
38+
[str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype]
3939
)
4040
if pd_type == "series":
4141
self.arr = series_arr
@@ -276,7 +276,7 @@ def time_iter(self, dtype):
276276

277277
class StringArrayConstruction:
278278
def setup(self):
279-
self.series_arr = tm.rands_array(nchars=10, size=10**5)
279+
self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object)
280280
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
281281

282282
def time_string_array_construction(self):

ci/code_checks.sh

+112-9
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,120 @@ fi
5757
### DOCSTRINGS ###
5858
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
5959

60-
MSG='Validate docstrings (EX02, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
61-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX02,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
60+
MSG='Validate docstrings (EX01, EX02, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
61+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX02,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
6262
RET=$(($RET + $?)) ; echo $MSG "DONE"
6363

64-
MSG='Partially validate docstrings (EX01)' ; echo $MSG
65-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
66-
pandas.NaT \
67-
pandas.io.stata.StataWriter.write_file \
68-
pandas.plotting.deregister_matplotlib_converters \
69-
pandas.plotting.register_matplotlib_converters \
70-
pandas.api.extensions.ExtensionArray \
64+
MSG='Partially validate docstrings (EX03)' ; echo $MSG
65+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX03 --ignore_functions \
66+
pandas.Series.loc \
67+
pandas.Series.iloc \
68+
pandas.Series.pop \
69+
pandas.Series.describe \
70+
pandas.Series.skew \
71+
pandas.Series.var \
72+
pandas.Series.last \
73+
pandas.Series.tz_convert \
74+
pandas.Series.tz_localize \
75+
pandas.Series.dt.month_name \
76+
pandas.Series.dt.day_name \
77+
pandas.Series.str.len \
78+
pandas.Series.cat.set_categories \
79+
pandas.Series.plot.bar \
80+
pandas.Series.plot.hist \
81+
pandas.Series.plot.line \
82+
pandas.Series.to_sql \
83+
pandas.Series.to_latex \
84+
pandas.errors.CategoricalConversionWarning \
85+
pandas.errors.ChainedAssignmentError \
86+
pandas.errors.ClosedFileError \
87+
pandas.errors.DatabaseError \
88+
pandas.errors.IndexingError \
89+
pandas.errors.InvalidColumnName \
90+
pandas.errors.NumExprClobberingError \
91+
pandas.errors.PossibleDataLossError \
92+
pandas.errors.PossiblePrecisionLoss \
93+
pandas.errors.SettingWithCopyError \
94+
pandas.errors.SettingWithCopyWarning \
95+
pandas.errors.SpecificationError \
96+
pandas.errors.UndefinedVariableError \
97+
pandas.errors.ValueLabelTypeMismatch \
98+
pandas.Timestamp.ceil \
99+
pandas.Timestamp.floor \
100+
pandas.Timestamp.round \
101+
pandas.read_pickle \
102+
pandas.ExcelWriter \
103+
pandas.read_json \
104+
pandas.io.json.build_table_schema \
105+
pandas.DataFrame.to_latex \
106+
pandas.io.formats.style.Styler.to_latex \
107+
pandas.read_parquet \
108+
pandas.DataFrame.to_sql \
109+
pandas.read_stata \
110+
pandas.core.resample.Resampler.pipe \
111+
pandas.core.resample.Resampler.fillna \
112+
pandas.core.resample.Resampler.interpolate \
113+
pandas.plotting.scatter_matrix \
114+
pandas.pivot \
115+
pandas.merge_asof \
116+
pandas.wide_to_long \
117+
pandas.Index.rename \
118+
pandas.Index.droplevel \
119+
pandas.Index.isin \
120+
pandas.CategoricalIndex.set_categories \
121+
pandas.MultiIndex.names \
122+
pandas.MultiIndex.droplevel \
123+
pandas.IndexSlice \
124+
pandas.DatetimeIndex.month_name \
125+
pandas.DatetimeIndex.day_name \
126+
pandas.core.window.rolling.Rolling.corr \
127+
pandas.Grouper \
128+
pandas.core.groupby.SeriesGroupBy.apply \
129+
pandas.core.groupby.DataFrameGroupBy.apply \
130+
pandas.core.groupby.SeriesGroupBy.transform \
131+
pandas.core.groupby.SeriesGroupBy.pipe \
132+
pandas.core.groupby.DataFrameGroupBy.pipe \
133+
pandas.core.groupby.DataFrameGroupBy.describe \
134+
pandas.core.groupby.DataFrameGroupBy.idxmax \
135+
pandas.core.groupby.DataFrameGroupBy.idxmin \
136+
pandas.core.groupby.DataFrameGroupBy.value_counts \
137+
pandas.core.groupby.SeriesGroupBy.describe \
138+
pandas.core.groupby.DataFrameGroupBy.boxplot \
139+
pandas.core.groupby.DataFrameGroupBy.hist \
140+
pandas.io.formats.style.Styler.map \
141+
pandas.io.formats.style.Styler.apply_index \
142+
pandas.io.formats.style.Styler.map_index \
143+
pandas.io.formats.style.Styler.format \
144+
pandas.io.formats.style.Styler.format_index \
145+
pandas.io.formats.style.Styler.relabel_index \
146+
pandas.io.formats.style.Styler.hide \
147+
pandas.io.formats.style.Styler.set_td_classes \
148+
pandas.io.formats.style.Styler.set_tooltips \
149+
pandas.io.formats.style.Styler.set_uuid \
150+
pandas.io.formats.style.Styler.pipe \
151+
pandas.io.formats.style.Styler.highlight_between \
152+
pandas.io.formats.style.Styler.highlight_quantile \
153+
pandas.io.formats.style.Styler.background_gradient \
154+
pandas.io.formats.style.Styler.text_gradient \
155+
pandas.DataFrame.values \
156+
pandas.DataFrame.loc \
157+
pandas.DataFrame.iloc \
158+
pandas.DataFrame.groupby \
159+
pandas.DataFrame.describe \
160+
pandas.DataFrame.skew \
161+
pandas.DataFrame.var \
162+
pandas.DataFrame.idxmax \
163+
pandas.DataFrame.idxmin \
164+
pandas.DataFrame.last \
165+
pandas.DataFrame.pivot \
166+
pandas.DataFrame.sort_values \
167+
pandas.DataFrame.tz_convert \
168+
pandas.DataFrame.tz_localize \
169+
pandas.DataFrame.plot.bar \
170+
pandas.DataFrame.plot.hexbin \
171+
pandas.DataFrame.plot.hist \
172+
pandas.DataFrame.plot.line \
173+
pandas.DataFrame.hist \
71174
RET=$(($RET + $?)) ; echo $MSG "DONE"
72175

73176
fi

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# build dependencies
88
- versioneer[toml]
9-
- cython>=0.29.33
9+
- cython>=3.0.0
1010
- meson[ninja]=1.0.1
1111
- meson-python=0.13.1
1212

ci/deps/actions-311-downstream_compat.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77

88
# build dependencies
99
- versioneer[toml]
10-
- cython>=0.29.33
10+
- cython>=3.0.0
1111
- meson[ninja]=1.0.1
1212
- meson-python=0.13.1
1313

@@ -73,5 +73,6 @@ dependencies:
7373
- pyyaml
7474
- py
7575
- pip:
76+
- dataframe-api-compat>=0.1.7
7677
- pyqt5>=5.15.6
7778
- tzdata>=2022.1

0 commit comments

Comments
 (0)