Skip to content

Commit 8c5f4af

Browse files
authored
Merge branch 'main' into Fix-pandas-dev#37715-column.py
2 parents d381b6f + a955989 commit 8c5f4af

File tree

86 files changed

+1472
-486
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+1472
-486
lines changed

.github/workflows/code-checks.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ jobs:
3939
with:
4040
extra_args: --verbose --all-files
4141

42-
docstring_typing_pylint:
43-
name: Docstring validation, typing, and pylint
42+
docstring_typing_manual_hooks:
43+
name: Docstring validation, typing, and other manual pre-commit hooks
4444
runs-on: ubuntu-22.04
4545
defaults:
4646
run:

.pre-commit-config.yaml

+25-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
minimum_pre_commit_version: 2.15.0
22
exclude: ^LICENSES/|\.(html|csv|svg)$
3-
# reserve "manual" for mypy and pyright
4-
default_stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
3+
# reserve "manual" for relatively slow hooks which we still want to run in CI
4+
default_stages: [
5+
commit,
6+
merge-commit,
7+
push,
8+
prepare-commit-msg,
9+
commit-msg,
10+
post-checkout,
11+
post-commit,
12+
post-merge,
13+
post-rewrite
14+
]
515
ci:
616
autofix_prs: false
717
repos:
@@ -34,9 +44,11 @@ repos:
3444
- id: debug-statements
3545
- id: end-of-file-fixer
3646
exclude: \.txt$
37-
stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
47+
stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
48+
post-checkout, post-commit, post-merge, post-rewrite]
3849
- id: trailing-whitespace
39-
stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
50+
stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
51+
post-checkout, post-commit, post-merge, post-rewrite]
4052
- repo: https://github.com/cpplint/cpplint
4153
rev: 1.6.1
4254
hooks:
@@ -46,7 +58,13 @@ repos:
4658
# this particular codebase (e.g. src/headers, src/klib). However,
4759
# we can lint all header files since they aren't "generated" like C files are.
4860
exclude: ^pandas/_libs/src/(klib|headers)/
49-
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
61+
args: [
62+
--quiet,
63+
'--extensions=c,h',
64+
'--headers=h',
65+
--recursive,
66+
'--filter=-readability/casting,-runtime/int,-build/include_subdir'
67+
]
5068
- repo: https://github.com/PyCQA/flake8
5169
rev: 6.0.0
5270
hooks:
@@ -107,6 +125,7 @@ repos:
107125
hooks:
108126
- id: yesqa
109127
additional_dependencies: *flake8_dependencies
128+
stages: [manual]
110129
- repo: local
111130
hooks:
112131
# NOTE: we make `black` a local hook because if it's installed from
@@ -214,7 +233,6 @@ repos:
214233
exclude: ^pandas/tests/extension/base/base\.py
215234
- id: pip-to-conda
216235
name: Generate pip dependency from conda
217-
description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
218236
language: python
219237
entry: python scripts/generate_pip_deps_from_conda.py
220238
files: ^(environment.yml|requirements-dev.txt)$
@@ -311,6 +329,7 @@ repos:
311329
files: ^pandas
312330
exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)
313331
language: python
332+
stages: [manual]
314333
additional_dependencies:
315334
- autotyping==22.9.0
316335
- libcst==0.4.7

asv_bench/benchmarks/io/excel.py

+8-10
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,8 @@ def setup(self, engine):
4242
def time_write_excel(self, engine):
4343
bio = BytesIO()
4444
bio.seek(0)
45-
writer = ExcelWriter(bio, engine=engine)
46-
self.df.to_excel(writer, sheet_name="Sheet1")
47-
writer.save()
45+
with ExcelWriter(bio, engine=engine) as writer:
46+
self.df.to_excel(writer, sheet_name="Sheet1")
4847

4948

5049
class WriteExcelStyled:
@@ -57,13 +56,12 @@ def setup(self, engine):
5756
def time_write_excel_style(self, engine):
5857
bio = BytesIO()
5958
bio.seek(0)
60-
writer = ExcelWriter(bio, engine=engine)
61-
df_style = self.df.style
62-
df_style.applymap(lambda x: "border: red 1px solid;")
63-
df_style.applymap(lambda x: "color: blue")
64-
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
65-
df_style.to_excel(writer, sheet_name="Sheet1")
66-
writer.save()
59+
with ExcelWriter(bio, engine=engine) as writer:
60+
df_style = self.df.style
61+
df_style.applymap(lambda x: "border: red 1px solid;")
62+
df_style.applymap(lambda x: "color: blue")
63+
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
64+
df_style.to_excel(writer, sheet_name="Sheet1")
6765

6866

6967
class ReadExcel:

ci/deps/actions-38-downstream_compat.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,11 @@ dependencies:
3434
- gcsfs
3535
- jinja2
3636
- lxml
37-
- matplotlib
37+
- matplotlib>=3.6.1
3838
- numba
3939
- numexpr
4040
- openpyxl
4141
- odfpy
42-
- pandas-gbq
4342
- psycopg2
4443
- pyarrow<10
4544
- pymysql
@@ -68,5 +67,6 @@ dependencies:
6867
- statsmodels
6968
- coverage
7069
- pandas-datareader
70+
- pandas-gbq
7171
- pyyaml
7272
- py

ci/deps/circle-38-arm64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ dependencies:
3333
- gcsfs
3434
- jinja2
3535
- lxml
36-
- matplotlib
36+
- matplotlib>=3.6.1
3737
- numba
3838
- numexpr
3939
- openpyxl

doc/source/development/debugging_extensions.rst

+10
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,13 @@ Note that code execution under valgrind will take much longer than usual. While
119119
.. note::
120120

121121
For best results, you should use a Python installation configured with Valgrind support (--with-valgrind)
122+
123+
124+
Easier code navigation
125+
======================
126+
127+
Generating a ``compile_commands.json`` file may make it easier to navigate the C extensions, as this allows your code editor to list references, jump to definitions, etc. To make this work with setuptools, you can use `Bear <https://github.com/rizsotto/Bear>`_.
128+
129+
.. code-block:: sh
130+
131+
bear -- python setup.py build_ext --inplace -j4 --with-debugging-symbols

doc/source/reference/extensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ objects.
3232
.. autosummary::
3333
:toctree: api/
3434
35+
api.extensions.ExtensionArray._accumulate
3536
api.extensions.ExtensionArray._concat_same_type
3637
api.extensions.ExtensionArray._formatter
3738
api.extensions.ExtensionArray._from_factorized

doc/source/user_guide/basics.rst

+47-19
Original file line numberDiff line numberDiff line change
@@ -827,20 +827,54 @@ In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``.
827827

828828
For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.ols, 'data')`` to ``pipe``:
829829

830-
.. ipython:: python
831-
:okwarning:
832-
833-
import statsmodels.formula.api as sm
834-
835-
bb = pd.read_csv("data/baseball.csv", index_col="id")
830+
.. code-block:: ipython
836831
837-
(
838-
bb.query("h > 0")
839-
.assign(ln_h=lambda df: np.log(df.h))
840-
.pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
841-
.fit()
842-
.summary()
843-
)
832+
In [147]: import statsmodels.formula.api as sm
833+
834+
In [148]: bb = pd.read_csv("data/baseball.csv", index_col="id")
835+
836+
In [149]: (
837+
.....: bb.query("h > 0")
838+
.....: .assign(ln_h=lambda df: np.log(df.h))
839+
.....: .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
840+
.....: .fit()
841+
.....: .summary()
842+
.....: )
843+
.....:
844+
Out[149]:
845+
<class 'statsmodels.iolib.summary.Summary'>
846+
"""
847+
OLS Regression Results
848+
==============================================================================
849+
Dep. Variable: hr R-squared: 0.685
850+
Model: OLS Adj. R-squared: 0.665
851+
Method: Least Squares F-statistic: 34.28
852+
Date: Tue, 22 Nov 2022 Prob (F-statistic): 3.48e-15
853+
Time: 05:34:17 Log-Likelihood: -205.92
854+
No. Observations: 68 AIC: 421.8
855+
Df Residuals: 63 BIC: 432.9
856+
Df Model: 4
857+
Covariance Type: nonrobust
858+
===============================================================================
859+
coef std err t P>|t| [0.025 0.975]
860+
-------------------------------------------------------------------------------
861+
Intercept -8484.7720 4664.146 -1.819 0.074 -1.78e+04 835.780
862+
C(lg)[T.NL] -2.2736 1.325 -1.716 0.091 -4.922 0.375
863+
ln_h -1.3542 0.875 -1.547 0.127 -3.103 0.395
864+
year 4.2277 2.324 1.819 0.074 -0.417 8.872
865+
g 0.1841 0.029 6.258 0.000 0.125 0.243
866+
==============================================================================
867+
Omnibus: 10.875 Durbin-Watson: 1.999
868+
Prob(Omnibus): 0.004 Jarque-Bera (JB): 17.298
869+
Skew: 0.537 Prob(JB): 0.000175
870+
Kurtosis: 5.225 Cond. No. 1.49e+07
871+
==============================================================================
872+
873+
Notes:
874+
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
875+
[2] The condition number is large, 1.49e+07. This might indicate that there are
876+
strong multicollinearity or other numerical problems.
877+
"""
844878
845879
The pipe method is inspired by unix pipes and more recently dplyr_ and magrittr_, which
846880
have introduced the popular ``(%>%)`` (read pipe) operator for R_.
@@ -1213,12 +1247,6 @@ With a DataFrame, you can simultaneously reindex the index and columns:
12131247
df
12141248
df.reindex(index=["c", "f", "b"], columns=["three", "two", "one"])
12151249
1216-
You may also use ``reindex`` with an ``axis`` keyword:
1217-
1218-
.. ipython:: python
1219-
1220-
df.reindex(["c", "f", "b"], axis="index")
1221-
12221250
Note that the ``Index`` objects containing the actual axis labels can be
12231251
**shared** between objects. So if we have a Series and a DataFrame, the
12241252
following can be done:

doc/source/whatsnew/v0.16.2.rst

+49-15
Original file line numberDiff line numberDiff line change
@@ -61,21 +61,55 @@ In the example above, the functions ``f``, ``g``, and ``h`` each expected the Da
6161
When the function you wish to apply takes its data anywhere other than the first argument, pass a tuple
6262
of ``(function, keyword)`` indicating where the DataFrame should flow. For example:
6363

64-
.. ipython:: python
65-
:okwarning:
66-
67-
import statsmodels.formula.api as sm
68-
69-
bb = pd.read_csv("data/baseball.csv", index_col="id")
70-
71-
# sm.ols takes (formula, data)
72-
(
73-
bb.query("h > 0")
74-
.assign(ln_h=lambda df: np.log(df.h))
75-
.pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
76-
.fit()
77-
.summary()
78-
)
64+
.. code-block:: ipython
65+
66+
In [1]: import statsmodels.formula.api as sm
67+
68+
In [2]: bb = pd.read_csv("data/baseball.csv", index_col="id")
69+
70+
# sm.ols takes (formula, data)
71+
In [3]: (
72+
...: bb.query("h > 0")
73+
...: .assign(ln_h=lambda df: np.log(df.h))
74+
...: .pipe((sm.ols, "data"), "hr ~ ln_h + year + g + C(lg)")
75+
...: .fit()
76+
...: .summary()
77+
...: )
78+
...:
79+
Out[3]:
80+
<class 'statsmodels.iolib.summary.Summary'>
81+
"""
82+
OLS Regression Results
83+
==============================================================================
84+
Dep. Variable: hr R-squared: 0.685
85+
Model: OLS Adj. R-squared: 0.665
86+
Method: Least Squares F-statistic: 34.28
87+
Date: Tue, 22 Nov 2022 Prob (F-statistic): 3.48e-15
88+
Time: 05:35:23 Log-Likelihood: -205.92
89+
No. Observations: 68 AIC: 421.8
90+
Df Residuals: 63 BIC: 432.9
91+
Df Model: 4
92+
Covariance Type: nonrobust
93+
===============================================================================
94+
coef std err t P>|t| [0.025 0.975]
95+
-------------------------------------------------------------------------------
96+
Intercept -8484.7720 4664.146 -1.819 0.074 -1.78e+04 835.780
97+
C(lg)[T.NL] -2.2736 1.325 -1.716 0.091 -4.922 0.375
98+
ln_h -1.3542 0.875 -1.547 0.127 -3.103 0.395
99+
year 4.2277 2.324 1.819 0.074 -0.417 8.872
100+
g 0.1841 0.029 6.258 0.000 0.125 0.243
101+
==============================================================================
102+
Omnibus: 10.875 Durbin-Watson: 1.999
103+
Prob(Omnibus): 0.004 Jarque-Bera (JB): 17.298
104+
Skew: 0.537 Prob(JB): 0.000175
105+
Kurtosis: 5.225 Cond. No. 1.49e+07
106+
==============================================================================
107+
108+
Notes:
109+
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
110+
[2] The condition number is large, 1.49e+07. This might indicate that there are
111+
strong multicollinearity or other numerical problems.
112+
"""
79113
80114
The pipe method is inspired by unix pipes, which stream text through
81115
processes. More recently dplyr_ and magrittr_ have introduced the

doc/source/whatsnew/v1.5.3.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ Fixed regressions
2727
Bug fixes
2828
~~~~~~~~~
2929
- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`)
30+
- Bug when chaining several :meth:`.Styler.concat` calls, where only the last styler was concatenated (:issue:`49207`)
31+
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on Python 3.11 (:issue:`49649`)
3032
-
3133

3234
.. ---------------------------------------------------------------------------
@@ -35,7 +37,6 @@ Bug fixes
3537
Other
3638
~~~~~
3739
-
38-
-
3940

4041
.. ---------------------------------------------------------------------------
4142
.. _whatsnew_153.contributors:

0 commit comments

Comments
 (0)