Skip to content

Commit 556a8e4

Browse files
authored
Merge branch 'main' into 49277
2 parents a20177b + cb57af0 commit 556a8e4

File tree

246 files changed

+1293
-4246
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

246 files changed

+1293
-4246
lines changed

.github/workflows/scorecards.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
persist-credentials: false
3030

3131
- name: "Run analysis"
32-
uses: ossf/scorecard-action@v2.0.3
32+
uses: ossf/scorecard-action@v2.0.6
3333
with:
3434
results_file: results.sarif
3535
results_format: sarif

.github/workflows/wheels.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
- [windows-2019, win_amd64]
5353
- [windows-2019, win32]
5454
# TODO: support PyPy?
55-
python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11-dev"]]# "pp38", "pp39"]
55+
python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]# "pp38", "pp39"]
5656
env:
5757
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
5858
IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -73,7 +73,7 @@ jobs:
7373
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
7474

7575
# Used to test the built wheels
76-
- uses: actions/setup-python@v3
76+
- uses: actions/setup-python@v4
7777
with:
7878
python-version: ${{ matrix.python[1] }}
7979

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ repos:
102102
types: [python]
103103
stages: [manual]
104104
additional_dependencies: &pyright_dependencies
105-
105+
106106
- id: pyright_reportGeneralTypeIssues
107107
# note: assumes python env is setup and activated
108108
name: pyright reportGeneralTypeIssues

asv_bench/asv.conf.json

-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
"openpyxl": [],
5555
"xlsxwriter": [],
5656
"xlrd": [],
57-
"xlwt": [],
5857
"odfpy": [],
5958
"jinja2": [],
6059
},

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,7 @@ class String:
683683
def setup(self, dtype, method):
684684
cols = list("abcdefghjkl")
685685
self.df = DataFrame(
686-
np.random.randint(0, 100, size=(1_000_000, len(cols))),
686+
np.random.randint(0, 100, size=(10_000, len(cols))),
687687
columns=cols,
688688
dtype=dtype,
689689
)

asv_bench/benchmarks/io/excel.py

+4-10
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _generate_dataframe():
3333

3434
class WriteExcel:
3535

36-
params = ["openpyxl", "xlsxwriter", "xlwt"]
36+
params = ["openpyxl", "xlsxwriter"]
3737
param_names = ["engine"]
3838

3939
def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
6868

6969
class ReadExcel:
7070

71-
params = ["xlrd", "openpyxl", "odf"]
71+
params = ["openpyxl", "odf"]
7272
param_names = ["engine"]
7373
fname_excel = "spreadsheet.xlsx"
74-
fname_excel_xls = "spreadsheet.xls"
7574
fname_odf = "spreadsheet.ods"
7675

7776
def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
9291
self.df = _generate_dataframe()
9392

9493
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
95-
self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
9694
self._create_odf()
9795

9896
def time_read_excel(self, engine):
99-
if engine == "xlrd":
100-
fname = self.fname_excel_xls
101-
elif engine == "odf":
97+
if engine == "odf":
10298
fname = self.fname_odf
10399
else:
104100
fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
107103

108104
class ReadExcelNRows(ReadExcel):
109105
def time_read_excel(self, engine):
110-
if engine == "xlrd":
111-
fname = self.fname_excel_xls
112-
elif engine == "odf":
106+
if engine == "odf":
113107
fname = self.fname_odf
114108
else:
115109
fname = self.fname_excel

asv_bench/benchmarks/io/sql.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def setup(self, connection):
3838
},
3939
index=tm.makeStringIndex(N),
4040
)
41-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
41+
self.df.iloc[1000:3000, 1] = np.nan
4242
self.df["date"] = self.df["datetime"].dt.date
4343
self.df["time"] = self.df["datetime"].dt.time
4444
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
8888
},
8989
index=tm.makeStringIndex(N),
9090
)
91-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
91+
self.df.iloc[1000:3000, 1] = np.nan
9292
self.df["date"] = self.df["datetime"].dt.date
9393
self.df["time"] = self.df["datetime"].dt.time
9494
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
117117
},
118118
index=tm.makeStringIndex(N),
119119
)
120-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
120+
self.df.iloc[1000:3000, 1] = np.nan
121121
self.df["date"] = self.df["datetime"].dt.date
122122
self.df["time"] = self.df["datetime"].dt.time
123123
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
164164
},
165165
index=tm.makeStringIndex(N),
166166
)
167-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
167+
self.df.iloc[1000:3000, 1] = np.nan
168168
self.df["date"] = self.df["datetime"].dt.date
169169
self.df["time"] = self.df["datetime"].dt.time
170170
self.df["datetime_string"] = self.df["datetime"].astype(str)

asv_bench/benchmarks/io/style.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ def _style_format(self):
8383
def _style_apply_format_hide(self):
8484
self.st = self.df.style.applymap(lambda v: "color: red;")
8585
self.st.format("{:.3f}")
86-
self.st.hide_index(self.st.index[1:])
87-
self.st.hide_columns(self.st.columns[1:])
86+
self.st.hide(self.st.index[1:], axis=0)
87+
self.st.hide(self.st.columns[1:], axis=1)
8888

8989
def _style_tooltips(self):
9090
ttips = DataFrame("abc", index=self.df.index[::2], columns=self.df.columns[::2])
9191
self.st = self.df.style.set_tooltips(ttips)
92-
self.st.hide_index(self.st.index[12:])
93-
self.st.hide_columns(self.st.columns[12:])
92+
self.st.hide(self.st.index[12:], axis=0)
93+
self.st.hide(self.st.columns[12:], axis=1)

asv_bench/benchmarks/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def setup(self):
3636
self.df = DataFrame(data)
3737

3838
def time_reshape_pivot_time_series(self):
39-
self.df.pivot("date", "variable", "value")
39+
self.df.pivot(index="date", columns="variable", values="value")
4040

4141

4242
class SimpleReshape:

ci/code_checks.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import pandas
4747
4848
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
4949
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
50-
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
50+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
5151
5252
# GH#28227 for some of these check for top-level modules, while others are
5353
# more specific (e.g. urllib.request)

ci/deps/actions-310.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/actions-38-downstream_compat.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard
5655

5756
# downstream packages

ci/deps/actions-38-minimum_versions.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,4 @@ dependencies:
5353
- xarray=0.19.0
5454
- xlrd=2.0.1
5555
- xlsxwriter=1.4.3
56-
- xlwt=1.3.0
5756
- zstandard=0.15.2

ci/deps/actions-38.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,4 @@ dependencies:
5050
- xarray
5151
- xlrd
5252
- xlsxwriter
53-
- xlwt
5453
- zstandard

ci/deps/actions-39.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/circle-38-arm64.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

doc/scripts/eval_performance.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from timeit import repeat as timeit
2+
3+
import numpy as np
4+
import seaborn as sns
5+
6+
from pandas import DataFrame
7+
8+
setup_common = """from pandas import DataFrame
9+
from numpy.random import randn
10+
df = DataFrame(randn(%d, 3), columns=list('abc'))
11+
%s"""
12+
13+
setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
14+
15+
16+
def bench_with(n, times=10, repeat=3, engine="numexpr"):
17+
return (
18+
np.array(
19+
timeit(
20+
"df.eval(s, engine=%r)" % engine,
21+
setup=setup_common % (n, setup_with),
22+
repeat=repeat,
23+
number=times,
24+
)
25+
)
26+
/ times
27+
)
28+
29+
30+
setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'"
31+
32+
33+
def bench_subset(n, times=20, repeat=3, engine="numexpr"):
34+
return (
35+
np.array(
36+
timeit(
37+
"df.query(s, engine=%r)" % engine,
38+
setup=setup_common % (n, setup_subset),
39+
repeat=repeat,
40+
number=times,
41+
)
42+
)
43+
/ times
44+
)
45+
46+
47+
def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
48+
r = np.logspace(mn, mx, num=num).round().astype(int)
49+
50+
ev = DataFrame(np.empty((num, len(engines))), columns=engines)
51+
qu = ev.copy(deep=True)
52+
53+
ev["size"] = qu["size"] = r
54+
55+
for engine in engines:
56+
for i, n in enumerate(r):
57+
if verbose & (i % 10 == 0):
58+
print("engine: %r, i == %d" % (engine, i))
59+
ev_times = bench_with(n, times=1, repeat=1, engine=engine)
60+
ev.loc[i, engine] = np.mean(ev_times)
61+
qu_times = bench_subset(n, times=1, repeat=1, engine=engine)
62+
qu.loc[i, engine] = np.mean(qu_times)
63+
64+
return ev, qu
65+
66+
67+
def plot_perf(df, engines, title, filename=None):
68+
from matplotlib.pyplot import figure
69+
70+
sns.set()
71+
sns.set_palette("Set2")
72+
73+
fig = figure(figsize=(4, 3), dpi=120)
74+
ax = fig.add_subplot(111)
75+
76+
for engine in engines:
77+
ax.loglog(df["size"], df[engine], label=engine, lw=2)
78+
79+
ax.set_xlabel("Number of Rows")
80+
ax.set_ylabel("Time (s)")
81+
ax.set_title(title)
82+
ax.legend(loc="best")
83+
ax.tick_params(top=False, right=False)
84+
85+
fig.tight_layout()
86+
87+
if filename is not None:
88+
fig.savefig(filename)
89+
90+
91+
if __name__ == "__main__":
92+
import os
93+
94+
pandas_dir = os.path.dirname(
95+
os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
96+
)
97+
static_path = os.path.join(pandas_dir, "doc", "source", "_static")
98+
99+
join = lambda p: os.path.join(static_path, p)
100+
101+
fn = join("eval-query-perf-data.h5")
102+
103+
engines = "python", "numexpr"
104+
105+
ev, qu = bench(verbose=True) # only this one
106+
107+
plot_perf(ev, engines, "DataFrame.eval()", filename=join("eval-perf.png"))
108+
plot_perf(qu, engines, "DataFrame.query()", filename=join("query-perf.png"))
-24.7 KB
Binary file not shown.

doc/source/_static/eval-perf.png

10.8 KB
Loading
-21.2 KB
Binary file not shown.

doc/source/_static/query-perf.png

8.79 KB
Loading

doc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@
236236
if ".dev" in version:
237237
switcher_version = "dev"
238238
elif "rc" in version:
239-
switcher_version = version.split("rc")[0] + " (rc)"
239+
switcher_version = version.split("rc", maxsplit=1)[0] + " (rc)"
240240

241241
html_theme_options = {
242242
"external_links": [],

doc/source/development/contributing_environment.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ To test out code changes, you'll need to build pandas from source, which
1010
requires a C/C++ compiler and Python environment. If you're making documentation
1111
changes, you can skip to :ref:`contributing to the documentation <contributing_documentation>` but if you skip
1212
creating the development environment you won't be able to build the documentation
13-
locally before pushing your changes.
13+
locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks <contributing.pre-commit>`.
1414

1515
.. contents:: Table of contents:
1616
:local:

doc/source/getting_started/install.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Instructions for installing from source,
2020
Python version support
2121
----------------------
2222

23-
Officially Python 3.8, 3.9 and 3.10.
23+
Officially Python 3.8, 3.9, 3.10 and 3.11.
2424

2525
Installing pandas
2626
-----------------
@@ -336,7 +336,6 @@ Can be managed as optional_extra with ``pandas[excel]``.
336336
Dependency Minimum Version optional_extra Notes
337337
========================= ================== =============== =============================================================
338338
xlrd 2.0.1 excel Reading Excel
339-
xlwt 1.3.0 excel Writing Excel
340339
xlsxwriter 1.4.3 excel Writing Excel
341340
openpyxl 3.0.7 excel Reading / writing for xlsx files
342341
pyxlsb 1.0.8 excel Reading for xlsb files

doc/source/getting_started/intro_tutorials/09_timeseries.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ I want to add a new column to the ``DataFrame`` containing only the month of the
144144
145145
By using ``Timestamp`` objects for dates, a lot of time-related
146146
properties are provided by pandas. For example the ``month``, but also
147-
``year``, ``weekofyear``, ``quarter``,… All of these properties are
147+
``year``, ``quarter``,… All of these properties are
148148
accessible by the ``dt`` accessor.
149149

150150
.. raw:: html

doc/source/reference/indexing.rst

-2
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,6 @@ Time/date components
343343
DatetimeIndex.timetz
344344
DatetimeIndex.dayofyear
345345
DatetimeIndex.day_of_year
346-
DatetimeIndex.weekofyear
347-
DatetimeIndex.week
348346
DatetimeIndex.dayofweek
349347
DatetimeIndex.day_of_week
350348
DatetimeIndex.weekday

doc/source/reference/series.rst

-2
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,6 @@ Datetime properties
311311
Series.dt.second
312312
Series.dt.microsecond
313313
Series.dt.nanosecond
314-
Series.dt.week
315-
Series.dt.weekofyear
316314
Series.dt.dayofweek
317315
Series.dt.day_of_week
318316
Series.dt.weekday

0 commit comments

Comments
 (0)