Skip to content

Commit c772789

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents cfec8a7 + d558bce commit c772789

File tree

245 files changed

+6336
-4838
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

245 files changed

+6336
-4838
lines changed

.github/workflows/comment_bot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
- name: Install-pre-commit
3030
run: python -m pip install --upgrade pre-commit
3131
- name: Run pre-commit
32-
run: pre-commit run --all-files || (exit 0)
32+
run: pre-commit run --from-ref=origin/master --to-ref=HEAD --all-files || (exit 0)
3333
- name: Commit results
3434
run: |
3535
git config user.name "$(git log -1 --pretty=format:%an)"

.pre-commit-config.yaml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ repos:
2424
hooks:
2525
- id: isort
2626
- repo: https://github.com/asottile/pyupgrade
27-
rev: v2.7.4
27+
rev: v2.9.0
2828
hooks:
2929
- id: pyupgrade
30-
args: [--py37-plus]
30+
args: [--py37-plus, --keep-runtime-typing]
3131
- repo: https://github.com/pre-commit/pygrep-hooks
3232
rev: v1.7.0
3333
hooks:
@@ -192,6 +192,11 @@ repos:
192192
files: ^pandas/
193193
exclude: ^pandas/tests/
194194
- repo: https://github.com/MarcoGorelli/no-string-hints
195-
rev: v0.1.6
195+
rev: v0.1.7
196196
hooks:
197197
- id: no-string-hints
198+
- repo: https://github.com/MarcoGorelli/abs-imports
199+
rev: v0.1.2
200+
hooks:
201+
- id: abs-imports
202+
files: ^pandas/

asv_bench/benchmarks/io/csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ class ToCSVIndexes(BaseIO):
8484
def _create_df(rows, cols):
8585
index_cols = {
8686
"index1": np.random.randint(0, rows, rows),
87-
"index2": np.full(rows, 1, dtype=np.int),
88-
"index3": np.full(rows, 1, dtype=np.int),
87+
"index2": np.full(rows, 1, dtype=int),
88+
"index3": np.full(rows, 1, dtype=int),
8989
}
9090
data_cols = {
9191
f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)

asv_bench/benchmarks/rolling.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,11 @@ class Pairwise:
140140

141141
def setup(self, window, method, pairwise):
142142
N = 10 ** 4
143+
n_groups = 20
144+
groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
143145
arr = np.random.random(N)
144146
self.df = pd.DataFrame(arr)
147+
self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
145148

146149
def time_pairwise(self, window, method, pairwise):
147150
if window is None:
@@ -150,6 +153,13 @@ def time_pairwise(self, window, method, pairwise):
150153
r = self.df.rolling(window=window)
151154
getattr(r, method)(self.df, pairwise=pairwise)
152155

156+
def time_groupby(self, window, method, pairwise):
157+
if window is None:
158+
r = self.df_group.expanding()
159+
else:
160+
r = self.df_group.rolling(window=window)
161+
getattr(r, method)(self.df, pairwise=pairwise)
162+
153163

154164
class Quantile:
155165
params = (

asv_bench/benchmarks/series_methods.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ def setup(self):
108108
self.vals_short = np.arange(2).astype(object)
109109
self.vals_long = np.arange(10 ** 5).astype(object)
110110
# because of nans floats are special:
111-
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(object)
112-
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(object)
111+
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float_)).astype(object)
112+
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float_).astype(object)
113113

114114
def time_isin_nans(self):
115115
# if nan-objects are different objects,
@@ -284,16 +284,29 @@ def time_clip(self, n):
284284

285285
class ValueCounts:
286286

287-
params = ["int", "uint", "float", "object"]
288-
param_names = ["dtype"]
287+
params = [[10 ** 3, 10 ** 4, 10 ** 5], ["int", "uint", "float", "object"]]
288+
param_names = ["N", "dtype"]
289289

290-
def setup(self, dtype):
291-
self.s = Series(np.random.randint(0, 1000, size=100000)).astype(dtype)
290+
def setup(self, N, dtype):
291+
self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
292292

293-
def time_value_counts(self, dtype):
293+
def time_value_counts(self, N, dtype):
294294
self.s.value_counts()
295295

296296

297+
class Mode:
298+
299+
params = [[10 ** 3, 10 ** 4, 10 ** 5], ["int", "uint", "float", "object"]]
300+
param_names = ["N", "dtype"]
301+
302+
def setup(self, N, dtype):
303+
np.random.seed(42)
304+
self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
305+
306+
def time_mode(self, N, dtype):
307+
self.s.mode()
308+
309+
297310
class Dir:
298311
def setup(self):
299312
self.s = Series(index=tm.makeStringIndex(10000))

ci/code_checks.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
6464
# this particular codebase (e.g. src/headers, src/klib). However,
6565
# we can lint all header files since they aren't "generated" like C files are.
6666
MSG='Linting .c and .h' ; echo $MSG
67-
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
67+
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/src/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
6868
RET=$(($RET + $?)) ; echo $MSG "DONE"
6969

7070
fi

ci/deps/actions-37-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dependencies:
1111
- hypothesis>=3.58.0
1212

1313
# required
14-
- numpy
14+
- numpy<1.20 # GH#39541 compat for pyarrow<3
1515
- python-dateutil
1616
- pytz
1717

ci/deps/azure-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- numpy
1919
- python-dateutil
2020
- nomkl
21-
- pyarrow
21+
- pyarrow=0.15.1
2222
- pytz
2323
- s3fs>=0.4.0
2424
- moto>=1.3.14

ci/deps/azure-38-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies:
2424
- moto
2525
- nomkl
2626
- numexpr
27-
- numpy
27+
- numpy<1.20 # GH#39541 compat with pyarrow<3
2828
- openpyxl
2929
- pytables
3030
- python-dateutil

ci/deps/azure-macos-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies:
2121
- numexpr
2222
- numpy=1.16.5
2323
- openpyxl
24-
- pyarrow>=0.15.0
24+
- pyarrow=0.15.1
2525
- pytables
2626
- python-dateutil==2.7.3
2727
- pytz

doc/source/user_guide/enhancingperf.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ in Python, so maybe we could minimize these by cythonizing the apply part.
199199
...: return s * dx
200200
...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b,
201201
...: np.ndarray col_N):
202-
...: assert (col_a.dtype == np.float
203-
...: and col_b.dtype == np.float and col_N.dtype == np.int)
202+
...: assert (col_a.dtype == np.float_
203+
...: and col_b.dtype == np.float_ and col_N.dtype == np.int_)
204204
...: cdef Py_ssize_t i, n = len(col_N)
205205
...: assert (len(col_a) == len(col_b) == n)
206206
...: cdef np.ndarray[double] res = np.empty(n)

doc/source/user_guide/style.ipynb

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,7 @@
180180
"\n",
181181
"styles = [\n",
182182
" hover(),\n",
183-
" {'selector': \"th\", 'props': [(\"font-size\", \"150%\"),\n",
184-
" (\"text-align\", \"center\")]}\n",
183+
" {'selector': \"th\", 'props': [(\"font-size\", \"150%\"), (\"text-align\", \"center\")]}\n",
185184
"]\n",
186185
"\n",
187186
"df.style.set_table_styles(styles)"
@@ -224,7 +223,7 @@
224223
"cell_type": "markdown",
225224
"metadata": {},
226225
"source": [
227-
"We can also chain all of the above by setting the `overwrite` argument to `False` so that it preserves previous settings."
226+
"We can also chain all of the above by setting the `overwrite` argument to `False` so that it preserves previous settings. We also show the CSS string input rather than the list of tuples."
228227
]
229228
},
230229
{
@@ -238,13 +237,13 @@
238237
" set_table_styles(styles).\\\n",
239238
" set_table_styles({\n",
240239
" 'A': [{'selector': '',\n",
241-
" 'props': [('color', 'red')]}],\n",
240+
" 'props': 'color:red;'}],\n",
242241
" 'B': [{'selector': 'td',\n",
243-
" 'props': [('color', 'blue')]}]\n",
242+
" 'props': 'color:blue;'}]\n",
244243
" }, axis=0, overwrite=False).\\\n",
245244
" set_table_styles({\n",
246245
" 3: [{'selector': 'td',\n",
247-
" 'props': [('color', 'green')]}]\n",
246+
" 'props': 'color:green;font-weight:bold;'}]\n",
248247
" }, axis=1, overwrite=False)\n",
249248
"s"
250249
]

doc/source/user_guide/visualization.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,9 @@ These can be specified by the ``x`` and ``y`` keywords.
552552
.. ipython:: python
553553
554554
df = pd.DataFrame(np.random.rand(50, 4), columns=["a", "b", "c", "d"])
555+
df["species"] = pd.Categorical(
556+
["setosa"] * 20 + ["versicolor"] * 20 + ["virginica"] * 10
557+
)
555558
556559
@savefig scatter_plot.png
557560
df.plot.scatter(x="a", y="b");
@@ -579,6 +582,21 @@ each point:
579582
df.plot.scatter(x="a", y="b", c="c", s=50);
580583
581584
585+
.. ipython:: python
586+
:suppress:
587+
588+
plt.close("all")
589+
590+
If a categorical column is passed to ``c``, then a discrete colorbar will be produced:
591+
592+
.. versionadded:: 1.3.0
593+
594+
.. ipython:: python
595+
596+
@savefig scatter_plot_categorical.png
597+
df.plot.scatter(x="a", y="b", c="species", cmap="viridis", s=50);
598+
599+
582600
.. ipython:: python
583601
:suppress:
584602

doc/source/whatsnew/v0.8.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ New plotting methods
176176
Vytautas Jancauskas, the 2012 GSOC participant, has added many new plot
177177
types. For example, ``'kde'`` is a new option:
178178

179-
.. code-block:: python
179+
.. ipython:: python
180180
181181
s = pd.Series(
182182
np.concatenate((np.random.randn(1000), np.random.randn(1000) * 0.5 + 3))

doc/source/whatsnew/v1.2.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ Deprecations
518518
- Deprecated parameter ``dtype`` of method :meth:`~Index.copy` for all :class:`Index` subclasses. Use the :meth:`~Index.astype` method instead for changing dtype (:issue:`35853`)
519519
- Deprecated parameters ``levels`` and ``codes`` in :meth:`MultiIndex.copy`. Use the :meth:`~MultiIndex.set_levels` and :meth:`~MultiIndex.set_codes` methods instead (:issue:`36685`)
520520
- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
521-
- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`)
521+
- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`35224`)
522522
- The method :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`)
523523
- Deprecated indexing :class:`DataFrame` rows with a single datetime-like string as ``df[string]`` (given the ambiguity whether it is indexing the rows or selecting a column), use ``df.loc[string]`` instead (:issue:`36179`)
524524
- Deprecated :meth:`Index.is_all_dates` (:issue:`27744`)

doc/source/whatsnew/v1.2.2.rst

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,14 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17-
-
17+
18+
- Fixed regression in :func:`read_excel` that caused it to raise ``AttributeError`` when checking version of older xlrd versions (:issue:`38955`)
19+
- Fixed regression in :class:`DataFrame` constructor reordering elements when constructing from a datetime ndarray with a dtype other than ``"datetime64[ns]"`` (:issue:`39422`)
20+
- Fixed regression in :meth:`DataFrame.astype` and :meth:`Series.astype` not casting to bytes dtype (:issue:`39474`)
21+
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
22+
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
23+
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
24+
- Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
1825
-
1926

2027
.. ---------------------------------------------------------------------------
@@ -24,7 +31,8 @@ Fixed regressions
2431
Bug fixes
2532
~~~~~~~~~
2633

27-
-
34+
- The error message raised by :func:`pandas.read_excel` when a specified ``sheet_name`` does not exist is now uniform across engines (:issue:`39250`)
35+
- Fixed bug in :func:`pandas.read_excel` producing incorrect results when the engine ``openpyxl`` is used and the Excel file is missing or has incorrect dimension information; the fix requires ``openpyxl`` >= 3.0.0, prior versions may still fail (:issue:`38956`, :issue:`39001`)
2836
-
2937

3038
.. ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)