Skip to content

Commit ff0bd57

Browse files
Merge remote-tracking branch 'upstream/main' into dst-bug
2 parents 21ed3c4 + 68d6b47 commit ff0bd57

File tree

176 files changed

+2003
-1213
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

176 files changed

+2003
-1213
lines changed

.github/workflows/code-checks.yml

+3
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ jobs:
153153
- name: Build image
154154
run: docker build --pull --no-cache --tag pandas-dev-env .
155155

156+
- name: Show environment
157+
run: docker run -w /home/pandas pandas-dev-env mamba run -n pandas-dev python -c "import pandas as pd; print(pd.show_versions())"
158+
156159
requirements-dev-text-installable:
157160
name: Test install requirements-dev.txt
158161
runs-on: ubuntu-latest

.github/workflows/macos-windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
defaults:
2929
run:
3030
shell: bash -el {0}
31-
timeout-minutes: 120
31+
timeout-minutes: 180
3232
strategy:
3333
matrix:
3434
os: [macos-latest, windows-latest]

.github/workflows/python-dev.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,10 @@ jobs:
8080
python -m pip install python-dateutil pytz cython hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
8181
python -m pip list
8282
83+
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
8384
- name: Build Pandas
8485
run: |
85-
python setup.py build_ext -q -j2
86+
python setup.py build_ext -q -j1
8687
python -m pip install -e . --no-build-isolation --no-use-pep517
8788
8889
- name: Build Version

.github/workflows/ubuntu.yml

+17-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
defaults:
2727
run:
2828
shell: bash -el {0}
29-
timeout-minutes: 120
29+
timeout-minutes: 180
3030
strategy:
3131
matrix:
3232
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
@@ -42,18 +42,26 @@ jobs:
4242
- name: "Minimum Versions"
4343
env_file: actions-38-minimum_versions.yaml
4444
pattern: "not slow and not network and not single_cpu"
45-
- name: "Locale: it_IT.utf8"
45+
- name: "Locale: it_IT"
4646
env_file: actions-38.yaml
4747
pattern: "not slow and not network and not single_cpu"
4848
extra_apt: "language-pack-it"
49+
# Use the utf8 version as the default; it has no bad side effects.
4950
lang: "it_IT.utf8"
5051
lc_all: "it_IT.utf8"
51-
- name: "Locale: zh_CN.utf8"
52+
# Also install it_IT (its encoding is ISO8859-1) but do not activate it.
53+
# It will be temporarily activated during tests via locale.setlocale().
54+
extra_loc: "it_IT"
55+
- name: "Locale: zh_CN"
5256
env_file: actions-38.yaml
5357
pattern: "not slow and not network and not single_cpu"
5458
extra_apt: "language-pack-zh-hans"
59+
# Use the utf8 version as the default; it has no bad side effects.
5560
lang: "zh_CN.utf8"
5661
lc_all: "zh_CN.utf8"
62+
# Also install zh_CN (its encoding is gb2312) but do not activate it.
63+
# It will be temporarily activated during tests via locale.setlocale().
64+
extra_loc: "zh_CN"
5765
- name: "Copy-on-Write"
5866
env_file: actions-310.yaml
5967
pattern: "not slow and not network and not single_cpu"
@@ -148,6 +156,12 @@ jobs:
148156
# xsel for clipboard tests
149157
run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}
150158

159+
- name: Generate extra locales
160+
# These extra locales will be available for locale.setlocale() calls in tests
161+
run: |
162+
sudo locale-gen ${{ matrix.extra_loc }}
163+
if: ${{ matrix.extra_loc }}
164+
151165
- name: Set up Conda
152166
uses: ./.github/actions/setup-conda
153167
with:

.libcst.codemod.yaml

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# String that LibCST should look for in code which indicates that the
2+
# module is generated code.
3+
generated_code_marker: '@generated'
4+
# Command line and arguments for invoking a code formatter. Anything
5+
# specified here must be capable of taking code via stdin and returning
6+
# formatted code via stdout.
7+
formatter: ['black', '-']
8+
# List of regex patterns which LibCST will evaluate against filenames to
9+
# determine if the module should be touched.
10+
blacklist_patterns: []
11+
# List of modules that contain codemods inside of them.
12+
modules:
13+
- 'libcst.codemod.commands'
14+
- 'autotyping'
15+
# Absolute or relative path of the repository root, used for providing
16+
# full-repo metadata. Relative paths should be specified with this file
17+
# location as the base.
18+
repo_root: '.'

.pre-commit-config.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ repos:
2626
hooks:
2727
- id: codespell
2828
types_or: [python, rst, markdown]
29+
- repo: https://github.com/MarcoGorelli/cython-lint
30+
rev: v0.1.4
31+
hooks:
32+
- id: cython-lint
2933
- repo: https://github.com/pre-commit/pre-commit-hooks
3034
rev: v4.3.0
3135
hooks:
@@ -252,3 +256,14 @@ repos:
252256
/(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$
253257
|/tests/
254258
|/_testing/
259+
- id: autotyping
260+
name: autotyping
261+
entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics
262+
types_or: [python, pyi]
263+
files: ^pandas
264+
exclude: ^(pandas/tests|pandas/io/clipboard)
265+
language: python
266+
additional_dependencies:
267+
- autotyping==22.9.0
268+
- black==22.6.0
269+
- libcst==0.4.7

Dockerfile

+6-16
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM quay.io/condaforge/mambaforge:4.13.0-1
1+
FROM quay.io/condaforge/mambaforge
22

33
# if you forked pandas, you can pass in your own GitHub username to use your fork
44
# i.e. gh_username=myname
@@ -10,16 +10,12 @@ ENV DEBIAN_FRONTEND=noninteractive
1010

1111
# Configure apt and install packages
1212
RUN apt-get update \
13-
&& apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
13+
&& apt-get -y install --no-install-recommends apt-utils git tzdata dialog 2>&1 \
1414
#
15-
# Install tzdata and configure timezone (fix for tests which try to read from "/etc/localtime")
16-
&& apt-get -y install tzdata \
15+
# Configure timezone (fix for tests which try to read from "/etc/localtime")
1716
&& ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
1817
&& dpkg-reconfigure -f noninteractive tzdata \
1918
#
20-
# Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
21-
&& apt-get -y install git iproute2 procps iproute2 lsb-release \
22-
#
2319
# cleanup
2420
&& apt-get autoremove -y \
2521
&& apt-get clean -y \
@@ -35,18 +31,12 @@ RUN mkdir "$pandas_home" \
3531
&& git remote add upstream "https://github.com/pandas-dev/pandas.git" \
3632
&& git pull upstream main
3733

38-
# Because it is surprisingly difficult to activate a conda environment inside a DockerFile
39-
# (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89),
40-
# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
41-
#
4234
# Set up environment
43-
RUN mamba env update -n base -f "$pandas_home/environment.yml"
35+
RUN mamba env create -f "$pandas_home/environment.yml"
4436

4537
# Build C extensions and pandas
46-
SHELL ["/bin/bash", "-c"]
47-
RUN . /opt/conda/etc/profile.d/conda.sh \
48-
&& conda activate base \
49-
&& cd "$pandas_home" \
38+
SHELL ["mamba", "run", "--no-capture-output", "-n", "pandas-dev", "/bin/bash", "-c"]
39+
RUN cd "$pandas_home" \
5040
&& export \
5141
&& python setup.py build_ext -j 4 \
5242
&& python -m pip install --no-build-isolation -e .

asv_bench/benchmarks/algorithms.py

+23
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,29 @@ def time_duplicated(self, unique, keep, dtype):
9595
self.idx.duplicated(keep=keep)
9696

9797

98+
class DuplicatedMaskedArray:
99+
100+
params = [
101+
[True, False],
102+
["first", "last", False],
103+
["Int64", "Float64"],
104+
]
105+
param_names = ["unique", "keep", "dtype"]
106+
107+
def setup(self, unique, keep, dtype):
108+
N = 10**5
109+
data = pd.Series(np.arange(N), dtype=dtype)
110+
data[list(range(1, N, 100))] = pd.NA
111+
if not unique:
112+
data = data.repeat(5)
113+
self.ser = data
114+
# cache is_unique
115+
self.ser.is_unique
116+
117+
def time_duplicated(self, unique, keep, dtype):
118+
self.ser.duplicated(keep=keep)
119+
120+
98121
class Hashing:
99122
def setup_cache(self):
100123
N = 10**5

asv_bench/benchmarks/array.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ def time_from_float_array(self):
3232

3333
class IntegerArray:
3434
def setup(self):
35-
self.values_integer = np.array([1, 0, 1, 0])
36-
self.data = np.array([1, 2, 3, 4], dtype="int64")
37-
self.mask = np.array([False, False, True, False])
35+
N = 250_000
36+
self.values_integer = np.array([1, 0, 1, 0] * N)
37+
self.data = np.array([1, 2, 3, 4] * N, dtype="int64")
38+
self.mask = np.array([False, False, True, False] * N)
3839

3940
def time_constructor(self):
4041
pd.arrays.IntegerArray(self.data, self.mask)

asv_bench/benchmarks/multiindex_object.py

+15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
MultiIndex,
99
RangeIndex,
1010
Series,
11+
array,
1112
date_range,
1213
)
1314

@@ -176,6 +177,20 @@ def time_sortlevel_one(self):
176177
self.mi.sortlevel(1)
177178

178179

180+
class SortValues:
181+
182+
params = ["int64", "Int64"]
183+
param_names = ["dtype"]
184+
185+
def setup(self, dtype):
186+
a = array(np.tile(np.arange(100), 1000), dtype=dtype)
187+
b = array(np.tile(np.arange(1000), 100), dtype=dtype)
188+
self.mi = MultiIndex.from_arrays([a, b])
189+
190+
def time_sort_values(self, dtype):
191+
self.mi.sort_values()
192+
193+
179194
class Values:
180195
def setup_cache(self):
181196

asv_bench/benchmarks/series_methods.py

+14
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44

55
from pandas import (
6+
NA,
67
Index,
78
NaT,
89
Series,
@@ -166,6 +167,19 @@ def time_value_counts(self, N, dtype):
166167
self.s.value_counts()
167168

168169

170+
class ValueCountsEA:
171+
172+
params = [[10**3, 10**4, 10**5], [True, False]]
173+
param_names = ["N", "dropna"]
174+
175+
def setup(self, N, dropna):
176+
self.s = Series(np.random.randint(0, N, size=10 * N), dtype="Int64")
177+
self.s.loc[1] = NA
178+
179+
def time_value_counts(self, N, dropna):
180+
self.s.value_counts(dropna=dropna)
181+
182+
169183
class ValueCountsObjectDropNAFalse:
170184

171185
params = [10**3, 10**4, 10**5]

doc/source/development/contributing_environment.rst

+10
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,16 @@ Run Container::
237237
# Run a container and bind your local repo to the container
238238
docker run -it -w /home/pandas --rm -v path-to-local-pandas-repo:/home/pandas pandas-yourname-env
239239

240+
Then a ``pandas-dev`` virtual environment will be available with all the development dependencies.
241+
242+
.. code-block:: shell
243+
244+
root@... :/home/pandas# conda env list
245+
# conda environments:
246+
#
247+
base * /opt/conda
248+
pandas-dev /opt/conda/envs/pandas-dev
249+
240250
.. note::
241251
If you bind your local repo for the first time, you have to build the C extensions afterwards.
242252
Run the following command inside the container::

doc/source/user_guide/timeseries.rst

-1
Original file line numberDiff line numberDiff line change
@@ -1981,7 +1981,6 @@ frequency. Arithmetic is not allowed between ``Period`` with different ``freq``
19811981
p = pd.Period("2012-01", freq="2M")
19821982
p + 2
19831983
p - 1
1984-
@okexcept
19851984
p == pd.Period("2012-01", freq="3M")
19861985
19871986

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ Other enhancements
308308
- Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`)
309309
- :class:`Series` and :class:`DataFrame` with :class:`IntegerDtype` now supports bitwise operations (:issue:`34463`)
310310
- Add ``milliseconds`` field support for :class:`.DateOffset` (:issue:`43371`)
311+
- :meth:`DataFrame.where` tries to maintain dtype of :class:`DataFrame` if fill value can be cast without loss of precision (:issue:`45582`)
311312
- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
312313
- :func:`concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
313314
- :func:`concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
@@ -331,6 +332,7 @@ Other enhancements
331332
- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`)
332333
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
333334
- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
335+
- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)
334336

335337
.. ---------------------------------------------------------------------------
336338
.. _whatsnew_150.notable_bug_fixes:

0 commit comments

Comments
 (0)