Skip to content

Commit 4b960f5

Browse files
authored
Merge branch 'master' into issue-37643
2 parents 0a45fcc + 56b9a80 commit 4b960f5

File tree

368 files changed

+7650
-4873
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

368 files changed

+7650
-4873
lines changed

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,5 @@ RUN conda env update -n base -f "$pandas_home/environment.yml"
4343

4444
# Build C extensions and pandas
4545
RUN cd "$pandas_home" \
46-
&& python setup.py build_ext --inplace -j 4 \
46+
&& python setup.py build_ext -j 4 \
4747
&& python -m pip install -e .

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ clean_pyc:
99
-find . -name '*.py[co]' -exec rm {} \;
1010

1111
build: clean_pyc
12-
python setup.py build_ext --inplace
12+
python setup.py build_ext
1313

1414
lint-diff:
1515
git diff upstream/master --name-only -- "*.py" | xargs flake8

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Here are just a few of the things that pandas does well:
6060
and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
6161
- [**Time series**][timeseries]-specific functionality: date range
6262
generation and frequency conversion, moving window statistics,
63-
date shifting and lagging.
63+
date shifting and lagging
6464

6565

6666
[missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ def setup(self):
486486
tmp2 = (np.random.random(10000) * 10.0).astype(np.float32)
487487
tmp = np.concatenate((tmp1, tmp2))
488488
arr = np.repeat(tmp, 10)
489-
self.df = DataFrame(dict(a=arr, b=arr))
489+
self.df = DataFrame({"a": arr, "b": arr})
490490

491491
def time_sum(self):
492492
self.df.groupby(["a"])["b"].sum()

asv_bench/benchmarks/indexing.py

+8
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,14 @@ def time_assign_with_setitem(self):
358358
for i in range(100):
359359
self.df[i] = np.random.randn(self.N)
360360

361+
def time_assign_list_like_with_setitem(self):
362+
np.random.seed(1234)
363+
self.df[list(range(100))] = np.random.randn(self.N, 100)
364+
365+
def time_assign_list_of_columns_concat(self):
366+
df = DataFrame(np.random.randn(self.N, 100))
367+
concat([self.df, df], axis=1)
368+
361369

362370
class ChainIndexing:
363371

asv_bench/benchmarks/join_merge.py

+6
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ def time_join_dataframe_index_single_key_small(self, sort):
132132
def time_join_dataframe_index_shuffle_key_bigger_sort(self, sort):
133133
self.df_shuf.join(self.df_key2, on="key2", sort=sort)
134134

135+
def time_join_dataframes_cross(self, sort):
136+
self.df.loc[:2000].join(self.df_key1, how="cross", sort=sort)
137+
135138

136139
class JoinIndex:
137140
def setup(self):
@@ -205,6 +208,9 @@ def time_merge_dataframe_integer_2key(self, sort):
205208
def time_merge_dataframe_integer_key(self, sort):
206209
merge(self.df, self.df2, on="key1", sort=sort)
207210

211+
def time_merge_dataframes_cross(self, sort):
212+
merge(self.left.loc[:2000], self.right.loc[:2000], how="cross", sort=sort)
213+
208214

209215
class I8Merge:
210216

asv_bench/benchmarks/reshape.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,10 @@ def setup(self):
103103
nidvars = 20
104104
N = 5000
105105
self.letters = list("ABCD")
106-
yrvars = [l + str(num) for l, num in product(self.letters, range(1, nyrs + 1))]
106+
yrvars = [
107+
letter + str(num)
108+
for letter, num in product(self.letters, range(1, nyrs + 1))
109+
]
107110
columns = [str(i) for i in range(nidvars)] + yrvars
108111
self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), columns=columns)
109112
self.df["id"] = self.df.index

asv_bench/benchmarks/series_methods.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import NaT, Series, date_range
5+
from pandas import Categorical, NaT, Series, date_range
66

77
from .pandas_vb_common import tm
88

@@ -36,6 +36,28 @@ def time_isin(self, dtypes):
3636
self.s.isin(self.values)
3737

3838

39+
class IsInDatetime64:
40+
def setup(self):
41+
dti = date_range(
42+
start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
43+
)
44+
self.ser = Series(dti)
45+
self.subset = self.ser._values[::3]
46+
self.cat_subset = Categorical(self.subset)
47+
48+
def time_isin(self):
49+
self.ser.isin(self.subset)
50+
51+
def time_isin_cat_values(self):
52+
self.ser.isin(self.cat_subset)
53+
54+
def time_isin_mismatched_dtype(self):
55+
self.ser.isin([1, 2])
56+
57+
def time_isin_empty(self):
58+
self.ser.isin([])
59+
60+
3961
class IsInFloat64:
4062
def setup(self):
4163
self.small = Series([1, 2], dtype=np.float64)

azure-pipelines.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
. ~/virtualenvs/pandas-dev/bin/activate && \
4141
python -m pip install --no-deps -U pip wheel setuptools && \
4242
pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines && \
43-
python setup.py build_ext -q -i -j2 && \
43+
python setup.py build_ext -q -j2 && \
4444
python -m pip install --no-build-isolation -e . && \
4545
pytest -m 'not slow and not network and not clipboard' pandas --junitxml=test-data.xml"
4646
displayName: 'Run 32-bit manylinux2014 Docker Build / Tests'

ci/azure/windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
- bash: |
3535
source activate pandas-dev
3636
conda list
37-
python setup.py build_ext -q -i -j 4
37+
python setup.py build_ext -q -j 4
3838
python -m pip install --no-build-isolation -e .
3939
displayName: 'Build'
4040

ci/deps/azure-39.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,8 @@ dependencies:
1515
- numpy
1616
- python-dateutil
1717
- pytz
18+
19+
# optional dependencies
20+
- pytables
21+
- scipy
22+
- pyarrow=1.0

ci/deps/travis-37-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ dependencies:
3434
- pyarrow>=0.17
3535
- pytables>=3.5.1
3636
- scipy
37-
- xarray=0.12.0
37+
- xarray=0.12.3
3838
- xlrd
3939
- xlsxwriter
4040
- xlwt

ci/setup_env.sh

+7-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ fi
108108
echo "activate pandas-dev"
109109
source activate pandas-dev
110110

111+
# Explicitly set an environment variable indicating that this is pandas' CI environment.
112+
#
113+
# This allows us to enable things like -Werror that shouldn't be activated in
114+
# downstream CI jobs that may also build pandas from source.
115+
export PANDAS_CI=1
116+
111117
echo
112118
echo "remove any installed pandas package"
113119
echo "w/o removing anything else"
@@ -131,7 +137,7 @@ conda list pandas
131137
# Make sure any error below is reported as such
132138

133139
echo "[Build extensions]"
134-
python setup.py build_ext -q -i -j2
140+
python setup.py build_ext -q -j2
135141

136142
echo "[Updating pip]"
137143
python -m pip install --no-deps -U pip wheel setuptools

doc/source/development/contributing.rst

+39-12
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ Creating a development environment
146146
----------------------------------
147147

148148
To test out code changes, you'll need to build pandas from source, which
149-
requires a C compiler and Python environment. If you're making documentation
149+
requires a C/C++ compiler and Python environment. If you're making documentation
150150
changes, you can skip to :ref:`contributing.documentation` but you won't be able
151151
to build the documentation locally before pushing your changes.
152152

@@ -183,7 +183,7 @@ See https://www.jetbrains.com/help/pycharm/docker.html for details.
183183

184184
Note that you might need to rebuild the C extensions if/when you merge with upstream/master using::
185185

186-
python setup.py build_ext --inplace -j 4
186+
python setup.py build_ext -j 4
187187

188188
.. _contributing.dev_c:
189189

@@ -195,6 +195,13 @@ operations. To install pandas from source, you need to compile these C
195195
extensions, which means you need a C compiler. This process depends on which
196196
platform you're using.
197197

198+
If you have set up your environment using ``conda``, the packages ``c-compiler``
199+
and ``cxx-compiler`` will install a fitting compiler for your platform that is
200+
compatible with the remaining conda packages. On Windows and macOS, you will
201+
also need to install the SDKs as they have to be distributed separately.
202+
These packages will be automatically installed by using ``pandas``'s
203+
``environment.yml``.
204+
198205
**Windows**
199206

200207
You will need `Build Tools for Visual Studio 2017
@@ -206,12 +213,33 @@ You will need `Build Tools for Visual Studio 2017
206213
scrolling down to "All downloads" -> "Tools for Visual Studio 2019".
207214
In the installer, select the "C++ build tools" workload.
208215

216+
You can install the necessary components on the command line using
217+
`vs_buildtools.exe <https://aka.ms/vs/16/release/vs_buildtools.exe>`_:
218+
219+
.. code::
220+
221+
vs_buildtools.exe --quiet --wait --norestart --nocache ^
222+
--installPath C:\BuildTools ^
223+
--add "Microsoft.VisualStudio.Workload.VCTools;includeRecommended" ^
224+
--add Microsoft.VisualStudio.Component.VC.v141 ^
225+
--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 ^
226+
--add Microsoft.VisualStudio.Component.Windows10SDK.17763
227+
228+
To set up the right paths on the command line, call
229+
``"C:\BuildTools\VC\Auxiliary\Build\vcvars64.bat" -vcvars_ver=14.16 10.0.17763.0``.
230+
209231
**macOS**
210232

211-
Information about compiler installation can be found here:
233+
To use the ``conda``-based compilers, you will need to install the
234+
Developer Tools using ``xcode-select --install``. Otherwise,
235+
information about compiler installation can be found here:
212236
https://devguide.python.org/setup/#macos
213237

214-
**Unix**
238+
**Linux**
239+
240+
For Linux-based ``conda`` installations, you won't have to install any
241+
additional components outside of the conda environment. The instructions
242+
below are only needed if your setup isn't based on conda environments.
215243

216244
Some Linux distributions will come with a pre-installed C compiler. To find out
217245
which compilers (and versions) are installed on your system::
@@ -243,11 +271,10 @@ Let us know if you have any difficulties by opening an issue or reaching out on
243271
Creating a Python environment
244272
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
245273

246-
Now that you have a C compiler, create an isolated pandas development
247-
environment:
274+
Now create an isolated pandas development environment:
248275

249-
* Install either `Anaconda <https://www.anaconda.com/download/>`_ or `miniconda
250-
<https://conda.io/miniconda.html>`_
276+
* Install either `Anaconda <https://www.anaconda.com/download/>`_, `miniconda
277+
<https://conda.io/miniconda.html>`_, or `miniforge <https://github.com/conda-forge/miniforge>`_
251278
* Make sure your conda is up to date (``conda update conda``)
252279
* Make sure that you have :ref:`cloned the repository <contributing.forking>`
253280
* ``cd`` to the pandas source directory
@@ -268,7 +295,7 @@ We'll now kick off a three-step process:
268295
source activate pandas-dev
269296
270297
# Build and install pandas
271-
python setup.py build_ext --inplace -j 4
298+
python setup.py build_ext -j 4
272299
python -m pip install -e . --no-build-isolation --no-use-pep517
273300
274301
At this point you should be able to import pandas from your locally built version::
@@ -315,7 +342,7 @@ You'll need to have at least Python 3.6.1 installed on your system.
315342
python -m pip install -r requirements-dev.txt
316343
317344
# Build and install pandas
318-
python setup.py build_ext --inplace -j 4
345+
python setup.py build_ext -j 4
319346
python -m pip install -e . --no-build-isolation --no-use-pep517
320347
321348
**Unix**/**macOS with pyenv**
@@ -339,7 +366,7 @@ Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
339366
python -m pip install -r requirements-dev.txt
340367
341368
# Build and install pandas
342-
python setup.py build_ext --inplace -j 4
369+
python setup.py build_ext -j 4
343370
python -m pip install -e . --no-build-isolation --no-use-pep517
344371
345372
**Windows**
@@ -365,7 +392,7 @@ should already exist.
365392
python -m pip install -r requirements-dev.txt
366393
367394
# Build and install pandas
368-
python setup.py build_ext --inplace -j 4
395+
python setup.py build_ext -j 4
369396
python -m pip install -e . --no-build-isolation --no-use-pep517
370397
371398
Creating a branch

doc/source/development/policies.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ We will not introduce new deprecations in patch releases.
3535
Deprecations will only be enforced in **major** releases. For example, if a
3636
behavior is deprecated in pandas 1.2.0, it will continue to work, with a
3737
warning, for all releases in the 1.x series. The behavior will change and the
38-
deprecation removed in the next next major release (2.0.0).
38+
deprecation removed in the next major release (2.0.0).
3939

4040
.. note::
4141

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ pyxlsb 1.0.6 Reading for xlsb files
284284
qtpy Clipboard I/O
285285
s3fs 0.4.0 Amazon S3 access
286286
tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)
287-
xarray 0.12.0 pandas-like API for N-dimensional data
287+
xarray 0.12.3 pandas-like API for N-dimensional data
288288
xclip Clipboard I/O on linux
289289
xlrd 1.2.0 Excel reading
290290
xlwt 1.3.0 Excel writing

doc/source/reference/index.rst

-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ public functions related to data types in pandas.
3030
series
3131
frame
3232
arrays
33-
panel
3433
indexing
3534
offset_frequency
3635
window

doc/source/reference/panel.rst

-10
This file was deleted.

doc/source/reference/style.rst

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Style application
3636
Styler.where
3737
Styler.format
3838
Styler.set_precision
39+
Styler.set_td_classes
3940
Styler.set_table_styles
4041
Styler.set_table_attributes
4142
Styler.set_caption

doc/source/user_guide/dsintro.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ Data Classes as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>
439439
can be passed into the DataFrame constructor.
440440
Passing a list of dataclasses is equivalent to passing a list of dictionaries.
441441

442-
Please be aware, that that all values in the list should be dataclasses, mixing
442+
Please be aware that all values in the list should be dataclasses; mixing
443443
types in the list would result in a TypeError.
444444

445445
.. ipython:: python

doc/source/user_guide/groupby.rst

+9
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,15 @@ index are the group names and whose values are the sizes of each group.
524524
525525
grouped.describe()
526526
527+
Another aggregation example is to compute the number of unique values of each group. This is similar to the ``value_counts`` function, except that it only counts unique values.
528+
529+
.. ipython:: python
530+
531+
ll = [['foo', 1], ['foo', 2], ['foo', 2], ['bar', 1], ['bar', 1]]
532+
df4 = pd.DataFrame(ll, columns=["A", "B"])
533+
df4
534+
df4.groupby("A")["B"].nunique()
535+
527536
.. note::
528537

529538
Aggregation functions **will not** return the groups that you are aggregating over

0 commit comments

Comments
 (0)