Commit 08215e8

Merge remote-tracking branch 'upstream/master' into mpl-convert-cache

2 parents: 9c00578 + be6c369


71 files changed: +1748 -1605 lines

.github/FUNDING.yml (+1)

@@ -1 +1,2 @@
 custom: https://pandas.pydata.org/donate.html
+tidelift: pypi/pandas

.github/SECURITY.md (+1)

@@ -0,0 +1 @@
+To report a security vulnerability to pandas, please go to https://tidelift.com/security and see the instructions there.

.travis.yml (+2 -2)

@@ -21,7 +21,7 @@ env:
 
 git:
   # for cloning
-  depth: 2000
+  depth: false
 
 matrix:
   fast_finish: true

@@ -63,7 +63,7 @@ before_install:
   - pwd
   - uname -a
   - git --version
-  - git tag
+  - ./ci/check_git_tags.sh
   # Because travis runs on Google Cloud and has a /etc/boto.cfg,
   # it breaks moto import, see:
   # https://github.com/spulec/moto/issues/1771

asv_bench/benchmarks/index_object.py (+18)

@@ -1,3 +1,4 @@
+import gc
 import numpy as np
 import pandas.util.testing as tm
 from pandas import (

@@ -225,4 +226,21 @@ def time_intersection_both_duplicate(self, N):
         self.intv.intersection(self.intv2)
 
 
+class GC:
+    params = [1, 2, 5]
+
+    def create_use_drop(self):
+        idx = Index(list(range(1000 * 1000)))
+        idx._engine
+
+    def peakmem_gc_instances(self, N):
+        try:
+            gc.disable()
+
+            for _ in range(N):
+                self.create_use_drop()
+        finally:
+            gc.enable()
+
+
 from .pandas_vb_common import setup  # noqa: F401
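As context for the new benchmark (not part of the diff): asv benchmarks prefixed with peakmem_ report the peak memory of the process while the benchmark runs, so this class measures how much memory N freshly built Index objects and their cached _engine attributes hold while the garbage collector is disabled. Purely as an illustrative invocation, it could be run in isolation with something like asv run --bench GC from the asv_bench directory, since asv selects benchmarks by regular expression.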

azure-pipelines.yml (+4 -18)

@@ -22,80 +22,67 @@ jobs:
   timeoutInMinutes: 90
   steps:
   - script: |
-      # XXX next command should avoid redefining the path in every step, but
-      # made the process crash as it couldn't find deactivate
-      #echo '##vso[task.prependpath]$HOME/miniconda3/bin'
+      echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
       echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
       echo '##vso[task.setvariable variable=AZURE]true'
     displayName: 'Setting environment variables'
 
   # Do not require a conda environment
-  - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
-      ci/code_checks.sh patterns
+  - script: ci/code_checks.sh patterns
     displayName: 'Looking for unwanted patterns'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       sudo apt-get install -y libc6-dev-i386
       ci/setup_env.sh
     displayName: 'Setup environment and build pandas'
     condition: true
 
   # Do not require pandas
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/code_checks.sh lint
     displayName: 'Linting'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/code_checks.sh dependencies
     displayName: 'Dependencies consistency'
     condition: true
 
   # Require pandas
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/code_checks.sh code
     displayName: 'Checks on imported code'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/code_checks.sh doctests
     displayName: 'Running doctests'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/code_checks.sh docstrings
     displayName: 'Docstring validation'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/code_checks.sh typing
     displayName: 'Typing validation'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       pytest --capture=no --strict scripts
-    displayName: 'Testing docstring validaton script'
+    displayName: 'Testing docstring validation script'
     condition: true
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       cd asv_bench
       asv check -E existing

@@ -124,16 +111,15 @@ jobs:
   steps:
   - script: |
       echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
+      echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
     displayName: 'Setting environment variables'
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       sudo apt-get install -y libc6-dev-i386
       ci/setup_env.sh
     displayName: 'Setup environment and build pandas'
 
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       # Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
       doc/make.py --warnings-are-errors | tee sphinx.log ; SPHINX_RET=${PIPESTATUS[0]}
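The repeated removal of export PATH=$HOME/miniconda3/bin:$PATH in this file (and in the ci/azure templates below) works because ##vso[task.prependpath] is an Azure Pipelines logging command: echoing it from one step prepends the given directory to PATH for every subsequent step of the job, so the miniconda path only needs to be set once in the 'Setting environment variables' step.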

ci/azure/posix.yml (+2 -5)

@@ -56,17 +56,15 @@ jobs:
   steps:
   - script: |
       if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
+      echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
       echo "Creating Environment"
       ci/setup_env.sh
     displayName: 'Setup environment and build pandas'
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       ci/run_tests.sh
     displayName: 'Test'
-  - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
-      source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
+  - script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
   - task: PublishTestResults@2
     inputs:
       testResultsFiles: 'test-data-*.xml'

@@ -97,7 +95,6 @@ jobs:
       }
     displayName: 'Check for test failures'
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       python ci/print_skipped.py
     displayName: 'Print skipped tests'

ci/azure/windows.yml (+3 -2)

@@ -17,7 +17,9 @@ jobs:
       CONDA_PY: "37"
 
   steps:
-  - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
+  - powershell: |
+      Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
+      Write-Host "##vso[task.prependpath]$HOME/miniconda3/bin"
    displayName: 'Add conda to PATH'
   - script: conda update -q -n base conda
     displayName: Update conda

@@ -52,7 +54,6 @@ jobs:
       }
     displayName: 'Check for test failures'
   - script: |
-      export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas-dev
       python ci/print_skipped.py
     displayName: 'Print skipped tests'

ci/check_git_tags.sh (+28, new file)

@@ -0,0 +1,28 @@
+set -e
+
+if [[ ! $(git tag) ]]; then
+    echo "No git tags in clone, please sync your git tags with upstream using:"
+    echo "    git fetch --tags upstream"
+    echo "    git push --tags origin"
+    echo ""
+    echo "If the issue persists, the clone depth needs to be increased in .travis.yml"
+    exit 1
+fi
+
+# This will error if there are no tags and we omit --always
+DESCRIPTION=$(git describe --long --tags)
+echo "$DESCRIPTION"
+
+if [[ "$DESCRIPTION" == *"untagged"* ]]; then
+    echo "Unable to determine most recent tag, aborting build"
+    exit 1
+else
+    if [[ "$DESCRIPTION" != *"g"* ]]; then
+        # A good description will have the hash prefixed by g, a bad one will be
+        # just the hash
+        echo "Unable to determine most recent tag, aborting build"
+        exit 1
+    else
+        echo "$(git tag)"
+    fi
+fi
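For reference (not part of the diff): git describe --long --tags prints a description of the form <tag>-<commits-since-tag>-g<abbreviated-hash> when a tag is reachable, for example v0.25.0-100-gabc1234 (an illustrative value), which is what the 'g'-prefixed-hash check above keys on. This script also motivates the .travis.yml change from depth: 2000 to depth: false, since a full (non-shallow) clone is what guarantees the tags are present.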

ci/code_checks.sh (+2 -2)

@@ -263,8 +263,8 @@ fi
 ### DOCSTRINGS ###
 if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 
-    MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA05)' ; echo $MSG
-    $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA05
+    MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA05)' ; echo $MSG
+    $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA05
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi

ci/deps/azure-36-locale.yaml (+2 -2)

@@ -20,8 +20,8 @@ dependencies:
   - xlsxwriter=0.9.8
   - xlwt=1.2.0
   # universal
-  - pytest>=4.0.2,<5.0.0
-  - pytest-xdist
+  - pytest>=5.0.0
+  - pytest-xdist>=1.29.0
   - pytest-mock
   - pytest-azurepipelines
   - hypothesis>=3.58.0

ci/deps/travis-36-slow.yaml (+2 -2)

@@ -25,8 +25,8 @@ dependencies:
   - xlsxwriter
   - xlwt
   # universal
-  - pytest>=4.0.2,<5.0.0
-  - pytest-xdist
+  - pytest>=5.0.0
+  - pytest-xdist>=1.29.0
   - pytest-mock
   - moto
   - hypothesis>=3.58.0

doc/source/development/developer.rst (+41 -19)

@@ -37,12 +37,19 @@ So that a ``pandas.DataFrame`` can be faithfully reconstructed, we store a
 
 .. code-block:: text
 
-   {'index_columns': ['__index_level_0__', '__index_level_1__', ...],
+   {'index_columns': [<descr0>, <descr1>, ...],
     'column_indexes': [<ci0>, <ci1>, ..., <ciN>],
     'columns': [<c0>, <c1>, ...],
-    'pandas_version': $VERSION}
+    'pandas_version': $VERSION,
+    'creator': {
+      'library': $LIBRARY,
+      'version': $LIBRARY_VERSION
+    }}
 
-Here, ``<c0>``/``<ci0>`` and so forth are dictionaries containing the metadata
+The "descriptor" values ``<descr0>`` in the ``'index_columns'`` field are
+strings (referring to a column) or dictionaries with values as described below.
+
+The ``<c0>``/``<ci0>`` and so forth are dictionaries containing the metadata
 for each column, *including the index columns*. This has JSON form:
 
 .. code-block:: text

@@ -53,26 +60,37 @@ for each column, *including the index columns*. This has JSON form:
     'numpy_type': numpy_type,
     'metadata': metadata}
 
-.. note::
+See below for the detailed specification for these.
+
+Index Metadata Descriptors
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``RangeIndex`` can be stored as metadata only, not requiring serialization. The
+descriptor format for these as is follows:
 
-   Every index column is stored with a name matching the pattern
-   ``__index_level_\d+__`` and its corresponding column information is can be
-   found with the following code snippet.
+.. code-block:: python
 
-   Following this naming convention isn't strictly necessary, but strongly
-   suggested for compatibility with Arrow.
+   index = pd.RangeIndex(0, 10, 2)
+   {'kind': 'range',
+    'name': index.name,
+    'start': index.start,
+    'stop': index.stop,
+    'step': index.step}
 
-   Here's an example of how the index metadata is structured in pyarrow:
+Other index types must be serialized as data columns along with the other
+DataFrame columns. The metadata for these is a string indicating the name of
+the field in the data columns, for example ``'__index_level_0__'``.
 
-   .. code-block:: python
+If an index has a non-None ``name`` attribute, and there is no other column
+with a name matching that value, then the ``index.name`` value can be used as
+the descriptor. Otherwise (for unnamed indexes and ones with names colliding
+with other column names) a disambiguating name with pattern matching
+``__index_level_\d+__`` should be used. In cases of named indexes as data
+columns, ``name`` attribute is always stored in the column descriptors as
+above.
 
-      # assuming there's at least 3 levels in the index
-      index_columns = metadata['index_columns']  # noqa: F821
-      columns = metadata['columns']  # noqa: F821
-      ith_index = 2
-      assert index_columns[ith_index] == '__index_level_2__'
-      ith_index_info = columns[-len(index_columns):][ith_index]
-      ith_index_level_name = ith_index_info['name']
+Column Metadata
+~~~~~~~~~~~~~~~
 
 ``pandas_type`` is the logical type of the column, and is one of:
 

@@ -161,4 +179,8 @@ As an example of fully-formed metadata:
       'numpy_type': 'int64',
       'metadata': None}
      ],
-    'pandas_version': '0.20.0'}
+    'pandas_version': '0.20.0',
+    'creator': {
+      'library': 'pyarrow',
+      'version': '0.13.0'
+    }}
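The descriptor rules added above reduce to two shapes: a plain string naming a serialized data column, or a dict describing a metadata-only RangeIndex. A minimal sketch of resolving one descriptor back to an index, assuming a mapping of already-deserialized data columns (illustrative only, not the pyarrow implementation):

import pandas as pd

def index_from_descriptor(descriptor, data_columns):
    """Rebuild one index level from an 'index_columns' descriptor."""
    if isinstance(descriptor, str):
        # String descriptors name a serialized data column, either the
        # index's own name or a generated '__index_level_N__' field.
        return pd.Index(data_columns[descriptor])
    if descriptor.get('kind') == 'range':
        # Dict descriptors carry a RangeIndex as metadata only.
        return pd.RangeIndex(descriptor['start'], descriptor['stop'],
                             descriptor['step'], name=descriptor['name'])
    raise ValueError("unrecognized index descriptor: %r" % (descriptor,))

# A RangeIndex round-trips without serializing any data:
descr = {'kind': 'range', 'name': None, 'start': 0, 'stop': 10, 'step': 2}
print(index_from_descriptor(descr, {}))  # RangeIndex(start=0, stop=10, step=2)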

doc/source/reference/extensions.rst (+1 -1)

@@ -34,7 +34,6 @@ objects.
 
    api.extensions.ExtensionArray._concat_same_type
    api.extensions.ExtensionArray._formatter
-   api.extensions.ExtensionArray._formatting_values
   api.extensions.ExtensionArray._from_factorized
   api.extensions.ExtensionArray._from_sequence
   api.extensions.ExtensionArray._from_sequence_of_strings

@@ -45,6 +44,7 @@ objects.
   api.extensions.ExtensionArray.argsort
   api.extensions.ExtensionArray.astype
   api.extensions.ExtensionArray.copy
+   api.extensions.ExtensionArray.view
   api.extensions.ExtensionArray.dropna
   api.extensions.ExtensionArray.factorize
   api.extensions.ExtensionArray.fillna

doc/source/reference/window.rst (+6 -1)

@@ -5,14 +5,15 @@
 ======
 Window
 ======
-.. currentmodule:: pandas.core.window
 
 Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
 Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
 EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.
 
 Standard moving window functions
 --------------------------------
+.. currentmodule:: pandas.core.window.rolling
+
 .. autosummary::
    :toctree: api/
 

@@ -38,6 +39,8 @@ Standard moving window functions
 
 Standard expanding window functions
 -----------------------------------
+.. currentmodule:: pandas.core.window.expanding
+
 .. autosummary::
    :toctree: api/
 

@@ -59,6 +62,8 @@ Standard expanding window functions
 
 Exponentially-weighted moving window functions
 ----------------------------------------------
+.. currentmodule:: pandas.core.window.ewm
+
 .. autosummary::
    :toctree: api/
 