Commit 873ae61

Author: Marco Gorelli
Merge remote-tracking branch 'upstream/master' into hey-everyone
2 parents: a44f381 + 8621970

114 files changed: +3649 −3041 lines


.github/workflows/ci.yml (+34 −1)

@@ -154,6 +154,39 @@ jobs:
         echo "region = BHS" >> $RCLONE_CONFIG_PATH
       if: github.event_name == 'push'
 
-    - name: Sync web
+    - name: Sync web with OVH
       run: rclone sync pandas_web ovh_cloud_pandas_web:dev
       if: github.event_name == 'push'
+
+    - name: Create git repo to upload the built docs to GitHub pages
+      run: |
+        cd pandas_web
+        git init
+        touch .nojekyll
+        echo "dev.pandas.io" > CNAME
+        printf "User-agent: *\nDisallow: /" > robots.txt
+        git add --all .
+        git config user.email "[email protected]"
+        git config user.name "pandas-bot"
+        git commit -m "pandas web and documentation in master"
+      if: github.event_name == 'push'
+
+    # For this task to work, the following steps are required:
+    # 1. Generate a pair of private/public keys (e.g. `ssh-keygen -t rsa -b 4096 -C "[email protected]"`)
+    # 2. Go to https://github.com/pandas-dev/pandas/settings/secrets
+    # 3. Click on "Add a new secret"
+    # 4. Name: "github_pages_ssh_key", Value: <content of the private ssh key>
+    # 5. The public key needs to be uploaded to https://github.com/pandas-dev/pandas-dev.github.io/settings/keys
+    - name: Install GitHub pages ssh deployment key
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.github_pages_ssh_key }}
+        known_hosts: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ=='
+      if: github.event_name == 'push'
+
+    - name: Publish web and docs to GitHub pages
+      run: |
+        cd pandas_web
+        git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git
+        git push -f origin master
+      if: github.event_name == 'push'

asv_bench/benchmarks/algorithms.py (+35 −56)

@@ -31,83 +31,62 @@ def time_maybe_convert_objects(self):
 
 class Factorize:
 
-    params = [[True, False], ["int", "uint", "float", "string"]]
-    param_names = ["sort", "dtype"]
-
-    def setup(self, sort, dtype):
-        N = 10 ** 5
-        data = {
-            "int": pd.Int64Index(np.arange(N).repeat(5)),
-            "uint": pd.UInt64Index(np.arange(N).repeat(5)),
-            "float": pd.Float64Index(np.random.randn(N).repeat(5)),
-            "string": tm.makeStringIndex(N).repeat(5),
-        }
-        self.idx = data[dtype]
-
-    def time_factorize(self, sort, dtype):
-        self.idx.factorize(sort=sort)
-
-
-class FactorizeUnique:
-
-    params = [[True, False], ["int", "uint", "float", "string"]]
-    param_names = ["sort", "dtype"]
+    params = [
+        [True, False],
+        [True, False],
+        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+    ]
+    param_names = ["unique", "sort", "dtype"]
 
-    def setup(self, sort, dtype):
+    def setup(self, unique, sort, dtype):
         N = 10 ** 5
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
-            "float": pd.Float64Index(np.arange(N)),
+            "float": pd.Float64Index(np.random.randn(N)),
             "string": tm.makeStringIndex(N),
-        }
-        self.idx = data[dtype]
-        assert self.idx.is_unique
-
-    def time_factorize(self, sort, dtype):
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns, tz]": pd.date_range(
+                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+            ),
+        }[dtype]
+        if not unique:
+            data = data.repeat(5)
+        self.idx = data
+
+    def time_factorize(self, unique, sort, dtype):
         self.idx.factorize(sort=sort)
 
 
 class Duplicated:
 
-    params = [["first", "last", False], ["int", "uint", "float", "string"]]
-    param_names = ["keep", "dtype"]
-
-    def setup(self, keep, dtype):
-        N = 10 ** 5
-        data = {
-            "int": pd.Int64Index(np.arange(N).repeat(5)),
-            "uint": pd.UInt64Index(np.arange(N).repeat(5)),
-            "float": pd.Float64Index(np.random.randn(N).repeat(5)),
-            "string": tm.makeStringIndex(N).repeat(5),
-        }
-        self.idx = data[dtype]
-        # cache is_unique
-        self.idx.is_unique
-
-    def time_duplicated(self, keep, dtype):
-        self.idx.duplicated(keep=keep)
-
-
-class DuplicatedUniqueIndex:
-
-    params = ["int", "uint", "float", "string"]
-    param_names = ["dtype"]
+    params = [
+        [True, False],
+        ["first", "last", False],
+        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+    ]
+    param_names = ["unique", "keep", "dtype"]
 
-    def setup(self, dtype):
+    def setup(self, unique, keep, dtype):
         N = 10 ** 5
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
             "float": pd.Float64Index(np.random.randn(N)),
             "string": tm.makeStringIndex(N),
-        }
-        self.idx = data[dtype]
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns, tz]": pd.date_range(
+                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+            ),
+        }[dtype]
+        if not unique:
+            data = data.repeat(5)
+        self.idx = data
         # cache is_unique
         self.idx.is_unique
 
-    def time_duplicated_unique(self, dtype):
-        self.idx.duplicated()
+    def time_duplicated(self, unique, keep, dtype):
+        self.idx.duplicated(keep=keep)
 
 
 class Hashing:
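The merged Factorize class above replaces the old Factorize/FactorizeUnique pair by adding a "unique" axis to the parameter grid. As a minimal sketch (not the actual asv harness, with N shrunk and the dtype list trimmed for brevity), this is how asv expands `params` into a cartesian product and passes one value per `param_names` entry to `setup` and each `time_*` method:

    import itertools

    import numpy as np
    import pandas as pd

    class Factorize:
        params = [[True, False], [True, False], ["int", "float"]]
        param_names = ["unique", "sort", "dtype"]

        def setup(self, unique, sort, dtype):
            data = {
                "int": pd.Index(np.arange(1000)),
                "float": pd.Index(np.random.randn(1000)),
            }[dtype]
            # the non-unique variants simply repeat the unique data
            self.idx = data if unique else data.repeat(5)

        def time_factorize(self, unique, sort, dtype):
            self.idx.factorize(sort=sort)

    # Roughly what asv does: one fresh setup plus a timed call per combination.
    for combo in itertools.product(*Factorize.params):
        bench = Factorize()
        bench.setup(*combo)
        bench.time_factorize(*combo)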

asv_bench/benchmarks/categoricals.py (−3)

@@ -258,9 +258,6 @@ def setup(self):
     def time_get_loc(self):
         self.index.get_loc(self.category)
 
-    def time_shape(self):
-        self.index.shape
-
     def time_shallow_copy(self):
         self.index._shallow_copy()
 

asv_bench/benchmarks/index_cached_properties.py (+3)

@@ -7,6 +7,7 @@ class IndexCache:
 
     params = [
         [
+            "CategoricalIndex",
             "DatetimeIndex",
             "Float64Index",
             "IntervalIndex",
@@ -42,6 +43,8 @@ def setup(self, index_type):
             self.idx = pd.Float64Index(range(N))
         elif index_type == "UInt64Index":
             self.idx = pd.UInt64Index(range(N))
+        elif index_type == "CategoricalIndex":
+            self.idx = pd.CategoricalIndex(range(N), range(N))
         else:
             raise ValueError
         assert len(self.idx) == N
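For readers unfamiliar with the two-argument form in the new fixture: `pd.CategoricalIndex(data, categories)` takes the values first and the categories second, so passing `range(N)` twice builds an index in which every value is a distinct category. A quick self-contained illustration (exact reprs vary by pandas version):

    import pandas as pd

    idx = pd.CategoricalIndex(range(5), range(5))
    print(idx.categories)  # Int64Index([0, 1, 2, 3, 4], dtype='int64')
    print(idx.codes)       # [0 1 2 3 4]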

asv_bench/benchmarks/index_object.py (−8)

@@ -55,14 +55,6 @@ def time_datetime_difference_disjoint(self):
         self.datetime_left.difference(self.datetime_right)
 
 
-class Datetime:
-    def setup(self):
-        self.dr = date_range("20000101", freq="D", periods=10000)
-
-    def time_is_dates_only(self):
-        self.dr._is_dates_only
-
-
 class Range:
     def setup(self):
         self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)

asv_bench/benchmarks/indexing.py (+5)

@@ -1,3 +1,8 @@
+"""
+These benchmarks are for Series and DataFrame indexing methods. For the
+lower-level methods directly on Index and subclasses, see index_object.py,
+indexing_engine.py, and index_cached.py
+"""
 import warnings
 
 import numpy as np

asv_bench/benchmarks/period.py (−3)

@@ -85,9 +85,6 @@ def setup(self):
     def time_get_loc(self):
         self.index.get_loc(self.period)
 
-    def time_shape(self):
-        self.index.shape
-
     def time_shallow_copy(self):
         self.index._shallow_copy()
 

asv_bench/benchmarks/timedelta.py (−3)

@@ -73,9 +73,6 @@ def setup(self):
     def time_get_loc(self):
         self.index.get_loc(self.timedelta)
 
-    def time_shape(self):
-        self.index.shape
-
     def time_shallow_copy(self):
         self.index._shallow_copy()
 

asv_bench/benchmarks/timeseries.py (+3 −14)

@@ -57,6 +57,9 @@ def time_to_date(self, index_type):
     def time_to_pydatetime(self, index_type):
         self.index.to_pydatetime()
 
+    def time_is_dates_only(self, index_type):
+        self.index._is_dates_only
+
 
 class TzLocalize:
 
@@ -91,20 +94,6 @@ def time_reest_datetimeindex(self, tz):
         self.df.reset_index()
 
 
-class Factorize:
-
-    params = [None, "Asia/Tokyo"]
-    param_names = "tz"
-
-    def setup(self, tz):
-        N = 100000
-        self.dti = date_range("2011-01-01", freq="H", periods=N, tz=tz)
-        self.dti = self.dti.repeat(5)
-
-    def time_factorize(self, tz):
-        self.dti.factorize()
-
-
 class InferFreq:
 
     params = [None, "D", "B"]

azure-pipelines.yml (−75)

@@ -15,78 +15,3 @@ jobs:
   parameters:
     name: Windows
     vmImage: vs2017-win2016
-
-- job: 'Web_and_Docs'
-  pool:
-    vmImage: ubuntu-16.04
-  timeoutInMinutes: 90
-  steps:
-  - script: |
-      echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
-      echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
-    displayName: 'Setting environment variables'
-
-  - script: |
-      sudo apt-get install -y libc6-dev-i386
-      ci/setup_env.sh
-    displayName: 'Setup environment and build pandas'
-
-  - script: |
-      source activate pandas-dev
-      python web/pandas_web.py web/pandas --target-path=web/build
-    displayName: 'Build website'
-
-  - script: |
-      source activate pandas-dev
-      # Next we should simply have `doc/make.py --warnings-are-errors`; everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
-      doc/make.py --warnings-are-errors | tee sphinx.log ; SPHINX_RET=${PIPESTATUS[0]}
-      grep -B1 "^<<<-------------------------------------------------------------------------$" sphinx.log ; IPY_RET=$(( $? != 1 ))
-      exit $(( $SPHINX_RET + $IPY_RET ))
-    displayName: 'Build documentation'
-
-  - script: |
-      mkdir -p to_deploy/docs
-      cp -r web/build/* to_deploy/
-      cp -r doc/build/html/* to_deploy/docs/
-    displayName: 'Merge website and docs'
-
-  - script: |
-      cd to_deploy
-      git init
-      touch .nojekyll
-      echo "dev.pandas.io" > CNAME
-      printf "User-agent: *\nDisallow: /" > robots.txt
-      git add --all .
-      git config user.email "[email protected]"
-      git config user.name "pandas-bot"
-      git commit -m "pandas web and documentation in master"
-    displayName: 'Create git repo for docs build'
-    condition: |
-      and(not(eq(variables['Build.Reason'], 'PullRequest')),
-          eq(variables['Build.SourceBranch'], 'refs/heads/master'))
-
-  # For `InstallSSHKey@0` to work, the following steps are required:
-  # 1. Generate a pair of private/public keys (e.g. `ssh-keygen -t rsa -b 4096 -C "[email protected]"`)
-  # 2. Go to "Library > Secure files" in the Azure Pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles
-  # 3. Click on "+ Secure file"
-  # 4. Upload the private key (the name of the file must match the one specified in the "sshKeySecureFile" input below, "pandas_docs_key")
-  # 5. Click on the file name after it is created, tick the box "Authorize for use in all pipelines" and save
-  # 6. The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be set as a deploy key (with write access) of the repo where the docs will be pushed: https://github.com/pandas-dev/pandas-dev.github.io/settings/keys
-  - task: InstallSSHKey@0
-    inputs:
-      hostName: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ=='
-      sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== [email protected]'
-      sshKeySecureFile: 'pandas_docs_key'
-    displayName: 'Install GitHub ssh deployment key'
-    condition: |
-      and(not(eq(variables['Build.Reason'], 'PullRequest')),
-          eq(variables['Build.SourceBranch'], 'refs/heads/master'))
-
-  - script: |
-      cd to_deploy
-      git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git
-      git push -f origin master
-    displayName: 'Publish web and docs to GitHub pages'
-    condition: |
-      and(not(eq(variables['Build.Reason'], 'PullRequest')),
-          eq(variables['Build.SourceBranch'], 'refs/heads/master'))
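One subtlety in the removed 'Build documentation' step is its exit-code arithmetic: grep exits 0 when it finds the IPython-directive error marker in sphinx.log (a failure) and 1 when it finds nothing (a pass), so `$(( $? != 1 ))` inverts that into a conventional failure flag before adding it to the Sphinx exit code. A small Python sketch of the same logic, assuming grep's usual 0/1/2 exit-code convention:

    def build_docs_exit(sphinx_ret: int, grep_ret: int) -> int:
        # grep_ret == 1 means "marker not found" (good); 0 means "found"
        # (IPython directive errors present) and >1 means grep itself failed.
        ipy_ret = int(grep_ret != 1)
        return sphinx_ret + ipy_ret  # any non-zero total fails the CI step

    assert build_docs_exit(0, 1) == 0  # clean sphinx build, no error marker
    assert build_docs_exit(0, 0) == 1  # sphinx OK, but ipython directive errors
    assert build_docs_exit(2, 1) == 2  # sphinx build itself failed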

ci/code_checks.sh (+1 −1)

@@ -269,7 +269,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
     MSG='Doctests generic.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
+        -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests groupby.py' ; echo $MSG
ci/setup_env.sh (+1 −1)

@@ -50,7 +50,7 @@ echo
 echo "update conda"
 conda config --set ssl_verify false
 conda config --set quiet true --set always_yes true --set changeps1 false
-conda install pip  # create conda to create a historical artifact for pip & setuptools
+conda install pip conda  # create conda to create a historical artifact for pip & setuptools
 conda update -n base conda
 
 echo "conda info -a"

doc/source/ecosystem.rst (+5)

@@ -56,6 +56,11 @@ joining paths, replacing file extensions, and checking if files exist are also a
 Statistics and machine learning
 -------------------------------
 
+`pandas-tfrecords <https://pypi.org/project/pandas-tfrecords/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Easy saving of pandas DataFrames to the TensorFlow TFRecords format, and reading TFRecords back into pandas.
+
 `Statsmodels <https://www.statsmodels.org/>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
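A brief usage sketch of the newly listed package; the `pd2tf`/`tf2pd` entry points below follow the project's README at the time of writing and should be treated as assumptions rather than a verified API:

    import pandas as pd
    from pandas_tfrecords import pd2tf, tf2pd  # names per the project README; verify locally

    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    pd2tf(df, "./tfrecords")    # write the DataFrame as TFRecord files
    df2 = tf2pd("./tfrecords")  # read the records back into a DataFrame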
