Skip to content

Commit c1e1e64

Browse files
authored
Merge branch 'master' into period-multiindex
2 parents e547921 + b1eb97b commit c1e1e64

File tree

487 files changed

+18557
-20585
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

487 files changed

+18557
-20585
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ dist
5757
# wheel files
5858
*.whl
5959
**/wheelhouse/*
60+
pip-wheel-metadata
6061
# coverage
6162
.coverage
6263
coverage.xml

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ repos:
1515
hooks:
1616
- id: isort
1717
language: python_venv
18+
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$

.travis.yml

+11-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ matrix:
3030
- python: 3.5
3131

3232
include:
33+
- dist: bionic
34+
# 18.04
35+
python: 3.8-dev
36+
env:
37+
- JOB="3.8-dev" PATTERN="(not slow and not network)"
38+
3339
- dist: trusty
3440
env:
3541
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
@@ -71,24 +77,27 @@ before_install:
7177
# This overrides travis and tells it to look nowhere.
7278
- export BOTO_CONFIG=/dev/null
7379

80+
7481
install:
7582
- echo "install start"
7683
- ci/prep_cython_cache.sh
7784
- ci/setup_env.sh
7885
- ci/submit_cython_cache.sh
7986
- echo "install done"
8087

88+
8189
before_script:
8290
# display server (for clipboard functionality) needs to be started here,
8391
# does not work if done in install:setup_env.sh (GH-26103)
8492
- export DISPLAY=":99.0"
8593
- echo "sh -e /etc/init.d/xvfb start"
86-
- sh -e /etc/init.d/xvfb start
94+
- if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
8795
- sleep 3
8896

8997
script:
9098
- echo "script start"
91-
- source activate pandas-dev
99+
- echo "$JOB"
100+
- if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
92101
- ci/run_tests.sh
93102

94103
after_script:

MANIFEST.in

+5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ include LICENSE
33
include RELEASE.md
44
include README.md
55
include setup.py
6+
include pyproject.toml
67

78
graft doc
89
prune doc/build
@@ -14,6 +15,7 @@ graft pandas
1415
global-exclude *.bz2
1516
global-exclude *.csv
1617
global-exclude *.dta
18+
global-exclude *.feather
1719
global-exclude *.gz
1820
global-exclude *.h5
1921
global-exclude *.html
@@ -23,7 +25,10 @@ global-exclude *.pickle
2325
global-exclude *.png
2426
global-exclude *.pyc
2527
global-exclude *.pyd
28+
global-exclude *.ods
29+
global-exclude *.odt
2630
global-exclude *.sas7bdat
31+
global-exclude *.sav
2732
global-exclude *.so
2833
global-exclude *.xls
2934
global-exclude *.xlsm

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ black:
1818
black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
1919

2020
develop: build
21-
python setup.py develop
21+
python -m pip install --no-build-isolation -e .
2222

2323
doc:
2424
-rm -rf doc/build doc/source/generated

README.md

+7-6
Original file line numberDiff line numberDiff line change
@@ -188,16 +188,17 @@ python setup.py install
188188

189189
or for installing in [development mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs):
190190

191+
191192
```sh
192-
python setup.py develop
193+
python -m pip install --no-build-isolation -e .
193194
```
194195

195-
Alternatively, you can use `pip` if you want all the dependencies pulled
196-
in automatically (the `-e` option is for installing it in [development
197-
mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs)):
196+
If you have `make`, you can also use `make develop` to run the same command.
197+
198+
or alternatively
198199

199200
```sh
200-
pip install -e .
201+
python setup.py develop
201202
```
202203

203204
See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source).
@@ -224,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
224225

225226
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
226227

227-
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228+
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228229

229230
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
230231

asv_bench/benchmarks/categoricals.py

+14
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,18 @@ def time_sort_values(self):
282282
self.index.sort_values(ascending=False)
283283

284284

285+
class SearchSorted:
286+
def setup(self):
287+
N = 10 ** 5
288+
self.ci = tm.makeCategoricalIndex(N).sort_values()
289+
self.c = self.ci.values
290+
self.key = self.ci.categories[1]
291+
292+
def time_categorical_index_contains(self):
293+
self.ci.searchsorted(self.key)
294+
295+
def time_categorical_contains(self):
296+
self.c.searchsorted(self.key)
297+
298+
285299
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/ctors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class SeriesConstructors:
6767
def setup(self, data_fmt, with_index, dtype):
6868
if data_fmt in (gen_of_str, gen_of_tuples) and with_index:
6969
raise NotImplementedError(
70-
"Series constructors do not support " "using generators with indexes"
70+
"Series constructors do not support using generators with indexes"
7171
)
7272
N = 10 ** 4
7373
if dtype == "float":

asv_bench/benchmarks/eval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def time_add(self, engine, threads):
2727

2828
def time_and(self, engine, threads):
2929
pd.eval(
30-
"(self.df > 0) & (self.df2 > 0) & " "(self.df3 > 0) & (self.df4 > 0)",
30+
"(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)",
3131
engine=engine,
3232
)
3333

asv_bench/benchmarks/frame_methods.py

+11
Original file line numberDiff line numberDiff line change
@@ -609,4 +609,15 @@ def time_dataframe_describe(self):
609609
self.df.describe()
610610

611611

612+
class SelectDtypes:
613+
params = [100, 1000]
614+
param_names = ["n"]
615+
616+
def setup(self, n):
617+
self.df = DataFrame(np.random.randn(10, n))
618+
619+
def time_select_dtypes(self, n):
620+
self.df.select_dtypes(include="int")
621+
622+
612623
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/hdf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def time_write_store_table_dc(self):
8888

8989
def time_query_store_table_wide(self):
9090
self.store.select(
91-
"table_wide", where="index > self.start_wide and " "index < self.stop_wide"
91+
"table_wide", where="index > self.start_wide and index < self.stop_wide"
9292
)
9393

9494
def time_query_store_table(self):
95-
self.store.select("table", where="index > self.start and " "index < self.stop")
95+
self.store.select("table", where="index > self.start and index < self.stop")
9696

9797
def time_store_repr(self):
9898
repr(self.store)

asv_bench/benchmarks/join_merge.py

+46-13
Original file line numberDiff line numberDiff line change
@@ -273,10 +273,10 @@ def time_merge_ordered(self):
273273

274274

275275
class MergeAsof:
276-
params = [["backward", "forward", "nearest"]]
277-
param_names = ["direction"]
276+
params = [["backward", "forward", "nearest"], [None, 5]]
277+
param_names = ["direction", "tolerance"]
278278

279-
def setup(self, direction):
279+
def setup(self, direction, tolerance):
280280
one_count = 200000
281281
two_count = 1000000
282282

@@ -303,6 +303,9 @@ def setup(self, direction):
303303
df1["time32"] = np.int32(df1.time)
304304
df2["time32"] = np.int32(df2.time)
305305

306+
df1["timeu64"] = np.uint64(df1.time)
307+
df2["timeu64"] = np.uint64(df2.time)
308+
306309
self.df1a = df1[["time", "value1"]]
307310
self.df2a = df2[["time", "value2"]]
308311
self.df1b = df1[["time", "key", "value1"]]
@@ -313,22 +316,52 @@ def setup(self, direction):
313316
self.df2d = df2[["time32", "value2"]]
314317
self.df1e = df1[["time", "key", "key2", "value1"]]
315318
self.df2e = df2[["time", "key", "key2", "value2"]]
319+
self.df1f = df1[["timeu64", "value1"]]
320+
self.df2f = df2[["timeu64", "value2"]]
321+
322+
def time_on_int(self, direction, tolerance):
323+
merge_asof(
324+
self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance
325+
)
316326

317-
def time_on_int(self, direction):
318-
merge_asof(self.df1a, self.df2a, on="time", direction=direction)
327+
def time_on_int32(self, direction, tolerance):
328+
merge_asof(
329+
self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance
330+
)
319331

320-
def time_on_int32(self, direction):
321-
merge_asof(self.df1d, self.df2d, on="time32", direction=direction)
332+
def time_on_uint64(self, direction, tolerance):
333+
merge_asof(
334+
self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance
335+
)
322336

323-
def time_by_object(self, direction):
324-
merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction)
337+
def time_by_object(self, direction, tolerance):
338+
merge_asof(
339+
self.df1b,
340+
self.df2b,
341+
on="time",
342+
by="key",
343+
direction=direction,
344+
tolerance=tolerance,
345+
)
325346

326-
def time_by_int(self, direction):
327-
merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction)
347+
def time_by_int(self, direction, tolerance):
348+
merge_asof(
349+
self.df1c,
350+
self.df2c,
351+
on="time",
352+
by="key2",
353+
direction=direction,
354+
tolerance=tolerance,
355+
)
328356

329-
def time_multiby(self, direction):
357+
def time_multiby(self, direction, tolerance):
330358
merge_asof(
331-
self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction
359+
self.df1e,
360+
self.df2e,
361+
on="time",
362+
by=["key", "key2"],
363+
direction=direction,
364+
tolerance=tolerance,
332365
)
333366

334367

asv_bench/benchmarks/rolling.py

+19
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,25 @@ def peakmem_rolling(self, constructor, window, dtype, method):
2525
getattr(self.roll, method)()
2626

2727

28+
class Apply:
29+
params = (
30+
["DataFrame", "Series"],
31+
[10, 1000],
32+
["int", "float"],
33+
[sum, np.sum, lambda x: np.sum(x) + 5],
34+
[True, False],
35+
)
36+
param_names = ["contructor", "window", "dtype", "function", "raw"]
37+
38+
def setup(self, constructor, window, dtype, function, raw):
39+
N = 10 ** 5
40+
arr = (100 * np.random.random(N)).astype(dtype)
41+
self.roll = getattr(pd, constructor)(arr).rolling(window)
42+
43+
def time_rolling(self, constructor, window, dtype, function, raw):
44+
self.roll.apply(function, raw=raw)
45+
46+
2847
class ExpandingMethods:
2948

3049
params = (

azure-pipelines.yml

+17-6
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ jobs:
104104
displayName: 'Running benchmarks'
105105
condition: true
106106
107-
- job: 'Docs'
107+
- job: 'Web_and_Docs'
108108
pool:
109109
vmImage: ubuntu-16.04
110110
timeoutInMinutes: 90
@@ -119,6 +119,11 @@ jobs:
119119
ci/setup_env.sh
120120
displayName: 'Setup environment and build pandas'
121121
122+
- script: |
123+
source activate pandas-dev
124+
python web/pandas_web.py web/pandas --target-path=web/build
125+
displayName: 'Build website'
126+
122127
- script: |
123128
source activate pandas-dev
124129
# Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
@@ -128,15 +133,21 @@ jobs:
128133
displayName: 'Build documentation'
129134
130135
- script: |
131-
cd doc/build/html
136+
mkdir -p to_deploy/docs
137+
cp -r web/build/* to_deploy/
138+
cp -r doc/build/html/* to_deploy/docs/
139+
displayName: 'Merge website and docs'
140+
141+
- script: |
142+
cd to_deploy
132143
git init
133144
touch .nojekyll
134145
echo "dev.pandas.io" > CNAME
135146
printf "User-agent: *\nDisallow: /" > robots.txt
136147
git add --all .
137148
git config user.email "[email protected]"
138-
git config user.name "pandas-docs-bot"
139-
git commit -m "pandas documentation in master"
149+
git config user.name "pandas-bot"
150+
git commit -m "pandas web and documentation in master"
140151
displayName: 'Create git repo for docs build'
141152
condition : |
142153
and(not(eq(variables['Build.Reason'], 'PullRequest')),
@@ -160,10 +171,10 @@ jobs:
160171
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
161172
162173
- script: |
163-
cd doc/build/html
174+
cd to_deploy
164175
git remote add origin [email protected]:pandas-dev/pandas-dev.github.io.git
165176
git push -f origin master
166-
displayName: 'Publish docs to GitHub pages'
177+
displayName: 'Publish web and docs to GitHub pages'
167178
condition : |
168179
and(not(eq(variables['Build.Reason'], 'PullRequest')),
169180
eq(variables['Build.SourceBranch'], 'refs/heads/master'))

ci/azure/posix.yml

+7
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,21 @@ jobs:
6060
echo "Creating Environment"
6161
ci/setup_env.sh
6262
displayName: 'Setup environment and build pandas'
63+
6364
- script: |
6465
source activate pandas-dev
6566
ci/run_tests.sh
6667
displayName: 'Test'
68+
6769
- script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
70+
displayName: 'Build versions'
71+
6872
- task: PublishTestResults@2
6973
inputs:
7074
testResultsFiles: 'test-data-*.xml'
7175
testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
76+
displayName: 'Publish test results'
77+
7278
- powershell: |
7379
$junitXml = "test-data-single.xml"
7480
$(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
@@ -94,6 +100,7 @@ jobs:
94100
Write-Error "$($matches[1]) tests failed"
95101
}
96102
displayName: 'Check for test failures'
103+
97104
- script: |
98105
source activate pandas-dev
99106
python ci/print_skipped.py

0 commit comments

Comments
 (0)