Skip to content

Commit e08cdf5

Browse files
authored
Merge branch 'master' into get_loc-nan
2 parents 2085411 + ef77b57 commit e08cdf5

File tree

282 files changed

+12097
-8548
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

282 files changed

+12097
-8548
lines changed

.travis.yml

+11-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ matrix:
3030
- python: 3.5
3131

3232
include:
33+
- dist: bionic
34+
# 18.04
35+
python: 3.8-dev
36+
env:
37+
- JOB="3.8-dev" PATTERN="(not slow and not network)"
38+
3339
- dist: trusty
3440
env:
3541
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
@@ -71,24 +77,27 @@ before_install:
7177
# This overrides travis and tells it to look nowhere.
7278
- export BOTO_CONFIG=/dev/null
7379

80+
7481
install:
7582
- echo "install start"
7683
- ci/prep_cython_cache.sh
7784
- ci/setup_env.sh
7885
- ci/submit_cython_cache.sh
7986
- echo "install done"
8087

88+
8189
before_script:
8290
# display server (for clipboard functionality) needs to be started here,
8391
# does not work if done in install:setup_env.sh (GH-26103)
8492
- export DISPLAY=":99.0"
8593
- echo "sh -e /etc/init.d/xvfb start"
86-
- sh -e /etc/init.d/xvfb start
94+
- if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
8795
- sleep 3
8896

8997
script:
9098
- echo "script start"
91-
- source activate pandas-dev
99+
- echo "$JOB"
100+
- if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
92101
- ci/run_tests.sh
93102

94103
after_script:

asv_bench/benchmarks/categoricals.py

+14
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,18 @@ def time_sort_values(self):
282282
self.index.sort_values(ascending=False)
283283

284284

285+
class SearchSorted:
286+
def setup(self):
287+
N = 10 ** 5
288+
self.ci = tm.makeCategoricalIndex(N).sort_values()
289+
self.c = self.ci.values
290+
self.key = self.ci.categories[1]
291+
292+
def time_categorical_index_contains(self):
293+
self.ci.searchsorted(self.key)
294+
295+
def time_categorical_contains(self):
296+
self.c.searchsorted(self.key)
297+
298+
285299
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/ctors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class SeriesConstructors:
6767
def setup(self, data_fmt, with_index, dtype):
6868
if data_fmt in (gen_of_str, gen_of_tuples) and with_index:
6969
raise NotImplementedError(
70-
"Series constructors do not support " "using generators with indexes"
70+
"Series constructors do not support using generators with indexes"
7171
)
7272
N = 10 ** 4
7373
if dtype == "float":

asv_bench/benchmarks/eval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def time_add(self, engine, threads):
2727

2828
def time_and(self, engine, threads):
2929
pd.eval(
30-
"(self.df > 0) & (self.df2 > 0) & " "(self.df3 > 0) & (self.df4 > 0)",
30+
"(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)",
3131
engine=engine,
3232
)
3333

asv_bench/benchmarks/io/hdf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def time_write_store_table_dc(self):
8888

8989
def time_query_store_table_wide(self):
9090
self.store.select(
91-
"table_wide", where="index > self.start_wide and " "index < self.stop_wide"
91+
"table_wide", where="index > self.start_wide and index < self.stop_wide"
9292
)
9393

9494
def time_query_store_table(self):
95-
self.store.select("table", where="index > self.start and " "index < self.stop")
95+
self.store.select("table", where="index > self.start and index < self.stop")
9696

9797
def time_store_repr(self):
9898
repr(self.store)

asv_bench/benchmarks/join_merge.py

+46-13
Original file line numberDiff line numberDiff line change
@@ -273,10 +273,10 @@ def time_merge_ordered(self):
273273

274274

275275
class MergeAsof:
276-
params = [["backward", "forward", "nearest"]]
277-
param_names = ["direction"]
276+
params = [["backward", "forward", "nearest"], [None, 5]]
277+
param_names = ["direction", "tolerance"]
278278

279-
def setup(self, direction):
279+
def setup(self, direction, tolerance):
280280
one_count = 200000
281281
two_count = 1000000
282282

@@ -303,6 +303,9 @@ def setup(self, direction):
303303
df1["time32"] = np.int32(df1.time)
304304
df2["time32"] = np.int32(df2.time)
305305

306+
df1["timeu64"] = np.uint64(df1.time)
307+
df2["timeu64"] = np.uint64(df2.time)
308+
306309
self.df1a = df1[["time", "value1"]]
307310
self.df2a = df2[["time", "value2"]]
308311
self.df1b = df1[["time", "key", "value1"]]
@@ -313,22 +316,52 @@ def setup(self, direction):
313316
self.df2d = df2[["time32", "value2"]]
314317
self.df1e = df1[["time", "key", "key2", "value1"]]
315318
self.df2e = df2[["time", "key", "key2", "value2"]]
319+
self.df1f = df1[["timeu64", "value1"]]
320+
self.df2f = df2[["timeu64", "value2"]]
321+
322+
def time_on_int(self, direction, tolerance):
323+
merge_asof(
324+
self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance
325+
)
316326

317-
def time_on_int(self, direction):
318-
merge_asof(self.df1a, self.df2a, on="time", direction=direction)
327+
def time_on_int32(self, direction, tolerance):
328+
merge_asof(
329+
self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance
330+
)
319331

320-
def time_on_int32(self, direction):
321-
merge_asof(self.df1d, self.df2d, on="time32", direction=direction)
332+
def time_on_uint64(self, direction, tolerance):
333+
merge_asof(
334+
self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance
335+
)
322336

323-
def time_by_object(self, direction):
324-
merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction)
337+
def time_by_object(self, direction, tolerance):
338+
merge_asof(
339+
self.df1b,
340+
self.df2b,
341+
on="time",
342+
by="key",
343+
direction=direction,
344+
tolerance=tolerance,
345+
)
325346

326-
def time_by_int(self, direction):
327-
merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction)
347+
def time_by_int(self, direction, tolerance):
348+
merge_asof(
349+
self.df1c,
350+
self.df2c,
351+
on="time",
352+
by="key2",
353+
direction=direction,
354+
tolerance=tolerance,
355+
)
328356

329-
def time_multiby(self, direction):
357+
def time_multiby(self, direction, tolerance):
330358
merge_asof(
331-
self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction
359+
self.df1e,
360+
self.df2e,
361+
on="time",
362+
by=["key", "key2"],
363+
direction=direction,
364+
tolerance=tolerance,
332365
)
333366

334367

ci/build38.sh

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash -e
2+
# Special build for python3.8 until numpy puts its own wheels up
3+
4+
sudo apt-get install build-essential gcc xvfb
5+
pip install --no-deps -U pip wheel setuptools
6+
pip install python-dateutil pytz cython pytest pytest-xdist hypothesis
7+
8+
# Possible alternative for getting numpy:
9+
pip install --pre -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com/ numpy
10+
11+
python setup.py build_ext -inplace
12+
python -m pip install --no-build-isolation -e .
13+
14+
python -c "import sys; print(sys.version_info)"
15+
python -c "import pandas as pd"
16+
python -c "import hypothesis"
17+
18+
# TODO: Is there anything else in setup_env that we really want to do?
19+
# ci/setup_env.sh

ci/code_checks.sh

+12-4
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,11 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
122122
MSG='Check for non-standard imports' ; echo $MSG
123123
invgrep -R --include="*.py*" -E "from pandas.core.common import " pandas
124124
invgrep -R --include="*.py*" -E "from collections.abc import " pandas
125-
# invgrep -R --include="*.py*" -E "from numpy import nan " pandas # GH#24822 not yet implemented since the offending imports have not all been removed
125+
invgrep -R --include="*.py*" -E "from numpy import nan " pandas
126+
RET=$(($RET + $?)) ; echo $MSG "DONE"
127+
128+
MSG='Check for use of exec' ; echo $MSG
129+
invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
126130
RET=$(($RET + $?)) ; echo $MSG "DONE"
127131

128132
MSG='Check for pytest warns' ; echo $MSG
@@ -184,7 +188,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
184188
invgrep -R --include="*.rst" ".. ipython ::" doc/source
185189
RET=$(($RET + $?)) ; echo $MSG "DONE"
186190

187-
MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
191+
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
188192
set -o pipefail
189193
if [[ "$AZURE" == "true" ]]; then
190194
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
@@ -262,13 +266,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
262266
-k"-from_arrays -from_breaks -from_intervals -from_tuples -set_closed -to_tuples -interval_range"
263267
RET=$(($RET + $?)) ; echo $MSG "DONE"
264268

269+
MSG='Doctests arrays/string_.py' ; echo $MSG
270+
pytest -q --doctest-modules pandas/core/arrays/string_.py
271+
RET=$(($RET + $?)) ; echo $MSG "DONE"
272+
265273
fi
266274

267275
### DOCSTRINGS ###
268276
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
269277

270-
MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA05)' ; echo $MSG
271-
$BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA05
278+
MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA01, SA02, SA03, SA05)' ; echo $MSG
279+
$BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA01,SA02,SA03,SA05
272280
RET=$(($RET + $?)) ; echo $MSG "DONE"
273281

274282
fi

ci/deps/azure-36-32bit.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6+
- attrs=19.1.0
67
- gcc_linux-32
78
- gcc_linux-32
89
- gxx_linux-32
@@ -11,7 +12,7 @@ dependencies:
1112
- python=3.6.*
1213
- pytz=2017.2
1314
# universal
14-
- pytest>=4.0.2,<5.0.0
15+
- pytest
1516
- pytest-xdist
1617
- pytest-mock
1718
- pytest-azurepipelines

ci/run_tests.sh

+5-6
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,9 @@ do
4343
# if no tests are found (the case of "single and slow"), pytest exits with code 5, and would make the script fail, if not for the below code
4444
sh -c "$PYTEST_CMD; ret=\$?; [ \$ret = 5 ] && exit 0 || exit \$ret"
4545

46-
# 2019-08-21 disabling because this is hitting HTTP 400 errors GH#27602
47-
# if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
48-
# echo "uploading coverage for $TYPE tests"
49-
# echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
50-
# bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
51-
# fi
46+
if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
47+
echo "uploading coverage for $TYPE tests"
48+
echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
49+
bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
50+
fi
5251
done

ci/setup_env.sh

+5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/bin/bash -e
22

3+
if [ "$JOB" == "3.8-dev" ]; then
4+
/bin/bash ci/build38.sh
5+
exit 0
6+
fi
37

48
# edit the locale file if needed
59
if [ -n "$LOCALE_OVERRIDE" ]; then
@@ -51,6 +55,7 @@ echo
5155
echo "update conda"
5256
conda config --set ssl_verify false
5357
conda config --set quiet true --set always_yes true --set changeps1 false
58+
conda install pip  # install pip via conda to create a historical artifact for pip & setuptools
5459
conda update -n base conda
5560

5661
echo "conda info -a"

doc/.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
data/
2+
timeseries.csv
3+
timeseries.parquet
4+
timeseries_wide.parquet

doc/redirects.csv

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ whatsnew,whatsnew/index
66
release,whatsnew/index
77

88
# getting started
9+
install,getting_started/install
910
10min,getting_started/10min
1011
basics,getting_started/basics
1112
comparison_with_r,getting_started/comparison/comparison_with_r
@@ -1577,3 +1578,6 @@ generated/pandas.unique,../reference/api/pandas.unique
15771578
generated/pandas.util.hash_array,../reference/api/pandas.util.hash_array
15781579
generated/pandas.util.hash_pandas_object,../reference/api/pandas.util.hash_pandas_object
15791580
generated/pandas.wide_to_long,../reference/api/pandas.wide_to_long
1581+
1582+
# Cached searches
1583+
reference/api/pandas.DataFrame.from_csv,pandas.read_csv

doc/source/conf.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@
120120
plot_pre_code = """import numpy as np
121121
import pandas as pd"""
122122

123+
# Tell nbsphinx not to use requirejs (it breaks bootstrap)
124+
nbsphinx_requirejs_path = ""
125+
123126
# Add any paths that contain templates here, relative to this directory.
124127
templates_path = ["../_templates"]
125128

@@ -191,7 +194,7 @@
191194

192195
# The theme to use for HTML and HTML Help pages. Major themes that come with
193196
# Sphinx are currently 'default' and 'sphinxdoc'.
194-
html_theme = "nature_with_gtoc"
197+
html_theme = "pandas_sphinx_theme"
195198

196199
# The style sheet to use for HTML and HTML Help pages. A file of that name
197200
# must exist either in Sphinx' static/ path, or in one of the custom paths
@@ -204,7 +207,7 @@
204207
# html_theme_options = {}
205208

206209
# Add any paths that contain custom themes here, relative to this directory.
207-
html_theme_path = ["themes"]
210+
# html_theme_path = ["themes"]
208211

209212
# The name for this set of Sphinx documents. If None, it defaults to
210213
# "<project> v<release> documentation".
@@ -628,11 +631,11 @@ def linkcode_resolve(domain, info):
628631
fn = os.path.relpath(fn, start=os.path.dirname(pandas.__file__))
629632

630633
if "+" in pandas.__version__:
631-
return "http://github.com/pandas-dev/pandas/blob/master/pandas/" "{}{}".format(
634+
return "http://github.com/pandas-dev/pandas/blob/master/pandas/{}{}".format(
632635
fn, linespec
633636
)
634637
else:
635-
return "http://github.com/pandas-dev/pandas/blob/" "v{}/pandas/{}{}".format(
638+
return "http://github.com/pandas-dev/pandas/blob/v{}/pandas/{}{}".format(
636639
pandas.__version__, fn, linespec
637640
)
638641

doc/source/development/contributing.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ installed (or you wish to install a newer version) you can install a compiler
172172
yum groupinstall "Development Tools"
173173

174174
For other Linux distributions, consult your favourite search engine for
175-
commpiler installation instructions.
175+
compiler installation instructions.
176176

177177
Let us know if you have any difficulties by opening an issue or reaching out on
178178
`Gitter`_.
@@ -949,10 +949,13 @@ the expected correct result::
949949

950950
assert_frame_equal(pivoted, expected)
951951

952+
Please remember to add the GitHub issue number as a comment to a new test.
953+
E.g. "# brief comment, see GH#28907"
954+
952955
Transitioning to ``pytest``
953956
~~~~~~~~~~~~~~~~~~~~~~~~~~~
954957

955-
*pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class.
958+
*pandas* existing test structure is *mostly* class-based, meaning that you will typically find tests wrapped in a class.
956959

957960
.. code-block:: python
958961

doc/source/ecosystem.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
:orphan:
2+
13
.. _ecosystem:
24

35
{{ header }}

0 commit comments

Comments
 (0)