Commit 46792ac

Merge remote-tracking branch 'upstream/master' into docfix-multiindex-set_levels
2 parents (03829ce + 56b6561), commit 46792ac

60 files changed (+1740, -1120 lines)

.travis.yml (-5)

@@ -48,17 +48,12 @@ matrix:
        - mysql
        - postgresql
 
-    # In allow_failures
     - env:
       - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
       services:
         - mysql
         - postgresql
 
-  allow_failures:
-    - env:
-      - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
-
 before_install:
   - echo "before_install"
   # set non-blocking IO on travis

ci/azure/posix.yml (+21 -7)

@@ -19,18 +19,24 @@ jobs:
       ENV_FILE: ci/deps/azure-36-minimum_versions.yaml
       CONDA_PY: "36"
       PATTERN: "not slow and not network"
+
     py36_locale_slow_old_np:
       ENV_FILE: ci/deps/azure-36-locale_slow.yaml
       CONDA_PY: "36"
       PATTERN: "slow"
-      LOCALE_OVERRIDE: "zh_CN.UTF-8"
+      # pandas does not use the language (zh_CN), but should support diferent encodings (utf8)
+      # we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
+      LANG: "zh_CN.utf8"
+      LC_ALL: "zh_CN.utf8"
       EXTRA_APT: "language-pack-zh-hans"
 
     py36_locale:
       ENV_FILE: ci/deps/azure-36-locale.yaml
       CONDA_PY: "36"
       PATTERN: "not slow and not network"
-      LOCALE_OVERRIDE: "it_IT.UTF-8"
+      LANG: "it_IT.utf8"
+      LC_ALL: "it_IT.utf8"
+      EXTRA_APT: "language-pack-it"
 
     py36_32bit:
       ENV_FILE: ci/deps/azure-36-32bit.yaml
@@ -42,7 +48,9 @@ jobs:
       ENV_FILE: ci/deps/azure-37-locale.yaml
       CONDA_PY: "37"
       PATTERN: "not slow and not network"
-      LOCALE_OVERRIDE: "zh_CN.UTF-8"
+      LANG: "zh_CN.utf8"
+      LC_ALL: "zh_CN.utf8"
+      EXTRA_APT: "language-pack-zh-hans"
 
     py37_np_dev:
       ENV_FILE: ci/deps/azure-37-numpydev.yaml
@@ -54,10 +62,16 @@ jobs:
 
   steps:
     - script: |
-        if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
-        echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
-        echo "Creating Environment"
-        ci/setup_env.sh
+        if [ "$(uname)" == "Linux" ]; then
+          sudo apt-get update
+          sudo apt-get install -y libc6-dev-i386 $EXTRA_APT
+        fi
+      displayName: 'Install extra packages'
+
+    - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
+      displayName: 'Set conda path'
+
+    - script: ci/setup_env.sh
       displayName: 'Setup environment and build pandas'
 
     - script: |
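The switch from the pandas-specific LOCALE_OVERRIDE variable to the standard LANG/LC_ALL pair means the interpreter itself picks up the locale, so no test-runner glue is required. A minimal sketch of how that can be checked from Python, assuming the corresponding language pack is installed (the printed values are illustrative):

    import locale

    # With LANG/LC_ALL exported before the interpreter starts, adopting the
    # environment's locale is enough; no pandas-specific override is needed.
    locale.setlocale(locale.LC_ALL, "")          # adopt the environment's locale
    print(locale.getlocale())                    # e.g. ('zh_CN', 'UTF-8')
    print(locale.getpreferredencoding(False))    # the encoding pandas will see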

ci/azure/windows.yml (+1 -1)

@@ -34,7 +34,7 @@ jobs:
     - bash: |
        source activate pandas-dev
        conda list
-       python setup.py build_ext -q -i
+       python setup.py build_ext -q -i -j 4
        python -m pip install --no-build-isolation -e .
      displayName: 'Build'

ci/deps/azure-36-locale_slow.yaml (+1 -1)

@@ -13,7 +13,7 @@ dependencies:
   - pytest-azurepipelines
 
   # pandas dependencies
-  - beautifulsoup4==4.6.0
+  - beautifulsoup4=4.6.0
   - bottleneck=1.2.*
   - lxml
   - matplotlib=2.2.2

ci/run_tests.sh (-11)

@@ -5,17 +5,6 @@
 # https://github.com/pytest-dev/pytest/issues/1075
 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
 
-if [ -n "$LOCALE_OVERRIDE" ]; then
-    export LC_ALL="$LOCALE_OVERRIDE"
-    export LANG="$LOCALE_OVERRIDE"
-    PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
-    if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
-        echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
-        # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
-        # exit 1
-    fi
-fi
-
 if [[ "not network" == *"$PATTERN"* ]]; then
     export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
 fi

ci/setup_env.sh (+3 -3)

@@ -1,15 +1,15 @@
 #!/bin/bash -e
 
 # edit the locale file if needed
-if [ -n "$LOCALE_OVERRIDE" ]; then
+if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
     echo "Adding locale to the first line of pandas/__init__.py"
     rm -f pandas/__init__.pyc
-    SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n"
+    SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n"
     sed -i "$SEDC" pandas/__init__.py
+
     echo "[head -4 pandas/__init__.py]"
     head -4 pandas/__init__.py
     echo
-    sudo locale-gen "$LOCALE_OVERRIDE"
 fi
 
 MINICONDA_DIR="$HOME/miniconda3"
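For context, the `3i` sed expression inserts two lines before line 3 of pandas/__init__.py, so the locale is set as early as possible during import. Assuming LC_ALL=it_IT.utf8 (an illustrative value that must exist on the CI image), the injected snippet is equivalent to:

    # Lines written into pandas/__init__.py by the SEDC command above;
    # 'it_IT.utf8' stands in for whatever $LC_ALL holds on the CI host.
    import locale
    locale.setlocale(locale.LC_ALL, 'it_IT.utf8')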

doc/source/getting_started/10min.rst (+2 -1)

@@ -697,8 +697,9 @@ Plotting
 
 See the :ref:`Plotting <visualization>` docs.
 
+We use the standard convention for referencing the matplotlib API:
+
 .. ipython:: python
-   :suppress:
 
    import matplotlib.pyplot as plt
    plt.close('all')
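With the `:suppress:` option removed, the import convention is now rendered in the 10 minutes guide instead of being hidden. A short illustrative use of that convention (the data values are arbitrary):

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    # Arbitrary example data, only to exercise the documented convention.
    ts = pd.Series(np.random.randn(100), index=pd.date_range("2000-01-01", periods=100))
    ts.cumsum().plot()
    plt.close("all")  # same cleanup call as in the docs snippet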

doc/source/whatsnew/v1.0.0.rst (+8 -3)

@@ -213,8 +213,8 @@ Other enhancements
 - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
 - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
 - :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
-
 - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
+- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
 
 Build Changes
 ^^^^^^^^^^^^^
@@ -773,6 +773,7 @@ Datetimelike
 - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
 - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
 - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
+- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`)
 - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
 - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`)
 - Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
@@ -885,6 +886,7 @@ I/O
 - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
 - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
 - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
+- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`)
 
 Plotting
 ^^^^^^^^
@@ -900,12 +902,13 @@ Plotting
 - :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
 - :meth:`DataFrame.plot` now allow a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
 - Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
+- Allow :meth: `DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
 - Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`).
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
+- Bug in :meth:`DataFrame.groupby.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`)
 - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
 - Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
@@ -972,7 +975,9 @@ Other
 - Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`)
 - Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
 - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`)
-- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:29069`)
+- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
+- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
+-
 
 .. _whatsnew_1000.contributors:

pandas/_libs/intervaltree.pxi.in (+33 -8)

@@ -6,12 +6,20 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 from pandas._libs.algos import is_monotonic
 
-ctypedef fused scalar_t:
-    float64_t
-    float32_t
+ctypedef fused int_scalar_t:
     int64_t
     int32_t
+    float64_t
+    float32_t
+
+ctypedef fused uint_scalar_t:
     uint64_t
+    float64_t
+    float32_t
+
+ctypedef fused scalar_t:
+    int_scalar_t
+    uint_scalar_t
 
 # ----------------------------------------------------------------------
 # IntervalTree
@@ -128,7 +136,12 @@ cdef class IntervalTree(IntervalMixin):
         result = Int64Vector()
         old_len = 0
         for i in range(len(target)):
-            self.root.query(result, target[i])
+            try:
+                self.root.query(result, target[i])
+            except OverflowError:
+                # overflow -> no match, which is already handled below
+                pass
+
             if result.data.n == old_len:
                 result.append(-1)
             elif result.data.n > old_len + 1:
@@ -150,7 +163,12 @@ cdef class IntervalTree(IntervalMixin):
         missing = Int64Vector()
         old_len = 0
         for i in range(len(target)):
-            self.root.query(result, target[i])
+            try:
+                self.root.query(result, target[i])
+            except OverflowError:
+                # overflow -> no match, which is already handled below
+                pass
+
             if result.data.n == old_len:
                 result.append(-1)
                 missing.append(i)
@@ -202,19 +220,26 @@ for dtype in ['float32', 'float64', 'int32', 'int64', 'uint64']:
                                         ('neither', '<', '<')]:
         cmp_left_converse = '<' if cmp_left == '<=' else '<='
         cmp_right_converse = '<' if cmp_right == '<=' else '<='
+        if dtype.startswith('int'):
+            fused_prefix = 'int_'
+        elif dtype.startswith('uint'):
+            fused_prefix = 'uint_'
+        elif dtype.startswith('float'):
+            fused_prefix = ''
         nodes.append((dtype, dtype.title(),
                       closed, closed.title(),
                       cmp_left,
                       cmp_right,
                       cmp_left_converse,
-                      cmp_right_converse))
+                      cmp_right_converse,
+                      fused_prefix))
 
 }}
 
 NODE_CLASSES = {}
 
 {{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right,
-      cmp_left_converse, cmp_right_converse in nodes}}
+      cmp_left_converse, cmp_right_converse, fused_prefix in nodes}}
 
 cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
     """Non-terminal node for an IntervalTree
@@ -317,7 +342,7 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
     @cython.wraparound(False)
     @cython.boundscheck(False)
     @cython.initializedcheck(False)
-    cpdef query(self, Int64Vector result, scalar_t point):
+    cpdef query(self, Int64Vector result, {{fused_prefix}}scalar_t point):
         """Recursively query this node and its sub-nodes for intervals that
         overlap with the query point.
         """

pandas/_libs/reduction.pyx (+10 -4)

@@ -1,3 +1,4 @@
+from copy import copy
 from distutils.version import LooseVersion
 
 from cython import Py_ssize_t
@@ -15,7 +16,7 @@ from numpy cimport (ndarray,
 cnp.import_array()
 
 cimport pandas._libs.util as util
-from pandas._libs.lib import maybe_convert_objects
+from pandas._libs.lib import maybe_convert_objects, is_scalar
 
 
 cdef _check_result_array(object obj, Py_ssize_t cnt):
@@ -492,14 +493,19 @@ def apply_frame_axis0(object frame, object f, object names,
         # Need to infer if low level index slider will cause segfaults
         require_slow_apply = i == 0 and piece is chunk
         try:
-            if piece.index is chunk.index:
-                piece = piece.copy(deep='all')
-            else:
+            if piece.index is not chunk.index:
                 mutated = True
         except AttributeError:
             # `piece` might not have an index, could be e.g. an int
            pass
 
+        if not is_scalar(piece):
+            # Need to copy data to avoid appending references
+            if hasattr(piece, "copy"):
+                piece = piece.copy(deep="all")
+            else:
+                piece = copy(piece)
+
         results.append(piece)
 
         # If the data was modified inplace we need to
# If the data was modified inplace we need to
