Skip to content

Commit e5545a7

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into tz-localize-categories
2 parents b4f5d42 + b95a7eb commit e5545a7

31 files changed

+286
-337
lines changed

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ repos:
1515
hooks:
1616
- id: isort
1717
language: python_venv
18+
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
225225

226226
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
227227

228-
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228+
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
229229

230230
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
231231

azure-pipelines.yml

+17-6
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ jobs:
104104
displayName: 'Running benchmarks'
105105
condition: true
106106
107-
- job: 'Docs'
107+
- job: 'Web_and_Docs'
108108
pool:
109109
vmImage: ubuntu-16.04
110110
timeoutInMinutes: 90
@@ -119,6 +119,11 @@ jobs:
119119
ci/setup_env.sh
120120
displayName: 'Setup environment and build pandas'
121121
122+
- script: |
123+
source activate pandas-dev
124+
python web/pandas_web.py web/pandas --target-path=web/build
125+
displayName: 'Build website'
126+
122127
- script: |
123128
source activate pandas-dev
124129
# Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
@@ -128,15 +133,21 @@ jobs:
128133
displayName: 'Build documentation'
129134
130135
- script: |
131-
cd doc/build/html
136+
mkdir -p to_deploy/docs
137+
cp -r web/build/* to_deploy/
138+
cp -r doc/build/html/* to_deploy/docs/
139+
displayName: 'Merge website and docs'
140+
141+
- script: |
142+
cd to_deploy
132143
git init
133144
touch .nojekyll
134145
echo "dev.pandas.io" > CNAME
135146
printf "User-agent: *\nDisallow: /" > robots.txt
136147
git add --all .
137148
git config user.email "[email protected]"
138-
git config user.name "pandas-docs-bot"
139-
git commit -m "pandas documentation in master"
149+
git config user.name "pandas-bot"
150+
git commit -m "pandas web and documentation in master"
140151
displayName: 'Create git repo for docs build'
141152
condition : |
142153
and(not(eq(variables['Build.Reason'], 'PullRequest')),
@@ -160,10 +171,10 @@ jobs:
160171
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
161172
162173
- script: |
163-
cd doc/build/html
174+
cd to_deploy
164175
git remote add origin [email protected]:pandas-dev/pandas-dev.github.io.git
165176
git push -f origin master
166-
displayName: 'Publish docs to GitHub pages'
177+
displayName: 'Publish web and docs to GitHub pages'
167178
condition : |
168179
and(not(eq(variables['Build.Reason'], 'PullRequest')),
169180
eq(variables['Build.SourceBranch'], 'refs/heads/master'))

ci/azure/posix.yml

+7
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,21 @@ jobs:
6060
echo "Creating Environment"
6161
ci/setup_env.sh
6262
displayName: 'Setup environment and build pandas'
63+
6364
- script: |
6465
source activate pandas-dev
6566
ci/run_tests.sh
6667
displayName: 'Test'
68+
6769
- script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
70+
displayName: 'Build versions'
71+
6872
- task: PublishTestResults@2
6973
inputs:
7074
testResultsFiles: 'test-data-*.xml'
7175
testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
76+
displayName: 'Publish test results'
77+
7278
- powershell: |
7379
$junitXml = "test-data-single.xml"
7480
$(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
@@ -94,6 +100,7 @@ jobs:
94100
Write-Error "$($matches[1]) tests failed"
95101
}
96102
displayName: 'Check for test failures'
103+
97104
- script: |
98105
source activate pandas-dev
99106
python ci/print_skipped.py

ci/print_skipped.py

+23-35
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,40 @@
11
#!/usr/bin/env python
2-
3-
import math
42
import os
5-
import sys
63
import xml.etree.ElementTree as et
74

85

9-
def parse_results(filename):
6+
def main(filename):
7+
if not os.path.isfile(filename):
8+
return
9+
1010
tree = et.parse(filename)
1111
root = tree.getroot()
12-
skipped = []
13-
1412
current_class = ""
15-
i = 1
16-
assert i - 1 == len(skipped)
1713
for el in root.findall("testcase"):
1814
cn = el.attrib["classname"]
1915
for sk in el.findall("skipped"):
2016
old_class = current_class
2117
current_class = cn
22-
name = "{classname}.{name}".format(
23-
classname=current_class, name=el.attrib["name"]
24-
)
25-
msg = sk.attrib["message"]
26-
out = ""
2718
if old_class != current_class:
28-
ndigits = int(math.log(i, 10) + 1)
29-
30-
# 4 for : + space + # + space
31-
out += "-" * (len(name + msg) + 4 + ndigits) + "\n"
32-
out += "#{i} {name}: {msg}".format(i=i, name=name, msg=msg)
33-
skipped.append(out)
34-
i += 1
35-
assert i - 1 == len(skipped)
36-
assert i - 1 == len(skipped)
37-
# assert len(skipped) == int(root.attrib['skip'])
38-
return "\n".join(skipped)
39-
40-
41-
def main():
42-
test_files = ["test-data-single.xml", "test-data-multiple.xml", "test-data.xml"]
43-
44-
print("SKIPPED TESTS:")
45-
for fn in test_files:
46-
if os.path.isfile(fn):
47-
print(parse_results(fn))
48-
return 0
19+
yield None
20+
yield {
21+
"class_name": current_class,
22+
"test_name": el.attrib["name"],
23+
"message": sk.attrib["message"],
24+
}
4925

5026

5127
if __name__ == "__main__":
52-
sys.exit(main())
28+
print("SKIPPED TESTS:")
29+
i = 1
30+
for file_type in ("-single", "-multiple", ""):
31+
for test_data in main("test-data{}.xml".format(file_type)):
32+
if test_data is None:
33+
print("-" * 80)
34+
else:
35+
print(
36+
"#{i} {class_name}.{test_name}: {message}".format(
37+
**dict(test_data, i=i)
38+
)
39+
)
40+
i += 1

ci/run_tests.sh

+3-10
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
#!/bin/bash
1+
#!/bin/bash -e
22

3-
set -e
4-
5-
if [ "$DOC" ]; then
6-
echo "We are not running pytest as this is a doc-build"
7-
exit 0
8-
fi
9-
10-
# Workaround for pytest-xdist flaky collection order
3+
# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
114
# https://github.com/pytest-dev/pytest/issues/920
125
# https://github.com/pytest-dev/pytest/issues/1075
136
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
@@ -16,7 +9,7 @@ if [ -n "$LOCALE_OVERRIDE" ]; then
169
export LC_ALL="$LOCALE_OVERRIDE"
1710
export LANG="$LOCALE_OVERRIDE"
1811
PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
19-
if [[ "$LOCALE_OVERIDE" != "$PANDAS_LOCALE" ]]; then
12+
if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
2013
echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
2114
# TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
2215
# exit 1

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ If installed, we now require:
828828
| pytest (dev) | 4.0.2 | |
829829
+-----------------+-----------------+----------+
830830
831-
For `optional libraries <https://dev.pandas.io/install.html#dependencies>`_ the general recommendation is to use the latest version.
831+
For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
832832
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
833833
Optional libraries below the lowest tested version may still work, but are not considered supported.
834834

doc/source/whatsnew/v1.0.0.rst

+6-2
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ Removal of prior version deprecations/changes
109109
- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`)
110110
- Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
111111
- Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
112+
- The ability to read pickles containing :class:`Categorical` instances created with pre-0.16 versions of pandas has been removed (:issue:`27538`)
113+
-
112114

113115
.. _whatsnew_1000.performance:
114116

@@ -149,7 +151,7 @@ Datetimelike
149151
- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
150152
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
151153
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
152-
-
154+
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
153155

154156

155157
Timedelta
@@ -220,6 +222,7 @@ I/O
220222
- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`)
221223
- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
222224
- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`)
225+
- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`)
223226

224227
Plotting
225228
^^^^^^^^
@@ -230,6 +233,7 @@ Plotting
230233
- Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`)
231234
- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`)
232235
- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` producing wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`)
236+
- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`)
233237
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
234238

235239
Groupby/resample/rolling
@@ -243,8 +247,8 @@ Groupby/resample/rolling
243247
Reshaping
244248
^^^^^^^^^
245249

250+
- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`)
246251
- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`)
247-
-
248252

249253
Sparse
250254
^^^^^^

pandas/core/apply.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -204,17 +204,20 @@ def apply_empty_result(self):
204204
from pandas import Series
205205

206206
if not should_reduce:
207-
208-
EMPTY_SERIES = Series([])
209207
try:
210-
r = self.f(EMPTY_SERIES, *self.args, **self.kwds)
208+
r = self.f(Series([]))
211209
except Exception:
212210
pass
213211
else:
214212
should_reduce = not isinstance(r, Series)
215213

216214
if should_reduce:
217-
return self.obj._constructor_sliced(np.nan, index=self.agg_axis)
215+
if len(self.agg_axis):
216+
r = self.f(Series([]))
217+
else:
218+
r = np.nan
219+
220+
return self.obj._constructor_sliced(r, index=self.agg_axis)
218221
else:
219222
return self.obj.copy()
220223

pandas/core/arrays/categorical.py

+1-18
Original file line numberDiff line numberDiff line change
@@ -1353,24 +1353,7 @@ def __setstate__(self, state):
13531353
if not isinstance(state, dict):
13541354
raise Exception("invalid pickle state")
13551355

1356-
# Provide compatibility with pre-0.15.0 Categoricals.
1357-
if "_categories" not in state and "_levels" in state:
1358-
state["_categories"] = self.dtype.validate_categories(state.pop("_levels"))
1359-
if "_codes" not in state and "labels" in state:
1360-
state["_codes"] = coerce_indexer_dtype(
1361-
state.pop("labels"), state["_categories"]
1362-
)
1363-
1364-
# 0.16.0 ordered change
1365-
if "_ordered" not in state:
1366-
1367-
# >=15.0 < 0.16.0
1368-
if "ordered" in state:
1369-
state["_ordered"] = state.pop("ordered")
1370-
else:
1371-
state["_ordered"] = False
1372-
1373-
# 0.21.0 CategoricalDtype change
1356+
# compat with pre 0.21.0 CategoricalDtype change
13741357
if "_dtype" not in state:
13751358
state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"])
13761359

pandas/io/parquet.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pandas import DataFrame, get_option
99

10-
from pandas.io.common import get_filepath_or_buffer, is_s3_url
10+
from pandas.io.common import get_filepath_or_buffer, is_gcs_url, is_s3_url
1111

1212

1313
def get_engine(engine):
@@ -159,12 +159,12 @@ def write(
159159
if partition_cols is not None:
160160
kwargs["file_scheme"] = "hive"
161161

162-
if is_s3_url(path):
163-
# path is s3:// so we need to open the s3file in 'wb' mode.
162+
if is_s3_url(path) or is_gcs_url(path):
163+
# if path is s3:// or gs:// we need to open the file in 'wb' mode.
164164
# TODO: Support 'ab'
165165

166166
path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
167-
# And pass the opened s3file to the fastparquet internal impl.
167+
# And pass the opened file to the fastparquet internal impl.
168168
kwargs["open_with"] = lambda path, _: path
169169
else:
170170
path, _, _, _ = get_filepath_or_buffer(path)

pandas/plotting/_matplotlib/boxplot.py

+31-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from matplotlib.artist import setp
55
import numpy as np
66

7+
from pandas.core.dtypes.common import is_dict_like
78
from pandas.core.dtypes.generic import ABCSeries
89
from pandas.core.dtypes.missing import remove_na_arraylike
910

@@ -250,13 +251,38 @@ def boxplot(
250251
def _get_colors():
251252
# num_colors=3 is required as method maybe_color_bp takes the colors
252253
# in positions 0 and 2.
253-
return _get_standard_colors(color=kwds.get("color"), num_colors=3)
254+
# if colors not provided, use same defaults as DataFrame.plot.box
255+
result = _get_standard_colors(num_colors=3)
256+
result = np.take(result, [0, 0, 2])
257+
result = np.append(result, "k")
258+
259+
colors = kwds.pop("color", None)
260+
if colors:
261+
if is_dict_like(colors):
262+
# replace colors in result array with user-specified colors
263+
# taken from the colors dict parameter
264+
# "boxes" value placed in position 0, "whiskers" in 1, etc.
265+
valid_keys = ["boxes", "whiskers", "medians", "caps"]
266+
key_to_index = dict(zip(valid_keys, range(4)))
267+
for key, value in colors.items():
268+
if key in valid_keys:
269+
result[key_to_index[key]] = value
270+
else:
271+
raise ValueError(
272+
"color dict contains invalid "
273+
"key '{0}' "
274+
"The key must be either {1}".format(key, valid_keys)
275+
)
276+
else:
277+
result.fill(colors)
278+
279+
return result
254280

255281
def maybe_color_bp(bp):
256-
if "color" not in kwds:
257-
setp(bp["boxes"], color=colors[0], alpha=1)
258-
setp(bp["whiskers"], color=colors[0], alpha=1)
259-
setp(bp["medians"], color=colors[2], alpha=1)
282+
setp(bp["boxes"], color=colors[0], alpha=1)
283+
setp(bp["whiskers"], color=colors[1], alpha=1)
284+
setp(bp["medians"], color=colors[2], alpha=1)
285+
setp(bp["caps"], color=colors[3], alpha=1)
260286

261287
def plot_group(keys, values, ax):
262288
keys = [pprint_thing(x) for x in keys]

0 commit comments

Comments
 (0)