Skip to content

Commit 689c59a

Browse files
author
Ayowolet
committed
Merge remote-tracking branch 'upstream/master' into Validate-blanks
2 parents 1fc9b0f + 4ac7f9d commit 689c59a

27 files changed

+167
-254
lines changed

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ repos:
1515
hooks:
1616
- id: isort
1717
language: python_venv
18+
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
225225

226226
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
227227

228-
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228+
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
229229

230230
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
231231

azure-pipelines.yml

+17-6
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ jobs:
104104
displayName: 'Running benchmarks'
105105
condition: true
106106
107-
- job: 'Docs'
107+
- job: 'Web_and_Docs'
108108
pool:
109109
vmImage: ubuntu-16.04
110110
timeoutInMinutes: 90
@@ -119,6 +119,11 @@ jobs:
119119
ci/setup_env.sh
120120
displayName: 'Setup environment and build pandas'
121121
122+
- script: |
123+
source activate pandas-dev
124+
python web/pandas_web.py web/pandas --target-path=web/build
125+
displayName: 'Build website'
126+
122127
- script: |
123128
source activate pandas-dev
124129
# Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
@@ -128,15 +133,21 @@ jobs:
128133
displayName: 'Build documentation'
129134
130135
- script: |
131-
cd doc/build/html
136+
mkdir -p to_deploy/docs
137+
cp -r web/build/* to_deploy/
138+
cp -r doc/build/html/* to_deploy/docs/
139+
displayName: 'Merge website and docs'
140+
141+
- script: |
142+
cd to_deploy
132143
git init
133144
touch .nojekyll
134145
echo "dev.pandas.io" > CNAME
135146
printf "User-agent: *\nDisallow: /" > robots.txt
136147
git add --all .
137148
git config user.email "[email protected]"
138-
git config user.name "pandas-docs-bot"
139-
git commit -m "pandas documentation in master"
149+
git config user.name "pandas-bot"
150+
git commit -m "pandas web and documentation in master"
140151
displayName: 'Create git repo for docs build'
141152
condition : |
142153
and(not(eq(variables['Build.Reason'], 'PullRequest')),
@@ -160,10 +171,10 @@ jobs:
160171
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
161172
162173
- script: |
163-
cd doc/build/html
174+
cd to_deploy
164175
git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git
165176
git push -f origin master
166-
displayName: 'Publish docs to GitHub pages'
177+
displayName: 'Publish web and docs to GitHub pages'
167178
condition : |
168179
and(not(eq(variables['Build.Reason'], 'PullRequest')),
169180
eq(variables['Build.SourceBranch'], 'refs/heads/master'))

ci/azure/posix.yml

+7
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,21 @@ jobs:
6060
echo "Creating Environment"
6161
ci/setup_env.sh
6262
displayName: 'Setup environment and build pandas'
63+
6364
- script: |
6465
source activate pandas-dev
6566
ci/run_tests.sh
6667
displayName: 'Test'
68+
6769
- script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
70+
displayName: 'Build versions'
71+
6872
- task: PublishTestResults@2
6973
inputs:
7074
testResultsFiles: 'test-data-*.xml'
7175
testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
76+
displayName: 'Publish test results'
77+
7278
- powershell: |
7379
$junitXml = "test-data-single.xml"
7480
$(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
@@ -94,6 +100,7 @@ jobs:
94100
Write-Error "$($matches[1]) tests failed"
95101
}
96102
displayName: 'Check for test failures'
103+
97104
- script: |
98105
source activate pandas-dev
99106
python ci/print_skipped.py

ci/print_skipped.py

+23-35
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,40 @@
11
#!/usr/bin/env python
2-
3-
import math
42
import os
5-
import sys
63
import xml.etree.ElementTree as et
74

85

9-
def parse_results(filename):
6+
def main(filename):
7+
if not os.path.isfile(filename):
8+
return
9+
1010
tree = et.parse(filename)
1111
root = tree.getroot()
12-
skipped = []
13-
1412
current_class = ""
15-
i = 1
16-
assert i - 1 == len(skipped)
1713
for el in root.findall("testcase"):
1814
cn = el.attrib["classname"]
1915
for sk in el.findall("skipped"):
2016
old_class = current_class
2117
current_class = cn
22-
name = "{classname}.{name}".format(
23-
classname=current_class, name=el.attrib["name"]
24-
)
25-
msg = sk.attrib["message"]
26-
out = ""
2718
if old_class != current_class:
28-
ndigits = int(math.log(i, 10) + 1)
29-
30-
# 4 for : + space + # + space
31-
out += "-" * (len(name + msg) + 4 + ndigits) + "\n"
32-
out += "#{i} {name}: {msg}".format(i=i, name=name, msg=msg)
33-
skipped.append(out)
34-
i += 1
35-
assert i - 1 == len(skipped)
36-
assert i - 1 == len(skipped)
37-
# assert len(skipped) == int(root.attrib['skip'])
38-
return "\n".join(skipped)
39-
40-
41-
def main():
42-
test_files = ["test-data-single.xml", "test-data-multiple.xml", "test-data.xml"]
43-
44-
print("SKIPPED TESTS:")
45-
for fn in test_files:
46-
if os.path.isfile(fn):
47-
print(parse_results(fn))
48-
return 0
19+
yield None
20+
yield {
21+
"class_name": current_class,
22+
"test_name": el.attrib["name"],
23+
"message": sk.attrib["message"],
24+
}
4925

5026

5127
if __name__ == "__main__":
52-
sys.exit(main())
28+
print("SKIPPED TESTS:")
29+
i = 1
30+
for file_type in ("-single", "-multiple", ""):
31+
for test_data in main("test-data{}.xml".format(file_type)):
32+
if test_data is None:
33+
print("-" * 80)
34+
else:
35+
print(
36+
"#{i} {class_name}.{test_name}: {message}".format(
37+
**dict(test_data, i=i)
38+
)
39+
)
40+
i += 1

ci/run_tests.sh

+3-10
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
#!/bin/bash
1+
#!/bin/bash -e
22

3-
set -e
4-
5-
if [ "$DOC" ]; then
6-
echo "We are not running pytest as this is a doc-build"
7-
exit 0
8-
fi
9-
10-
# Workaround for pytest-xdist flaky collection order
3+
# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
114
# https://github.com/pytest-dev/pytest/issues/920
125
# https://github.com/pytest-dev/pytest/issues/1075
136
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
@@ -16,7 +9,7 @@ if [ -n "$LOCALE_OVERRIDE" ]; then
169
export LC_ALL="$LOCALE_OVERRIDE"
1710
export LANG="$LOCALE_OVERRIDE"
1811
PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
19-
if [[ "$LOCALE_OVERIDE" != "$PANDAS_LOCALE" ]]; then
12+
if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
2013
echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
2114
# TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
2215
# exit 1

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ If installed, we now require:
828828
| pytest (dev) | 4.0.2 | |
829829
+-----------------+-----------------+----------+
830830
831-
For `optional libraries <https://dev.pandas.io/install.html#dependencies>`_ the general recommendation is to use the latest version.
831+
For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
832832
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
833833
Optional libraries below the lowest tested version may still work, but are not considered supported.
834834

doc/source/whatsnew/v1.0.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ Removal of prior version deprecations/changes
109109
- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`)
110110
- Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
111111
- Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
112+
- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 versions of pandas has been removed (:issue:`27538`)
113+
-
112114

113115
.. _whatsnew_1000.performance:
114116

@@ -147,7 +149,7 @@ Datetimelike
147149
- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
148150
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
149151
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
150-
-
152+
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
151153

152154

153155
Timedelta
@@ -218,6 +220,7 @@ I/O
218220
- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`)
219221
- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
220222
- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`)
223+
- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`)
221224

222225
Plotting
223226
^^^^^^^^

pandas/core/arrays/categorical.py

+1-18
Original file line numberDiff line numberDiff line change
@@ -1353,24 +1353,7 @@ def __setstate__(self, state):
13531353
if not isinstance(state, dict):
13541354
raise Exception("invalid pickle state")
13551355

1356-
# Provide compatibility with pre-0.15.0 Categoricals.
1357-
if "_categories" not in state and "_levels" in state:
1358-
state["_categories"] = self.dtype.validate_categories(state.pop("_levels"))
1359-
if "_codes" not in state and "labels" in state:
1360-
state["_codes"] = coerce_indexer_dtype(
1361-
state.pop("labels"), state["_categories"]
1362-
)
1363-
1364-
# 0.16.0 ordered change
1365-
if "_ordered" not in state:
1366-
1367-
# >=15.0 < 0.16.0
1368-
if "ordered" in state:
1369-
state["_ordered"] = state.pop("ordered")
1370-
else:
1371-
state["_ordered"] = False
1372-
1373-
# 0.21.0 CategoricalDtype change
1356+
# compat with pre 0.21.0 CategoricalDtype change
13741357
if "_dtype" not in state:
13751358
state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"])
13761359

pandas/io/parquet.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pandas import DataFrame, get_option
99

10-
from pandas.io.common import get_filepath_or_buffer, is_s3_url
10+
from pandas.io.common import get_filepath_or_buffer, is_gcs_url, is_s3_url
1111

1212

1313
def get_engine(engine):
@@ -159,12 +159,12 @@ def write(
159159
if partition_cols is not None:
160160
kwargs["file_scheme"] = "hive"
161161

162-
if is_s3_url(path):
163-
# path is s3:// so we need to open the s3file in 'wb' mode.
162+
if is_s3_url(path) or is_gcs_url(path):
163+
# if path is s3:// or gs:// we need to open the file in 'wb' mode.
164164
# TODO: Support 'ab'
165165

166166
path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
167-
# And pass the opened s3file to the fastparquet internal impl.
167+
# And pass the opened file to the fastparquet internal impl.
168168
kwargs["open_with"] = lambda path, _: path
169169
else:
170170
path, _, _, _ = get_filepath_or_buffer(path)

pandas/tests/internals/test_internals.py

+22-21
Original file line numberDiff line numberDiff line change
@@ -528,32 +528,33 @@ def test_as_array_datetime_tz(self):
528528
assert mgr.get("g").dtype == "datetime64[ns, CET]"
529529
assert mgr.as_array().dtype == "object"
530530

531-
def test_astype(self):
531+
@pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
532+
def test_astype(self, t):
532533
# coerce all
533534
mgr = create_mgr("c: f4; d: f2; e: f8")
534-
for t in ["float16", "float32", "float64", "int32", "int64"]:
535-
t = np.dtype(t)
536-
tmgr = mgr.astype(t)
537-
assert tmgr.get("c").dtype.type == t
538-
assert tmgr.get("d").dtype.type == t
539-
assert tmgr.get("e").dtype.type == t
535+
536+
t = np.dtype(t)
537+
tmgr = mgr.astype(t)
538+
assert tmgr.get("c").dtype.type == t
539+
assert tmgr.get("d").dtype.type == t
540+
assert tmgr.get("e").dtype.type == t
540541

541542
# mixed
542543
mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
543-
for t in ["float16", "float32", "float64", "int32", "int64"]:
544-
t = np.dtype(t)
545-
tmgr = mgr.astype(t, errors="ignore")
546-
assert tmgr.get("c").dtype.type == t
547-
assert tmgr.get("e").dtype.type == t
548-
assert tmgr.get("f").dtype.type == t
549-
assert tmgr.get("g").dtype.type == t
550-
551-
assert tmgr.get("a").dtype.type == np.object_
552-
assert tmgr.get("b").dtype.type == np.object_
553-
if t != np.int64:
554-
assert tmgr.get("d").dtype.type == np.datetime64
555-
else:
556-
assert tmgr.get("d").dtype.type == t
544+
545+
t = np.dtype(t)
546+
tmgr = mgr.astype(t, errors="ignore")
547+
assert tmgr.get("c").dtype.type == t
548+
assert tmgr.get("e").dtype.type == t
549+
assert tmgr.get("f").dtype.type == t
550+
assert tmgr.get("g").dtype.type == t
551+
552+
assert tmgr.get("a").dtype.type == np.object_
553+
assert tmgr.get("b").dtype.type == np.object_
554+
if t != np.int64:
555+
assert tmgr.get("d").dtype.type == np.datetime64
556+
else:
557+
assert tmgr.get("d").dtype.type == t
557558

558559
def test_convert(self):
559560
def _compare(old_mgr, new_mgr):
578 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)