pandas-dev
diff --git a/‎.pre-commit-config.yaml
+1 b/‎.pre-commit-config.yaml
+1
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎azure-pipelines.yml
+17-6 b/‎azure-pipelines.yml
+17-6
diff --git a/‎ci/azure/posix.yml
+7 b/‎ci/azure/posix.yml
+7
diff --git a/‎ci/print_skipped.py
+23-35 b/‎ci/print_skipped.py
+23-35
diff --git a/‎ci/run_tests.sh
+3-10 b/‎ci/run_tests.sh
+3-10
diff --git a/‎doc/source/whatsnew/v0.25.0.rst
+1-1 b/‎doc/source/whatsnew/v0.25.0.rst
+1-1
diff --git a/‎doc/source/whatsnew/v1.0.0.rst
+6-2 b/‎doc/source/whatsnew/v1.0.0.rst
+6-2
diff --git a/‎pandas/core/apply.py
+7-4 b/‎pandas/core/apply.py
+7-4
diff --git a/‎pandas/core/arrays/categorical.py
+1-18 b/‎pandas/core/arrays/categorical.py
+1-18
diff --git a/‎pandas/io/parquet.py
+4-4 b/‎pandas/io/parquet.py
+4-4
diff --git a/‎pandas/plotting/_matplotlib/boxplot.py
+31-5 b/‎pandas/plotting/_matplotlib/boxplot.py
+31-5
@@ -15,3 +15,4 @@ repos:
     hooks:
     -   id: isort
         language: python_venv
+        exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
@@ -225,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
 
 All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
 
-A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
+A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
 
 If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
 
 
@@ -104,7 +104,7 @@ jobs:
     displayName: 'Running benchmarks'
     condition: true
 
-- job: 'Docs'
+- job: 'Web_and_Docs'
   pool:
     vmImage: ubuntu-16.04
   timeoutInMinutes: 90
@@ -119,6 +119,11 @@ jobs:
       ci/setup_env.sh
     displayName: 'Setup environment and build pandas'
 
+  - script: |
+      source activate pandas-dev
+      python web/pandas_web.py web/pandas --target-path=web/build
+    displayName: 'Build website'
+
   - script: |
       source activate pandas-dev
       # Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
@@ -128,15 +133,21 @@ jobs:
     displayName: 'Build documentation'
 
   - script: |
-      cd doc/build/html
+      mkdir -p to_deploy/docs
+      cp -r web/build/* to_deploy/
+      cp -r doc/build/html/* to_deploy/docs/
+    displayName: 'Merge website and docs'
+
+  - script: |
+      cd to_deploy
       git init
       touch .nojekyll
       echo "dev.pandas.io" > CNAME
       printf "User-agent: *\nDisallow: /" > robots.txt
       git add --all .
       git config user.email "[email protected]"
-      git config user.name "pandas-docs-bot"
-      git commit -m "pandas documentation in master"
+      git config user.name "pandas-bot"
+      git commit -m "pandas web and documentation in master"
     displayName: 'Create git repo for docs build'
     condition : |
       and(not(eq(variables['Build.Reason'], 'PullRequest')),
@@ -160,10 +171,10 @@ jobs:
           eq(variables['Build.SourceBranch'], 'refs/heads/master'))
 
   - script: |
-      cd doc/build/html
+      cd to_deploy
       git remote add origin [email protected]:pandas-dev/pandas-dev.github.io.git
       git push -f origin master
-    displayName: 'Publish docs to GitHub pages'
+    displayName: 'Publish web and docs to GitHub pages'
     condition : |
       and(not(eq(variables['Build.Reason'], 'PullRequest')),
           eq(variables['Build.SourceBranch'], 'refs/heads/master'))
@@ -60,15 +60,21 @@ jobs:
         echo "Creating Environment"
         ci/setup_env.sh
       displayName: 'Setup environment and build pandas'
+
     - script: |
         source activate pandas-dev
         ci/run_tests.sh
       displayName: 'Test'
+
     - script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
+      displayName: 'Build versions'
+
     - task: PublishTestResults@2
       inputs:
         testResultsFiles: 'test-data-*.xml'
         testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
+      displayName: 'Publish test results'
+
     - powershell: |
         $junitXml = "test-data-single.xml"
         $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
@@ -94,6 +100,7 @@ jobs:
           Write-Error "$($matches[1]) tests failed"
         }
       displayName: 'Check for test failures'
+
     - script: |
         source activate pandas-dev
         python ci/print_skipped.py
 
@@ -1,52 +1,40 @@
 #!/usr/bin/env python
-
-import math
 import os
-import sys
 import xml.etree.ElementTree as et
 
 
-def parse_results(filename):
+def main(filename):
+    if not os.path.isfile(filename):
+        return
+
     tree = et.parse(filename)
     root = tree.getroot()
-    skipped = []
-
     current_class = ""
-    i = 1
-    assert i - 1 == len(skipped)
     for el in root.findall("testcase"):
         cn = el.attrib["classname"]
         for sk in el.findall("skipped"):
             old_class = current_class
             current_class = cn
-            name = "{classname}.{name}".format(
-                classname=current_class, name=el.attrib["name"]
-            )
-            msg = sk.attrib["message"]
-            out = ""
             if old_class != current_class:
-                ndigits = int(math.log(i, 10) + 1)
-
-                # 4 for : + space + # + space
-                out += "-" * (len(name + msg) + 4 + ndigits) + "\n"
-            out += "#{i} {name}: {msg}".format(i=i, name=name, msg=msg)
-            skipped.append(out)
-            i += 1
-            assert i - 1 == len(skipped)
-    assert i - 1 == len(skipped)
-    # assert len(skipped) == int(root.attrib['skip'])
-    return "\n".join(skipped)
-
-
-def main():
-    test_files = ["test-data-single.xml", "test-data-multiple.xml", "test-data.xml"]
-
-    print("SKIPPED TESTS:")
-    for fn in test_files:
-        if os.path.isfile(fn):
-            print(parse_results(fn))
-    return 0
+                yield None
+            yield {
+                "class_name": current_class,
+                "test_name": el.attrib["name"],
+                "message": sk.attrib["message"],
+            }
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    print("SKIPPED TESTS:")
+    i = 1
+    for file_type in ("-single", "-multiple", ""):
+        for test_data in main("test-data{}.xml".format(file_type)):
+            if test_data is None:
+                print("-" * 80)
+            else:
+                print(
+                    "#{i} {class_name}.{test_name}: {message}".format(
+                        **dict(test_data, i=i)
+                    )
+                )
+                i += 1
@@ -1,13 +1,6 @@
-#!/bin/bash
+#!/bin/bash -e
 
-set -e
-
-if [ "$DOC" ]; then
-    echo "We are not running pytest as this is a doc-build"
-    exit 0
-fi
-
-# Workaround for pytest-xdist flaky collection order
+# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
 # https://github.com/pytest-dev/pytest/issues/920
 # https://github.com/pytest-dev/pytest/issues/1075
 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
@@ -16,7 +9,7 @@ if [ -n "$LOCALE_OVERRIDE" ]; then
     export LC_ALL="$LOCALE_OVERRIDE"
     export LANG="$LOCALE_OVERRIDE"
     PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
-    if [[ "$LOCALE_OVERIDE" != "$PANDAS_LOCALE" ]]; then
+    if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
         echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
         # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
         # exit 1
 
@@ -828,7 +828,7 @@ If installed, we now require:
 | pytest (dev)    | 4.0.2           |          |
 +-----------------+-----------------+----------+
 
-For `optional libraries <https://dev.pandas.io/install.html#dependencies>`_ the general recommendation is to use the latest version.
+For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
 The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
 Optional libraries below the lowest tested version may still work, but are not considered supported.
 
 
@@ -109,6 +109,8 @@ Removal of prior version deprecations/changes
 - :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`)
 - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
 - Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
+- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 versions of pandas has been removed (:issue:`27538`)
+-
 
 .. _whatsnew_1000.performance:
 
@@ -149,7 +151,7 @@ Datetimelike
 - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
 - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
 - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
--
+- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
 
 
 Timedelta
@@ -220,6 +222,7 @@ I/O
 - Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`)
 - Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
 - Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`)
+- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`)
 
 Plotting
 ^^^^^^^^
@@ -230,6 +233,7 @@ Plotting
 - Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`)
 - Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`)
 - Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`)
+- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`)
 - :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
 
 Groupby/resample/rolling
@@ -243,8 +247,8 @@ Groupby/resample/rolling
 Reshaping
 ^^^^^^^^^
 
+- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`)
 - Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`)
--
 
 Sparse
 ^^^^^^
 
@@ -204,17 +204,20 @@ def apply_empty_result(self):
         from pandas import Series
 
         if not should_reduce:
-
-            EMPTY_SERIES = Series([])
             try:
-                r = self.f(EMPTY_SERIES, *self.args, **self.kwds)
+                r = self.f(Series([]))
             except Exception:
                 pass
             else:
                 should_reduce = not isinstance(r, Series)
 
         if should_reduce:
-            return self.obj._constructor_sliced(np.nan, index=self.agg_axis)
+            if len(self.agg_axis):
+                r = self.f(Series([]))
+            else:
+                r = np.nan
+
+            return self.obj._constructor_sliced(r, index=self.agg_axis)
         else:
             return self.obj.copy()
 
 
@@ -1353,24 +1353,7 @@ def __setstate__(self, state):
         if not isinstance(state, dict):
             raise Exception("invalid pickle state")
 
-        # Provide compatibility with pre-0.15.0 Categoricals.
-        if "_categories" not in state and "_levels" in state:
-            state["_categories"] = self.dtype.validate_categories(state.pop("_levels"))
-        if "_codes" not in state and "labels" in state:
-            state["_codes"] = coerce_indexer_dtype(
-                state.pop("labels"), state["_categories"]
-            )
-
-        # 0.16.0 ordered change
-        if "_ordered" not in state:
-
-            # >=15.0 < 0.16.0
-            if "ordered" in state:
-                state["_ordered"] = state.pop("ordered")
-            else:
-                state["_ordered"] = False
-
-        # 0.21.0 CategoricalDtype change
+        # compat with pre 0.21.0 CategoricalDtype change
         if "_dtype" not in state:
             state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"])
 
 
@@ -7,7 +7,7 @@
 
 from pandas import DataFrame, get_option
 
-from pandas.io.common import get_filepath_or_buffer, is_s3_url
+from pandas.io.common import get_filepath_or_buffer, is_gcs_url, is_s3_url
 
 
 def get_engine(engine):
@@ -159,12 +159,12 @@ def write(
         if partition_cols is not None:
             kwargs["file_scheme"] = "hive"
 
-        if is_s3_url(path):
-            # path is s3:// so we need to open the s3file in 'wb' mode.
+        if is_s3_url(path) or is_gcs_url(path):
+            # if path is s3:// or gs:// we need to open the file in 'wb' mode.
             # TODO: Support 'ab'
 
             path, _, _, _ = get_filepath_or_buffer(path, mode="wb")
-            # And pass the opened s3file to the fastparquet internal impl.
+            # And pass the opened file to the fastparquet internal impl.
             kwargs["open_with"] = lambda path, _: path
         else:
             path, _, _, _ = get_filepath_or_buffer(path)
 
@@ -4,6 +4,7 @@
 from matplotlib.artist import setp
 import numpy as np
 
+from pandas.core.dtypes.common import is_dict_like
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.missing import remove_na_arraylike
 
@@ -250,13 +251,38 @@ def boxplot(
     def _get_colors():
         #  num_colors=3 is required as method maybe_color_bp takes the colors
         #  in positions 0 and 2.
-        return _get_standard_colors(color=kwds.get("color"), num_colors=3)
+        #  if colors not provided, use same defaults as DataFrame.plot.box
+        result = _get_standard_colors(num_colors=3)
+        result = np.take(result, [0, 0, 2])
+        result = np.append(result, "k")
+
+        colors = kwds.pop("color", None)
+        if colors:
+            if is_dict_like(colors):
+                # replace colors in result array with user-specified colors
+                # taken from the colors dict parameter
+                # "boxes" value placed in position 0, "whiskers" in 1, etc.
+                valid_keys = ["boxes", "whiskers", "medians", "caps"]
+                key_to_index = dict(zip(valid_keys, range(4)))
+                for key, value in colors.items():
+                    if key in valid_keys:
+                        result[key_to_index[key]] = value
+                    else:
+                        raise ValueError(
+                            "color dict contains invalid "
+                            "key '{0}' "
+                            "The key must be either {1}".format(key, valid_keys)
+                        )
+            else:
+                result.fill(colors)
+
+        return result
 
     def maybe_color_bp(bp):
-        if "color" not in kwds:
-            setp(bp["boxes"], color=colors[0], alpha=1)
-            setp(bp["whiskers"], color=colors[0], alpha=1)
-            setp(bp["medians"], color=colors[2], alpha=1)
+        setp(bp["boxes"], color=colors[0], alpha=1)
+        setp(bp["whiskers"], color=colors[1], alpha=1)
+        setp(bp["medians"], color=colors[2], alpha=1)
+        setp(bp["caps"], color=colors[3], alpha=1)
 
     def plot_group(keys, values, ax):
         keys = [pprint_thing(x) for x in keys]