pandas-dev · erfannariman · Sep 1, 2020 · Sep 1, 2020 · Sep 1, 2020 · Sep 1, 2020
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -4,7 +4,9 @@ on:
   push:
     branches: master
   pull_request:
-    branches: master
+    branches:
+      - master
+      - 1.1.x
 
 env:
   ENV_FILE: environment.yml

diff --git a/.github/workflows/stale-pr.yml b/.github/workflows/stale-pr.yml
@@ -0,0 +1,21 @@
+name: "Stale PRs"
+on:
+  schedule:
+  # * is a special character in YAML so you have to quote this string
+  - cron: "0 */6 * * *"
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/stale@v3
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity."
+        skip-stale-pr-message: true
+        stale-pr-label: "Stale"
+        exempt-pr-labels: "Needs Review,Blocked,Needs Discussion"
+        days-before-stale: 30
+        days-before-close: -1
+        remove-stale-when-updated: false
+        debug-only: false
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ repos:
     -   id: black
         language_version: python3
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.7.7
+    rev: 3.8.3
     hooks:
     -   id: flake8
         language: python_venv
@@ -25,20 +25,8 @@ repos:
           - file
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/pre-commit/mirrors-isort
-    rev: v4.3.21
+    rev: v5.2.2
     hooks:
     -   id: isort
         language: python_venv
         exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
--   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.730
-    hooks:
-     -  id: mypy
-        args:
-          # As long as a some files are excluded from check-untyped-defs
-          # we have to exclude it from the pre-commit hook as the configuration
-          # is based on modules but the hook runs on files.
-          - --no-check-untyped-defs
-          - --follow-imports
-          - skip
-        files: pandas/
diff --git a/.travis.yml b/.travis.yml
@@ -42,10 +42,10 @@ matrix:
 
     - arch: arm64
       env:
-        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
+        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard and not arm_slow)"
 
     - env:
-        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
+        - JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
       services:
         - mysql
         - postgresql
@@ -54,24 +54,11 @@ matrix:
         # Enabling Deprecations when running tests
         # PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
         # See pandas/_testing.py for more details.
-        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
+        - JOB="3.7, coverage" ENV_FILE="ci/deps/travis-37-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
       services:
         - mysql
         - postgresql
 
-    - env:
-        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
-      services:
-        - mysql
-        - postgresql
-  allow_failures:
-    - arch: arm64
-      env:
-        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
-    - dist: bionic
-      env:
-        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
-
 
 before_install:
   - echo "before_install"

diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE
@@ -1,3 +1,7 @@
+Copyright 2014-2019, xarray Developers
+
+--------------------------------------------------------------------------------
+
 Apache License
 Version 2.0, January 2004
 http://www.apache.org/licenses/

diff --git a/Makefile b/Makefile
@@ -25,3 +25,16 @@ doc:
 	cd doc; \
 	python make.py clean; \
 	python make.py html
+
+check:
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_function_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+		pandas/
+
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_import_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
+		pandas/
diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ its way towards this goal.
 Here are just a few of the things that pandas does well:
 
   - Easy handling of [**missing data**][missing-data] (represented as
-    `NaN`) in floating point as well as non-floating point data
+    `NaN`, `NA`, or `NaT`) in floating point as well as non-floating point data
   - Size mutability: columns can be [**inserted and
     deleted**][insertion-deletion] from DataFrame and higher dimensional
     objects

diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
@@ -26,7 +26,7 @@
     // The Pythons you'd like to test against.  If not provided, defaults
     // to the current version of Python used to run `asv`.
     // "pythons": ["2.7", "3.4"],
-    "pythons": ["3.6"],
+    "pythons": ["3.8"],
 
     // The matrix of dependencies to test.  Each key is the name of a
     // package (in PyPI) and the values are version numbers.  An empty
@@ -39,7 +39,7 @@
     // followed by the pip installed packages).
     "matrix": {
         "numpy": [],
-        "Cython": ["0.29.16"],
+        "Cython": ["0.29.21"],
         "matplotlib": [],
         "sqlalchemy": [],
         "scipy": [],
@@ -53,6 +53,7 @@
         "xlwt": [],
         "odfpy": [],
         "pytest": [],
+        "jinja2": [],
         // If using Windows with python 2.7 and want to build using the
         // mingw toolchain (rather than MSVC), uncomment the following line.
         // "libpython": [],

diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
@@ -6,7 +6,7 @@
 from .pandas_vb_common import tm
 
 try:
-    from pandas.tseries.offsets import Nano, Hour
+    from pandas.tseries.offsets import Hour, Nano
 except ImportError:
     # For compatibility with older versions
     from pandas.core.datetools import *  # noqa

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -219,6 +219,46 @@ def time_to_html_mixed(self):
         self.df2.to_html()
 
 
+class ToNumpy:
+    def setup(self):
+        N = 10000
+        M = 10
+        self.df_tall = DataFrame(np.random.randn(N, M))
+        self.df_wide = DataFrame(np.random.randn(M, N))
+        self.df_mixed_tall = self.df_tall.copy()
+        self.df_mixed_tall["foo"] = "bar"
+        self.df_mixed_tall[0] = period_range("2000", periods=N)
+        self.df_mixed_tall[1] = range(N)
+        self.df_mixed_wide = self.df_wide.copy()
+        self.df_mixed_wide["foo"] = "bar"
+        self.df_mixed_wide[0] = period_range("2000", periods=M)
+        self.df_mixed_wide[1] = range(M)
+
+    def time_to_numpy_tall(self):
+        self.df_tall.to_numpy()
+
+    def time_to_numpy_wide(self):
+        self.df_wide.to_numpy()
+
+    def time_to_numpy_mixed_tall(self):
+        self.df_mixed_tall.to_numpy()
+
+    def time_to_numpy_mixed_wide(self):
+        self.df_mixed_wide.to_numpy()
+
+    def time_values_tall(self):
+        self.df_tall.values
+
+    def time_values_wide(self):
+        self.df_wide.values
+
+    def time_values_mixed_tall(self):
+        self.df_mixed_tall.values
+
+    def time_values_mixed_wide(self):
+        self.df_mixed_wide.values
+
+
 class Repr:
     def setup(self):
         nrows = 10000

diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
@@ -7,14 +7,14 @@
 
 try:
     from pandas import (
-        rolling_median,
+        rolling_kurt,
+        rolling_max,
         rolling_mean,
+        rolling_median,
         rolling_min,
-        rolling_max,
-        rolling_var,
         rolling_skew,
-        rolling_kurt,
         rolling_std,
+        rolling_var,
     )
 
     have_rolling_methods = True

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
@@ -627,49 +627,63 @@ def time_first(self):
 
 
 class TransformEngine:
-    def setup(self):
+
+    param_names = ["parallel"]
+    params = [[True, False]]
+
+    def setup(self, parallel):
         N = 10 ** 3
         data = DataFrame(
             {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
             columns=[0, 1],
         )
+        self.parallel = parallel
         self.grouper = data.groupby(0)
 
-    def time_series_numba(self):
+    def time_series_numba(self, parallel):
         def function(values, index):
             return values * 5
 
-        self.grouper[1].transform(function, engine="numba")
+        self.grouper[1].transform(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_series_cython(self):
+    def time_series_cython(self, parallel):
         def function(values):
             return values * 5
 
         self.grouper[1].transform(function, engine="cython")
 
-    def time_dataframe_numba(self):
+    def time_dataframe_numba(self, parallel):
         def function(values, index):
             return values * 5
 
-        self.grouper.transform(function, engine="numba")
+        self.grouper.transform(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_dataframe_cython(self):
+    def time_dataframe_cython(self, parallel):
         def function(values):
             return values * 5
 
         self.grouper.transform(function, engine="cython")
 
 
 class AggEngine:
-    def setup(self):
+
+    param_names = ["parallel"]
+    params = [[True, False]]
+
+    def setup(self, parallel):
         N = 10 ** 3
         data = DataFrame(
             {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
             columns=[0, 1],
         )
+        self.parallel = parallel
         self.grouper = data.groupby(0)
 
-    def time_series_numba(self):
+    def time_series_numba(self, parallel):
         def function(values, index):
             total = 0
             for i, value in enumerate(values):
@@ -679,9 +693,11 @@ def function(values, index):
                     total += value * 2
             return total
 
-        self.grouper[1].agg(function, engine="numba")
+        self.grouper[1].agg(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_series_cython(self):
+    def time_series_cython(self, parallel):
         def function(values):
             total = 0
             for i, value in enumerate(values):
@@ -693,7 +709,7 @@ def function(values):
 
         self.grouper[1].agg(function, engine="cython")
 
-    def time_dataframe_numba(self):
+    def time_dataframe_numba(self, parallel):
         def function(values, index):
             total = 0
             for i, value in enumerate(values):
@@ -703,9 +719,11 @@ def function(values, index):
                     total += value * 2
             return total
 
-        self.grouper.agg(function, engine="numba")
+        self.grouper.agg(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_dataframe_cython(self):
+    def time_dataframe_cython(self, parallel):
         def function(values):
             total = 0
             for i, value in enumerate(values):