
ENH: Add end and end_day options for origin from resample #37805
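The feature this PR proposes (and which later shipped in pandas 1.3, tracked as GH 37804) lets `resample` anchor its bins to the *end* of the data instead of the start. A minimal sketch of the intended behavior, assuming pandas >= 1.3; `closed` and `label` are passed explicitly for clarity rather than relying on the defaults the option picks:

```python
import pandas as pd

# Seven points, 7 minutes apart: 23:30, 23:37, ..., 00:12 the next day.
idx = pd.date_range("2000-10-01 23:30:00", periods=7, freq="7min")
ts = pd.Series(range(7), index=idx)

# origin="end" phases the 17-minute bin edges backward from the last
# timestamp (00:12), so the final bin closes exactly on the final point.
out = ts.resample("17min", origin="end", closed="right", label="right").sum()
print(out)
# 2000-10-01 23:38:00     1
# 2000-10-01 23:55:00     5
# 2000-10-02 00:12:00    15
```

Going backward from 00:12 the edges are 23:55, 23:38, 23:21, and summing the values 0..6 across those right-closed bins gives 1, 5 and 15; a start-anchored origin generally leaves the last bin overshooting the data instead.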

Closed (wants to merge 44 commits)
Commits (44):
39f1e8f  ENH: Add 'end' option in resample's origin (GYHHAHA, Nov 13, 2020)
cd5aa64  Update resample.py (GYHHAHA, Nov 13, 2020)
0184b1d  Update resample.py (GYHHAHA, Nov 13, 2020)
ff35b6f  Update test_resample_api.py (GYHHAHA, Nov 13, 2020)
8c4549e  Update resample.py (GYHHAHA, Nov 13, 2020)
b835d1a  Update test_resample_api.py (GYHHAHA, Nov 13, 2020)
bf15c67  Update test_resample_api.py (GYHHAHA, Nov 13, 2020)
e4b01d8  Update test_datetime_index.py (GYHHAHA, Nov 13, 2020)
d096ccd  add backward para and end_day option (GYHHAHA, Nov 27, 2020)
222ef8d  add doc-string (GYHHAHA, Nov 27, 2020)
90c9c5f  add test cases (GYHHAHA, Nov 27, 2020)
eae898c  fix format (GYHHAHA, Nov 27, 2020)
2ee1000  Update test_resample_api.py (GYHHAHA, Nov 27, 2020)
3442e00  Update test_resample_api.py (GYHHAHA, Nov 27, 2020)
a33acac  Update test_resample_api.py (GYHHAHA, Nov 27, 2020)
7c54839  Update test_resample_api.py (GYHHAHA, Nov 27, 2020)
a4e0a39  flake8 fix (GYHHAHA, Nov 27, 2020)
0e2e390  break lines (GYHHAHA, Nov 27, 2020)
9f4844a  Update resample.py (GYHHAHA, Nov 27, 2020)
5b7f396  fix docstring (GYHHAHA, Nov 27, 2020)
115c92a  split tests (GYHHAHA, Nov 28, 2020)
7d8d67a  Update generic.py (GYHHAHA, Nov 28, 2020)
77fc4a3  doc added & tests fix (GYHHAHA, Nov 28, 2020)
0cff41e  Merge branch 'master' into master (GYHHAHA, Nov 28, 2020)
b492293  fix doc (GYHHAHA, Nov 28, 2020)
561096c  Merge remote-tracking branch 'upstream/master' (GYHHAHA, Dec 11, 2020)
76a015a  Revert "Merge remote-tracking branch 'upstream/master'" (GYHHAHA, Dec 11, 2020)
a0262ab  Revert "fix doc" (GYHHAHA, Dec 11, 2020)
b990c5f  Revert "Merge branch 'master' into master" (GYHHAHA, Dec 11, 2020)
8e8c1e6  Revert "doc added & tests fix" (GYHHAHA, Dec 11, 2020)
cc9f2e0  Revert "Update generic.py" (GYHHAHA, Dec 11, 2020)
629773a  Revert "split tests" (GYHHAHA, Dec 11, 2020)
c79155b  Revert "fix docstring" (GYHHAHA, Dec 11, 2020)
f46c924  Revert "Update resample.py" (GYHHAHA, Dec 11, 2020)
af99a33  Revert "break lines" (GYHHAHA, Dec 11, 2020)
d7db83b  Revert "flake8 fix" (GYHHAHA, Dec 11, 2020)
69183f6  Revert "Update test_resample_api.py" (GYHHAHA, Dec 11, 2020)
5b9afee  Revert "Update test_resample_api.py" (GYHHAHA, Dec 11, 2020)
216bff3  Revert "Update test_resample_api.py" (GYHHAHA, Dec 11, 2020)
5409a75  Revert "Update test_resample_api.py" (GYHHAHA, Dec 11, 2020)
90ddc36  Revert "fix format" (GYHHAHA, Dec 11, 2020)
7b3cffb  Revert "add test cases" (GYHHAHA, Dec 11, 2020)
2d51a8a  Revert "add doc-string" (GYHHAHA, Dec 11, 2020)
c24d8f9  Revert "add backward para and end_day option" (GYHHAHA, Dec 11, 2020)
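The "end_day" option named in the commits above ("add backward para and end_day option") anchors bins to the ceiling midnight of the last day rather than to the last observation itself. A hedged sketch of the difference, assuming pandas >= 1.3 behavior; note that for a 15-minute rule any midnight anchor puts the edges on clean quarter-hours, which is the point of the variant:

```python
import pandas as pd

idx = pd.date_range("2000-10-01 23:30:00", periods=7, freq="7min")
ts = pd.Series(range(7), index=idx)

# origin="end" would phase 15-minute edges off the last point (00:12),
# giving ragged edges at :12, :57, :42, ...  origin="end_day" anchors
# to the ceiling midnight of the last day instead, so the edges land
# on :00 / :15 / :30 / :45.
out = ts.resample("15min", origin="end_day", closed="right", label="right").sum()
print(out)
# 2000-10-01 23:30:00     0
# 2000-10-01 23:45:00     3
# 2000-10-02 00:00:00     7
# 2000-10-02 00:15:00    11
```

In short, "end" guarantees the last bin closes on the final observation, while "end_day" trades that for calendar-aligned edges.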
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -18,7 +18,7 @@ jobs:
steps:

- name: Setting conda path
-run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
+run: echo "::add-path::${HOME}/miniconda3/bin"

- name: Checkout
uses: actions/checkout@v1
@@ -98,7 +98,7 @@ jobs:
steps:

- name: Setting conda path
-run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
+run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"

- name: Checkout
uses: actions/checkout@v1
1 change: 0 additions & 1 deletion .gitignore
@@ -12,7 +12,6 @@
*.log
*.swp
*.pdb
-*.zip
.project
.pydevproject
.settings
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -26,7 +26,7 @@ repos:
name: isort (cython)
types: [cython]
- repo: https://github.com/asottile/pyupgrade
-rev: v2.7.4
+rev: v2.7.3
hooks:
- id: pyupgrade
args: [--py37-plus]
7 changes: 6 additions & 1 deletion .travis.yml
@@ -35,6 +35,11 @@ matrix:
fast_finish: true

include:
+- dist: bionic
+python: 3.9-dev
+env:
+- JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
+
- env:
- JOB="3.8, slow" ENV_FILE="ci/deps/travis-38-slow.yaml" PATTERN="slow" SQL="1"
services:
@@ -89,7 +94,7 @@ install:
script:
- echo "script start"
- echo "$JOB"
-- source activate pandas-dev
+- if [ "$JOB" != "3.9-dev" ]; then source activate pandas-dev; fi
- ci/run_tests.sh

after_script:
17 changes: 8 additions & 9 deletions Dockerfile
@@ -1,4 +1,4 @@
-FROM quay.io/condaforge/miniforge3
+FROM continuumio/miniconda3

# if you forked pandas, you can pass in your own GitHub username to use your fork
# i.e. gh_username=myname
@@ -15,6 +15,10 @@ RUN apt-get update \
# Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
&& apt-get -y install git iproute2 procps iproute2 lsb-release \
+#
+# Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
+# needed to build pandas C extensions
+&& apt-get -y install build-essential \
#
# cleanup
&& apt-get autoremove -y \
&& apt-get clean -y \
@@ -35,14 +39,9 @@ RUN mkdir "$pandas_home" \
# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
#
# Set up environment
-RUN conda install -y mamba
-RUN mamba env update -n base -f "$pandas_home/environment.yml"
+RUN conda env update -n base -f "$pandas_home/environment.yml"

# Build C extensions and pandas
-SHELL ["/bin/bash", "-c"]
-RUN . /opt/conda/etc/profile.d/conda.sh \
-&& conda activate base \
-&& cd "$pandas_home" \
-&& export \
-&& python setup.py build_ext -j 4 \
+RUN cd "$pandas_home" \
+&& python setup.py build_ext --inplace -j 4 \
&& python -m pip install -e .
2 changes: 1 addition & 1 deletion Makefile
@@ -9,7 +9,7 @@ clean_pyc:
-find . -name '*.py[co]' -exec rm {} \;

build: clean_pyc
-python setup.py build_ext
+python setup.py build_ext --inplace

lint-diff:
git diff upstream/master --name-only -- "*.py" | xargs flake8
44 changes: 22 additions & 22 deletions README.md
@@ -60,27 +60,27 @@ Here are just a few of the things that pandas does well:
and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
- [**Time series**][timeseries]-specific functionality: date range
generation and frequency conversion, moving window statistics,
-date shifting and lagging
-
-
-[missing-data]: https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html
-[insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#column-selection-addition-deletion
-[alignment]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html?highlight=alignment#intro-to-data-structures
-[groupby]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#group-by-split-apply-combine
-[conversion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#dataframe
-[slicing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#slicing-ranges
-[fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced
-[subsetting]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing
-[merging]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging
-[joining]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-on-index
-[reshape]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html
-[pivot-table]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html
-[mi]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#hierarchical-indexing-multiindex
-[flat-files]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#csv-text-files
-[excel]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#excel-files
-[db]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#sql-queries
-[hdfstore]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#hdf5-pytables
-[timeseries]: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-series-date-functionality
+date shifting and lagging.
+
+
+[missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data
+[insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion
+[alignment]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures
+[groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine
+[conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe
+[slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges
+[fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix
+[subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing
+[merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging
+[joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index
+[reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables
+[pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations
+[mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex
+[flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files
+[excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files
+[db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries
+[hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables
+[timeseries]: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality

## Where to get it
The source code is currently hosted on GitHub at:
@@ -154,7 +154,7 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).

## Discussion and Development
-Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
+Most development discussions take place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.

## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)

12 changes: 0 additions & 12 deletions asv_bench/benchmarks/algorithms.py
@@ -5,7 +5,6 @@
from pandas._libs import lib

import pandas as pd
-from pandas.core.algorithms import make_duplicates_of_left_unique_in_right

from .pandas_vb_common import tm

@@ -175,15 +174,4 @@ def time_argsort(self, N):
self.array.argsort()


-class RemoveDuplicates:
-def setup(self):
-N = 10 ** 5
-na = np.arange(int(N / 2))
-self.left = np.concatenate([na[: int(N / 4)], na[: int(N / 4)]])
-self.right = np.concatenate([na, na])
-
-def time_make_duplicates_of_left_unique_in_right(self):
-make_duplicates_of_left_unique_in_right(self.left, self.right)


from .pandas_vb_common import setup # noqa: F401 isort:skip
43 changes: 0 additions & 43 deletions asv_bench/benchmarks/categoricals.py
@@ -1,5 +1,3 @@
-import string
-import sys
import warnings

import numpy as np
@@ -69,47 +67,6 @@ def time_existing_series(self):
pd.Categorical(self.series)


-class AsType:
-def setup(self):
-N = 10 ** 5
-
-random_pick = np.random.default_rng().choice
-
-categories = {
-"str": list(string.ascii_letters),
-"int": np.random.randint(2 ** 16, size=154),
-"float": sys.maxsize * np.random.random((38,)),
-"timestamp": [
-pd.Timestamp(x, unit="s") for x in np.random.randint(2 ** 18, size=578)
-],
-}
-
-self.df = pd.DataFrame(
-{col: random_pick(cats, N) for col, cats in categories.items()}
-)
-
-for col in ("int", "float", "timestamp"):
-self.df[col + "_as_str"] = self.df[col].astype(str)
-
-for col in self.df.columns:
-self.df[col] = self.df[col].astype("category")
-
-def astype_str(self):
-[self.df[col].astype("str") for col in "int float timestamp".split()]
-
-def astype_int(self):
-[self.df[col].astype("int") for col in "int_as_str timestamp".split()]
-
-def astype_float(self):
-[
-self.df[col].astype("float")
-for col in "float_as_str int int_as_str timestamp".split()
-]
-
-def astype_datetime(self):
-self.df["float"].astype(pd.DatetimeTZDtype(tz="US/Pacific"))
-

class Concat:
def setup(self):
N = 10 ** 5
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
@@ -486,7 +486,7 @@ def setup(self):
tmp2 = (np.random.random(10000) * 10.0).astype(np.float32)
tmp = np.concatenate((tmp1, tmp2))
arr = np.repeat(tmp, 10)
-self.df = DataFrame({"a": arr, "b": arr})
+self.df = DataFrame(dict(a=arr, b=arr))

def time_sum(self):
self.df.groupby(["a"])["b"].sum()