Commit e736b0a

Merge upstream/master
2 parents: 325170d + e81faa1

File tree: 249 files changed, +7064 -6415 lines

.travis.yml

+19 -16

@@ -30,31 +30,34 @@ matrix:
     - python: 3.5

   include:
-    - dist: trusty
-      env:
+    - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"

-    - dist: trusty
-      env:
+    - env:
         - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"

-    - dist: trusty
-      env:
-        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8"
+    - env:
+        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
+      services:
+        - mysql
+        - postgresql

-    - dist: trusty
-      env:
-        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
+    - env:
+        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
+      services:
+        - mysql
+        - postgresql

     # In allow_failures
-    - dist: trusty
-      env:
-        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
+    - env:
+        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
+      services:
+        - mysql
+        - postgresql

   allow_failures:
-    - dist: trusty
-      env:
-        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
+    - env:
+        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"

   before_install:
     - echo "before_install"

README.md

+1 -1

@@ -124,7 +124,7 @@ Here are just a few of the things that pandas does well:
     and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
   - [**Time series**][timeseries]-specific functionality: date range
     generation and frequency conversion, moving window statistics,
-    moving window linear regressions, date shifting and lagging, etc.
+    date shifting and lagging.


 [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data

asv_bench/benchmarks/dtypes.py

+22

@@ -5,6 +5,7 @@
 from .pandas_vb_common import (
     datetime_dtypes,
     extension_dtypes,
+    lib,
     numeric_dtypes,
     string_dtypes,
 )
@@ -40,4 +41,25 @@ def time_pandas_dtype_invalid(self, dtype):
         pass


+class InferDtypes:
+    param_names = ["dtype"]
+    data_dict = {
+        "np-object": np.array([1] * 100000, dtype="O"),
+        "py-object": [1] * 100000,
+        "np-null": np.array([1] * 50000 + [np.nan] * 50000),
+        "py-null": [1] * 50000 + [None] * 50000,
+        "np-int": np.array([1] * 100000, dtype=int),
+        "np-floating": np.array([1.0] * 100000, dtype=float),
+        "empty": [],
+        "bytes": [b"a"] * 100000,
+    }
+    params = list(data_dict.keys())
+
+    def time_infer_skipna(self, dtype):
+        lib.infer_dtype(self.data_dict[dtype], skipna=True)
+
+    def time_infer(self, dtype):
+        lib.infer_dtype(self.data_dict[dtype], skipna=False)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
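
Note (not part of the commit): a minimal sketch of the dtype-inference routine the new InferDtypes benchmark times. It assumes the public pandas.api.types.infer_dtype wrapper, which exposes the same lib.infer_dtype function imported above.

    # Illustration only: the inference routine exercised by the InferDtypes benchmark.
    import numpy as np
    from pandas.api.types import infer_dtype

    data = [1] * 5 + [np.nan] * 5            # mirrors the "np-null" / "py-null" benchmark cases
    print(infer_dtype(data, skipna=True))    # missing values ignored; inferred as integer-like
    print(infer_dtype(data, skipna=False))   # missing values considered, so a different label comes back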

ci/azure/posix.yml

+2 -9

@@ -69,20 +69,13 @@ jobs:
     displayName: 'Build versions'

   - task: PublishTestResults@2
+    condition: succeededOrFailed()
     inputs:
+      failTaskOnFailedTests: true
       testResultsFiles: 'test-data.xml'
       testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
     displayName: 'Publish test results'

-  - powershell: |
-      $(Get-Content "test-data.xml" | Out-String) -match 'failures="(.*?)"'
-      if ($matches[1] -eq 0) {
-        Write-Host "No test failures in test-data"
-      } else {
-        Write-Error "$($matches[1]) tests failed"  # will produce $LASTEXITCODE=1
-      }
-    displayName: 'Check for test failures'
-
   - script: |
       source activate pandas-dev
       python ci/print_skipped.py

ci/azure/windows.yml

+12 -10

@@ -23,32 +23,34 @@ jobs:
       Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
       Write-Host "##vso[task.prependpath]$HOME/miniconda3/bin"
     displayName: 'Add conda to PATH'
+
   - script: conda update -q -n base conda
     displayName: 'Update conda'
+
   - bash: |
       conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
     displayName: 'Create anaconda environment'
+
   - bash: |
       source activate pandas-dev
       conda list
-      ci\\incremental\\build.cmd
+      python setup.py build_ext -q -i
+      python -m pip install --no-build-isolation -e .
     displayName: 'Build'
+
   - bash: |
       source activate pandas-dev
       ci/run_tests.sh
     displayName: 'Test'
+
   - task: PublishTestResults@2
+    condition: succeededOrFailed()
     inputs:
+      failTaskOnFailedTests: true
       testResultsFiles: 'test-data.xml'
-      testRunTitle: 'Windows-$(CONDA_PY)'
-  - powershell: |
-      $(Get-Content "test-data.xml" | Out-String) -match 'failures="(.*?)"'
-      if ($matches[1] -eq 0) {
-        Write-Host "No test failures in test-data"
-      } else {
-        Write-Error "$($matches[1]) tests failed"  # will produce $LASTEXITCODE=1
-      }
-    displayName: 'Check for test failures'
+      testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
+    displayName: 'Publish test results'
+
   - bash: |
       source activate pandas-dev
       python ci/print_skipped.py

ci/code_checks.sh

+7 -3

@@ -39,7 +39,7 @@ function invgrep {
 }

 if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s"
+    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
     INVGREP_PREPEND="##[error]"
 else
     FLAKE8_FORMAT="default"
@@ -52,7 +52,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     black --version

     MSG='Checking black formatting' ; echo $MSG
-    black . --check
+    black . --check
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     # `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -104,7 +104,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     isort --version-number

     # Imports - Check formatting using isort see setup.cfg for settings
-    MSG='Check import format using isort ' ; echo $MSG
+    MSG='Check import format using isort' ; echo $MSG
     ISORT_CMD="isort --recursive --check-only pandas asv_bench"
     if [[ "$GITHUB_ACTIONS" == "true" ]]; then
         eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
@@ -203,6 +203,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include=*.{py,pyx} '\.__class__' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"

+    MSG='Check for use of xrange instead of range' ; echo $MSG
+    invgrep -R --include=*.{py,pyx} 'xrange' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
     INVGREP_APPEND=" <- trailing whitespaces found"
     invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *

ci/incremental/build.cmd

-9
This file was deleted.

ci/run_tests.sh

+2 -2

@@ -38,6 +38,6 @@ sh -c "$PYTEST_CMD"

 if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
     echo "uploading coverage"
-    echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
-    bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
+    echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME"
+    bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME
 fi

ci/setup_env.sh

+2 -1

@@ -140,7 +140,8 @@ echo "conda list"
 conda list

 # Install DB for Linux
-if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
+
+if [[ -n ${SQL:0} ]]; then
   echo "installing dbs"
   mysql -e 'create database pandas_nosetest;'
   psql -c 'create database pandas_nosetest;' -U postgres

doc/redirects.csv

+1 -1

@@ -777,7 +777,7 @@ generated/pandas.io.formats.style.Styler.to_excel,../reference/api/pandas.io.for
 generated/pandas.io.formats.style.Styler.use,../reference/api/pandas.io.formats.style.Styler.use
 generated/pandas.io.formats.style.Styler.where,../reference/api/pandas.io.formats.style.Styler.where
 generated/pandas.io.json.build_table_schema,../reference/api/pandas.io.json.build_table_schema
-generated/pandas.io.json.json_normalize,../reference/api/pandas.io.json.json_normalize
+generated/pandas.io.json.json_normalize,../reference/api/pandas.json_normalize
 generated/pandas.io.stata.StataReader.data_label,../reference/api/pandas.io.stata.StataReader.data_label
 generated/pandas.io.stata.StataReader.value_labels,../reference/api/pandas.io.stata.StataReader.value_labels
 generated/pandas.io.stata.StataReader.variable_labels,../reference/api/pandas.io.stata.StataReader.variable_labels

doc/source/_static/favicon.ico

-3.81 KB
Binary file not shown.

doc/source/conf.py

+6 -2

@@ -204,7 +204,11 @@
 # Theme options are theme-specific and customize the look and feel of a theme
 # further. For a list of options available for each theme, see the
 # documentation.
-# html_theme_options = {}
+html_theme_options = {
+    "external_links": [],
+    "github_url": "https://github.com/pandas-dev/pandas",
+    "twitter_url": "https://twitter.com/pandas_dev",
+}

 # Add any paths that contain custom themes here, relative to this directory.
 # html_theme_path = ["themes"]
@@ -228,7 +232,7 @@
 # The name of an image file (within the static path) to use as favicon of the
 # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-html_favicon = os.path.join(html_static_path[0], "favicon.ico")
+html_favicon = "../../web/pandas/static/img/favicon.ico"

 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.

doc/source/getting_started/overview.rst

+1 -2

@@ -57,8 +57,7 @@ Here are just a few of the things that pandas does well:
   Excel files, databases, and saving / loading data from the ultrafast **HDF5
   format**
 - **Time series**-specific functionality: date range generation and frequency
-  conversion, moving window statistics, moving window linear regressions,
-  date shifting and lagging, etc.
+  conversion, moving window statistics, date shifting and lagging.

 Many of these principles are here to address the shortcomings frequently
 experienced using other languages / scientific research environments. For data

doc/source/reference/io.rst

+1 -1

@@ -50,13 +50,13 @@ JSON
    :toctree: api/

    read_json
+   json_normalize

 .. currentmodule:: pandas.io.json

 .. autosummary::
    :toctree: api/

-   json_normalize
    build_table_schema

 .. currentmodule:: pandas

doc/source/user_guide/io.rst

+11 -13

@@ -35,7 +35,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
     binary;`SPSS <https://en.wikipedia.org/wiki/SPSS>`__;:ref:`read_spss<io.spss_reader>`;
     binary;`Python Pickle Format <https://docs.python.org/3/library/pickle.html>`__;:ref:`read_pickle<io.pickle>`;:ref:`to_pickle<io.pickle>`
     SQL;`SQL <https://en.wikipedia.org/wiki/SQL>`__;:ref:`read_sql<io.sql>`;:ref:`to_sql<io.sql>`
-    SQL;`Google Big Query <https://en.wikipedia.org/wiki/BigQuery>`__;:ref:`read_gbq<io.bigquery>`;:ref:`to_gbq<io.bigquery>`
+    SQL;`Google BigQuery <https://en.wikipedia.org/wiki/BigQuery>`__;:ref:`read_gbq<io.bigquery>`;:ref:`to_gbq<io.bigquery>`

 :ref:`Here <io.perf>` is an informal performance comparison for some of these IO methods.

@@ -2136,27 +2136,26 @@ into a flat table.

 .. ipython:: python

-   from pandas.io.json import json_normalize
    data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
            {'name': {'given': 'Mose', 'family': 'Regner'}},
            {'id': 2, 'name': 'Faye Raker'}]
-   json_normalize(data)
+   pd.json_normalize(data)

 .. ipython:: python

    data = [{'state': 'Florida',
             'shortname': 'FL',
             'info': {'governor': 'Rick Scott'},
-            'counties': [{'name': 'Dade', 'population': 12345},
-                         {'name': 'Broward', 'population': 40000},
-                         {'name': 'Palm Beach', 'population': 60000}]},
+            'county': [{'name': 'Dade', 'population': 12345},
+                       {'name': 'Broward', 'population': 40000},
+                       {'name': 'Palm Beach', 'population': 60000}]},
            {'state': 'Ohio',
             'shortname': 'OH',
             'info': {'governor': 'John Kasich'},
-            'counties': [{'name': 'Summit', 'population': 1234},
-                         {'name': 'Cuyahoga', 'population': 1337}]}]
+            'county': [{'name': 'Summit', 'population': 1234},
+                       {'name': 'Cuyahoga', 'population': 1337}]}]

-   json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])
+   pd.json_normalize(data, 'county', ['state', 'shortname', ['info', 'governor']])

 The max_level parameter provides more control over which level to end normalization.
 With max_level=1 the following snippet normalizes until 1st nesting level of the provided dict.

@@ -2169,7 +2168,7 @@ With max_level=1 the following snippet normalizes until 1st nesting level of the
                  'Name': 'Name001'}},
     'Image': {'a': 'b'}
    }]
-   json_normalize(data, max_level=1)
+   pd.json_normalize(data, max_level=1)

 .. _io.jsonl:

@@ -4764,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns
 .. ipython:: python

    df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
-   df.to_parquet(fname='test', engine='pyarrow',
+   df.to_parquet(path='test', engine='pyarrow',
                  partition_cols=['a'], compression=None)

-The `fname` specifies the parent directory to which data will be saved.
+The `path` specifies the parent directory to which data will be saved.
 The `partition_cols` are the column names by which the dataset will be partitioned.
 Columns are partitioned in the order they are given. The partition splits are
 determined by the unique values in the partition columns.

@@ -4829,7 +4828,6 @@ See also some :ref:`cookbook examples <cookbook.sql>` for some advanced strategi
 The key functions are:

 .. autosummary::
-   :toctree: ../reference/api/

    read_sql_table
    read_sql_query
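
Note (not part of the diff): a short sketch of the renamed entry points these documentation changes describe — the top-level pd.json_normalize and the path keyword of DataFrame.to_parquet. The partitioned write assumes pyarrow is installed.

    # Illustration only: the renamed entry points referenced by the io.rst changes.
    import pandas as pd

    records = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
               {'id': 2, 'name': 'Faye Raker'}]
    pd.json_normalize(records)                     # top-level function replacing pandas.io.json.json_normalize

    df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
    df.to_parquet(path='test', engine='pyarrow',   # 'fname' keyword renamed to 'path'
                  partition_cols=['a'], compression=None)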

doc/source/user_guide/text.rst

+13 -2

@@ -74,6 +74,7 @@ These are places where the behavior of ``StringDtype`` objects differ from
 l. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
    that return **numeric** output will always return a nullable integer dtype,
    rather than either int or float dtype, depending on the presence of NA values.
+   Methods returning **boolean** output will return a nullable boolean dtype.

 .. ipython:: python

@@ -89,12 +90,22 @@ l. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
    s.astype(object).str.count("a")
    s.astype(object).dropna().str.count("a")

-   When NA values are present, the output dtype is float64.
+   When NA values are present, the output dtype is float64. Similarly for
+   methods returning boolean values.
+
+   .. ipython:: python
+
+      s.str.isdigit()
+      s.str.match("a")

 2. Some string methods, like :meth:`Series.str.decode` are not available
    on ``StringArray`` because ``StringArray`` only holds strings, not
    bytes.
-
+3. In comparision operations, :class:`arrays.StringArray` and ``Series`` backed
+   by a ``StringArray`` will return an object with :class:`BooleanDtype`,
+   rather than a ``bool`` dtype object. Missing values in a ``StringArray``
+   will propagate in comparision operations, rather than always comparing
+   unequal like :attr:`numpy.nan`.

 Everything else that follows in the rest of this document applies equally to
 ``string`` and ``object`` dtype.
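
Note (not part of the commit): a small sketch of the behaviour the text.rst additions describe — boolean-returning string accessors and comparisons on StringDtype data produce the nullable BooleanDtype, and missing values propagate rather than comparing unequal.

    # Illustration only: nullable-boolean results described by the text.rst change.
    import pandas as pd

    s = pd.Series(['a', '2', None], dtype='string')
    s.str.isdigit()   # boolean accessor -> BooleanDtype, with <NA> where the input is missing
    s == 'a'          # comparison also returns BooleanDtype; <NA> propagates instead of False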
