yehoshuadimarsky
diff --git a/‎asv_bench/benchmarks/io/json.py
+24 b/‎asv_bench/benchmarks/io/json.py
+24
diff --git a/‎ci/azure/posix.yml
+21-7 b/‎ci/azure/posix.yml
+21-7
diff --git a/‎ci/code_checks.sh
+8 b/‎ci/code_checks.sh
+8
diff --git a/‎ci/run_tests.sh
-11 b/‎ci/run_tests.sh
-11
diff --git a/‎ci/setup_env.sh
+3-3 b/‎ci/setup_env.sh
+3-3
diff --git a/‎doc/source/whatsnew/v1.0.0.rst
+10-3 b/‎doc/source/whatsnew/v1.0.0.rst
+10-3
diff --git a/‎pandas/_libs/intervaltree.pxi.in
+1-4 b/‎pandas/_libs/intervaltree.pxi.in
+1-4
diff --git a/‎pandas/_libs/parsers.pyx
+9-27 b/‎pandas/_libs/parsers.pyx
+9-27
diff --git a/‎pandas/_libs/reduction.pyx
+10-4 b/‎pandas/_libs/reduction.pyx
+10-4
diff --git a/‎pandas/_libs/src/parser/tokenizer.c
+7 b/‎pandas/_libs/src/parser/tokenizer.c
+7
diff --git a/‎pandas/_libs/src/parser/tokenizer.h
+4-5 b/‎pandas/_libs/src/parser/tokenizer.h
+4-5
diff --git a/‎pandas/_libs/src/ujson/lib/ultrajson.h
+4 b/‎pandas/_libs/src/ujson/lib/ultrajson.h
+4
@@ -132,6 +132,30 @@ def peakmem_to_json_wide(self, orient, frame):
         df.to_json(self.fname, orient=orient)
 
 
+class ToJSONISO(BaseIO):
+    fname = "__test__.json"
+    params = [["split", "columns", "index", "values", "records"]]
+    param_names = ["orient"]
+
+    def setup(self, orient):
+        N = 10 ** 5
+        index = date_range("20000101", periods=N, freq="H")
+        timedeltas = timedelta_range(start=1, periods=N, freq="s")
+        datetimes = date_range(start=1, periods=N, freq="s")
+        self.df = DataFrame(
+            {
+                "td_1": timedeltas,
+                "td_2": timedeltas,
+                "ts_1": datetimes,
+                "ts_2": datetimes,
+            },
+            index=index,
+        )
+
+    def time_iso_format(self, orient):
+        self.df.to_json(orient=orient, date_format="iso")
+
+
 class ToJSONLines(BaseIO):
 
     fname = "__test__.json"
 
@@ -19,18 +19,24 @@ jobs:
           ENV_FILE: ci/deps/azure-36-minimum_versions.yaml
           CONDA_PY: "36"
           PATTERN: "not slow and not network"
+
         py36_locale_slow_old_np:
           ENV_FILE: ci/deps/azure-36-locale_slow.yaml
           CONDA_PY: "36"
           PATTERN: "slow"
-          LOCALE_OVERRIDE: "zh_CN.UTF-8"
+          # pandas does not use the language (zh_CN), but should support different encodings (utf8)
+          # we should test with encodings other than utf8, but it doesn't seem like Ubuntu supports any
+          LANG: "zh_CN.utf8"
+          LC_ALL: "zh_CN.utf8"
           EXTRA_APT: "language-pack-zh-hans"
 
         py36_locale:
           ENV_FILE: ci/deps/azure-36-locale.yaml
           CONDA_PY: "36"
           PATTERN: "not slow and not network"
-          LOCALE_OVERRIDE: "it_IT.UTF-8"
+          LANG: "it_IT.utf8"
+          LC_ALL: "it_IT.utf8"
+          EXTRA_APT: "language-pack-it"
 
         py36_32bit:
           ENV_FILE: ci/deps/azure-36-32bit.yaml
@@ -42,7 +48,9 @@ jobs:
           ENV_FILE: ci/deps/azure-37-locale.yaml
           CONDA_PY: "37"
           PATTERN: "not slow and not network"
-          LOCALE_OVERRIDE: "zh_CN.UTF-8"
+          LANG: "zh_CN.utf8"
+          LC_ALL: "zh_CN.utf8"
+          EXTRA_APT: "language-pack-zh-hans"
 
         py37_np_dev:
           ENV_FILE: ci/deps/azure-37-numpydev.yaml
@@ -54,10 +62,16 @@ jobs:
 
   steps:
     - script: |
-        if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
-        echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
-        echo "Creating Environment"
-        ci/setup_env.sh
+        if [ "$(uname)" == "Linux" ]; then
+          sudo apt-get update
+          sudo apt-get install -y libc6-dev-i386 $EXTRA_APT
+        fi
+      displayName: 'Install extra packages'
+
+    - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
+      displayName: 'Set conda path'
+
+    - script: ci/setup_env.sh
       displayName: 'Setup environment and build pandas'
 
     - script: |
 
@@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of not concatenated strings' ; echo $MSG
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        $BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
+    else
+        $BASE_DIR/scripts/validate_string_concatenation.py .
+    fi
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     echo "isort --version-number"
     isort --version-number
 
 
@@ -5,17 +5,6 @@
 # https://github.com/pytest-dev/pytest/issues/1075
 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
 
-if [ -n "$LOCALE_OVERRIDE" ]; then
-    export LC_ALL="$LOCALE_OVERRIDE"
-    export LANG="$LOCALE_OVERRIDE"
-    PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
-    if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
-        echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
-        # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
-        # exit 1
-    fi
-fi
-
 if [[ "not network" == *"$PATTERN"* ]]; then
     export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
 fi
 
@@ -1,15 +1,15 @@
 #!/bin/bash -e
 
 # edit the locale file if needed
-if [ -n "$LOCALE_OVERRIDE" ]; then
+if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
     echo "Adding locale to the first line of pandas/__init__.py"
     rm -f pandas/__init__.pyc
-    SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n"
+    SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n"
     sed -i "$SEDC" pandas/__init__.py
+
     echo "[head -4 pandas/__init__.py]"
     head -4 pandas/__init__.py
     echo
-    sudo locale-gen "$LOCALE_OVERRIDE"
 fi
 
 MINICONDA_DIR="$HOME/miniconda3"
 
@@ -56,7 +56,7 @@ Dedicated string data type
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 We've added :class:`StringDtype`, an extension type dedicated to string data.
-Previously, strings were typically stored in object-dtype NumPy arrays.
+Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`)
 
 .. warning::
 
@@ -216,13 +216,15 @@ Other enhancements
   (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
   now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
 - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
+- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
 - The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
 - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`)
 - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
 - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
 - :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
 - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
 - Added new writer for exporting Stata dta files in version 118, ``StataWriter118``.  This format supports exporting strings containing Unicode characters (:issue:`23573`)
+- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
 
 Build Changes
 ^^^^^^^^^^^^^
@@ -812,6 +814,7 @@ Datetimelike
 - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
 - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
 - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
+- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`)
 - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
 - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`)
 - Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
@@ -924,6 +927,7 @@ I/O
 - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
 - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
 - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
+- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`)
 
 Plotting
 ^^^^^^^^
@@ -945,7 +949,7 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
+- Bug in :meth:`DataFrame.groupby.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`)
 - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
 - Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
@@ -975,6 +979,7 @@ Reshaping
 - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
 - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
 - Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`)
+- Bug in :meth:`Index.join` that caused an infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`)
 - Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`)
 - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
 - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`)
@@ -1013,7 +1018,9 @@ Other
 - Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
 - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`)
 - Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
-
+- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
+- Bug in :meth:`DataFrame.to_csv` where, when supplied a series with ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
+- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
 
 .. _whatsnew_1000.contributors:
 
 
@@ -8,14 +8,11 @@ from pandas._libs.algos import is_monotonic
 
 ctypedef fused int_scalar_t:
     int64_t
-    int32_t
     float64_t
-    float32_t
 
 ctypedef fused uint_scalar_t:
     uint64_t
     float64_t
-    float32_t
 
 ctypedef fused scalar_t:
     int_scalar_t
@@ -212,7 +209,7 @@ cdef sort_values_and_indices(all_values, all_indices, subset):
 {{py:
 
 nodes = []
-for dtype in ['float32', 'float64', 'int32', 'int64', 'uint64']:
+for dtype in ['float64', 'int64', 'uint64']:
     for closed, cmp_left, cmp_right in [
         ('left', '<=', '<'),
         ('right', '<', '<='),
 
@@ -171,12 +171,9 @@ cdef extern from "parser/tokenizer.h":
         int64_t skip_first_N_rows
         int64_t skipfooter
         # pick one, depending on whether the converter requires GIL
-        float64_t (*double_converter_nogil)(const char *, char **,
-                                            char, char, char,
-                                            int, int *, int *) nogil
-        float64_t (*double_converter_withgil)(const char *, char **,
-                                              char, char, char,
-                                              int, int *, int *)
+        float64_t (*double_converter)(const char *, char **,
+                                      char, char, char,
+                                      int, int *, int *) nogil
 
         #  error handling
         char *warn_msg
@@ -469,16 +466,11 @@ cdef class TextReader:
 
         if float_precision == "round_trip":
             # see gh-15140
-            #
-            # Our current roundtrip implementation requires the GIL.
-            self.parser.double_converter_nogil = NULL
-            self.parser.double_converter_withgil = round_trip
+            self.parser.double_converter = round_trip
         elif float_precision == "high":
-            self.parser.double_converter_withgil = NULL
-            self.parser.double_converter_nogil = precise_xstrtod
+            self.parser.double_converter = precise_xstrtod
         else:
-            self.parser.double_converter_withgil = NULL
-            self.parser.double_converter_nogil = xstrtod
+            self.parser.double_converter = xstrtod
 
         if isinstance(dtype, dict):
             dtype = {k: pandas_dtype(dtype[k])
@@ -1663,22 +1655,12 @@ cdef _try_double(parser_t *parser, int64_t col,
     result = np.empty(lines, dtype=np.float64)
     data = <float64_t *>result.data
     na_fset = kset_float64_from_list(na_flist)
-    if parser.double_converter_nogil != NULL:  # if it can run without the GIL
-        with nogil:
-            error = _try_double_nogil(parser, parser.double_converter_nogil,
-                                      col, line_start, line_end,
-                                      na_filter, na_hashset, use_na_flist,
-                                      na_fset, NA, data, &na_count)
-    else:
-        assert parser.double_converter_withgil != NULL
-        error = _try_double_nogil(parser,
-                                  <float64_t (*)(const char *, char **,
-                                                 char, char, char,
-                                                 int, int *, int *)
-                                  nogil>parser.double_converter_withgil,
+    with nogil:
+        error = _try_double_nogil(parser, parser.double_converter,
                                   col, line_start, line_end,
                                   na_filter, na_hashset, use_na_flist,
                                   na_fset, NA, data, &na_count)
+
     kh_destroy_float64(na_fset)
     if error != 0:
         return None, None
 
@@ -1,3 +1,4 @@
+from copy import copy
 from distutils.version import LooseVersion
 
 from cython import Py_ssize_t
@@ -15,7 +16,7 @@ from numpy cimport (ndarray,
 cnp.import_array()
 
 cimport pandas._libs.util as util
-from pandas._libs.lib import maybe_convert_objects
+from pandas._libs.lib import maybe_convert_objects, is_scalar
 
 
 cdef _check_result_array(object obj, Py_ssize_t cnt):
@@ -492,14 +493,19 @@ def apply_frame_axis0(object frame, object f, object names,
             # Need to infer if low level index slider will cause segfaults
             require_slow_apply = i == 0 and piece is chunk
             try:
-                if piece.index is chunk.index:
-                    piece = piece.copy(deep='all')
-                else:
+                if piece.index is not chunk.index:
                     mutated = True
             except AttributeError:
                 # `piece` might not have an index, could be e.g. an int
                 pass
 
+            if not is_scalar(piece):
+                # Need to copy data to avoid appending references
+                if hasattr(piece, "copy"):
+                    piece = piece.copy(deep="all")
+                else:
+                    piece = copy(piece)
+
             results.append(piece)
 
             # If the data was modified inplace we need to
 
@@ -1774,11 +1774,18 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
 
 double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
                   int skip_trailing, int *error, int *maybe_int) {
+    // This is called from a nogil block in parsers.pyx
+    // so need to explicitly get GIL before Python calls
+    PyGILState_STATE gstate;
+    gstate = PyGILState_Ensure();
+
     double r = PyOS_string_to_double(p, q, 0);
     if (maybe_int != NULL) *maybe_int = 0;
     if (PyErr_Occurred() != NULL) *error = -1;
     else if (r == Py_HUGE_VAL) *error = (int)Py_HUGE_VAL;
     PyErr_Clear();
+
+    PyGILState_Release(gstate);
     return r;
 }
 
 
@@ -155,11 +155,8 @@ typedef struct parser_t {
     PyObject *skipfunc;
     int64_t skip_first_N_rows;
     int64_t skip_footer;
-    // pick one, depending on whether the converter requires GIL
-    double (*double_converter_nogil)(const char *, char **,
-                                     char, char, char, int, int *, int *);
-    double (*double_converter_withgil)(const char *, char **,
-                                       char, char, char, int, int *, int *);
+    double (*double_converter)(const char *, char **,
+                               char, char, char, int, int *, int *);
 
     // error handling
     char *warn_msg;
@@ -226,6 +223,8 @@ double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
 double precise_xstrtod(const char *p, char **q, char decimal,
                        char sci, char tsep, int skip_trailing,
                        int *error, int *maybe_int);
+
+// GH-15140 - round_trip requires and acquires the GIL on its own
 double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
                   int skip_trailing, int *error, int *maybe_int);
 int to_boolean(const char *item, uint8_t *val);
 
@@ -154,6 +154,8 @@ enum JSTYPES {
   JT_ARRAY,    // Array structure
   JT_OBJECT,   // Key/Value structure
   JT_INVALID,  // Internal, do not return nor expect
+  JT_POS_INF,  // Positive infinity
+  JT_NEG_INF,  // Negative infinity
 };
 
 typedef void * JSOBJ;
@@ -290,6 +292,8 @@ typedef struct __JSONObjectDecoder {
   JSOBJ (*newTrue)(void *prv);
   JSOBJ (*newFalse)(void *prv);
   JSOBJ (*newNull)(void *prv);
+  JSOBJ (*newPosInf)(void *prv);
+  JSOBJ (*newNegInf)(void *prv);
   JSOBJ (*newObject)(void *prv, void *decoder);
   JSOBJ (*endObject)(void *prv, JSOBJ obj);
   JSOBJ (*newArray)(void *prv, void *decoder);