pandas-dev
diff --git a/‎.github/actions/build_pandas/action.yml
+3-1 b/‎.github/actions/build_pandas/action.yml
+3-1
diff --git a/‎.github/actions/run-tests/action.yml
+27 b/‎.github/actions/run-tests/action.yml
+27
diff --git a/‎.github/workflows/macos-windows.yml
+1-15 b/‎.github/workflows/macos-windows.yml
+1-15
diff --git a/‎.github/workflows/posix.yml
+1-18 b/‎.github/workflows/posix.yml
+1-18
diff --git a/‎.github/workflows/python-dev.yml
+10-30 b/‎.github/workflows/python-dev.yml
+10-30
diff --git a/‎doc/source/reference/frame.rst
+1 b/‎doc/source/reference/frame.rst
+1
diff --git a/‎doc/source/reference/io.rst
+1 b/‎doc/source/reference/io.rst
+1
diff --git a/‎doc/source/reference/testing.rst
+2 b/‎doc/source/reference/testing.rst
+2
diff --git a/‎doc/source/user_guide/io.rst
+55-4 b/‎doc/source/user_guide/io.rst
+55-4
diff --git a/‎doc/source/whatsnew/v1.4.3.rst
+4 b/‎doc/source/whatsnew/v1.4.3.rst
+4
diff --git a/‎doc/source/whatsnew/v1.5.0.rst
+26-1 b/‎doc/source/whatsnew/v1.5.0.rst
+26-1
diff --git a/‎pandas/_libs/tslib.pyi
+1 b/‎pandas/_libs/tslib.pyi
+1
diff --git a/‎pandas/_libs/tslib.pyx
+8-5 b/‎pandas/_libs/tslib.pyx
+8-5
diff --git a/‎pandas/_libs/tslibs/ccalendar.pxd
-2 b/‎pandas/_libs/tslibs/ccalendar.pxd
-2
@@ -17,4 +17,6 @@ runs:
       shell: bash -el {0}
       env:
         # Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
-        N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
+        # GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
+        N_JOBS: 1
+        #N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
@@ -0,0 +1,27 @@
+name: Run tests and report results
+runs:
+  using: composite
+  steps:
+    - name: Test
+      run: ci/run_tests.sh
+      shell: bash -el {0}
+
+    - name: Publish test results
+      uses: actions/upload-artifact@v2
+      with:
+        name: Test results
+        path: test-data.xml
+      if: failure()
+
+    - name: Report Coverage
+      run: coverage report -m
+      shell: bash -el {0}
+      if: failure()
+
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v2
+      with:
+        flags: unittests
+        name: codecov-pandas
+        fail_ci_if_error: false
+      if: failure()
@@ -53,18 +53,4 @@ jobs:
       uses: ./.github/actions/build_pandas
 
     - name: Test
-      run: ci/run_tests.sh
-
-    - name: Publish test results
-      uses: actions/upload-artifact@v3
-      with:
-        name: Test results
-        path: test-data.xml
-      if: failure()
-
-    - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v2
-      with:
-        flags: unittests
-        name: codecov-pandas
-        fail_ci_if_error: false
+      uses: ./.github/actions/run-tests
@@ -157,23 +157,6 @@ jobs:
       uses: ./.github/actions/build_pandas
 
     - name: Test
-      run: ci/run_tests.sh
+      uses: ./.github/actions/run-tests
       # TODO: Don't continue on error for PyPy
       continue-on-error: ${{ env.IS_PYPY == 'true' }}
-
-    - name: Build Version
-      run: conda list
-
-    - name: Publish test results
-      uses: actions/upload-artifact@v3
-      with:
-        name: Test results
-        path: test-data.xml
-      if: failure()
-
-    - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v2
-      with:
-        flags: unittests
-        name: codecov-pandas
-        fail_ci_if_error: false
@@ -57,40 +57,20 @@ jobs:
     - name: Install dependencies
       shell: bash -el {0}
       run: |
-        python -m pip install --upgrade pip setuptools wheel
-        pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
-        pip install git+https://github.com/nedbat/coveragepy.git
-        pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov
-        pip list
+        python3 -m pip install --upgrade pip setuptools wheel
+        python3 -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
+        python3 -m pip install git+https://github.com/nedbat/coveragepy.git
+        python3 -m pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
+        python3 -m pip list
 
     - name: Build Pandas
       run: |
-        python setup.py build_ext -q -j2
-        python -m pip install -e . --no-build-isolation --no-use-pep517
+        python3 setup.py build_ext -q -j2
+        python3 -m pip install -e . --no-build-isolation --no-use-pep517
 
     - name: Build Version
       run: |
-        python -c "import pandas; pandas.show_versions();"
+        python3 -c "import pandas; pandas.show_versions();"
 
-    - name: Test with pytest
-      shell: bash -el {0}
-      run: |
-        ci/run_tests.sh
-
-    - name: Publish test results
-      uses: actions/upload-artifact@v3
-      with:
-        name: Test results
-        path: test-data.xml
-      if: failure()
-
-    - name: Report Coverage
-      run: |
-        coverage report -m
-
-    - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v2
-      with:
-        flags: unittests
-        name: codecov-pandas
-        fail_ci_if_error: true
+    - name: Test
+      uses: ./.github/actions/run-tests
@@ -373,6 +373,7 @@ Serialization / IO / conversion
 
    DataFrame.from_dict
    DataFrame.from_records
+   DataFrame.to_orc
    DataFrame.to_parquet
    DataFrame.to_pickle
    DataFrame.to_csv
 
@@ -159,6 +159,7 @@ ORC
    :toctree: api/
 
    read_orc
+   DataFrame.to_orc
 
 SAS
 ~~~
 
@@ -30,6 +30,7 @@ Exceptions and warnings
    errors.DtypeWarning
    errors.DuplicateLabelError
    errors.EmptyDataError
+   errors.IndexingError
    errors.InvalidIndexError
    errors.IntCastingNaNError
    errors.MergeError
@@ -45,6 +46,7 @@ Exceptions and warnings
    errors.SettingWithCopyError
    errors.SettingWithCopyWarning
    errors.SpecificationError
+   errors.UndefinedVariableError
    errors.UnsortedIndexError
    errors.UnsupportedFunctionCall
 
 
@@ -30,7 +30,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
     binary;`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__;:ref:`read_hdf<io.hdf5>`;:ref:`to_hdf<io.hdf5>`
     binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`
     binary;`Parquet Format <https://parquet.apache.org/>`__;:ref:`read_parquet<io.parquet>`;:ref:`to_parquet<io.parquet>`
-    binary;`ORC Format <https://orc.apache.org/>`__;:ref:`read_orc<io.orc>`;
+    binary;`ORC Format <https://orc.apache.org/>`__;:ref:`read_orc<io.orc>`;:ref:`to_orc<io.orc>`
     binary;`Stata <https://en.wikipedia.org/wiki/Stata>`__;:ref:`read_stata<io.stata_reader>`;:ref:`to_stata<io.stata_writer>`
     binary;`SAS <https://en.wikipedia.org/wiki/SAS_(software)>`__;:ref:`read_sas<io.sas_reader>`;
     binary;`SPSS <https://en.wikipedia.org/wiki/SPSS>`__;:ref:`read_spss<io.spss_reader>`;
@@ -5562,13 +5562,64 @@ ORC
 .. versionadded:: 1.0.0
 
 Similar to the :ref:`parquet <io.parquet>` format, the `ORC Format <https://orc.apache.org/>`__ is a binary columnar serialization
-for data frames. It is designed to make reading data frames efficient. pandas provides *only* a reader for the
-ORC format, :func:`~pandas.read_orc`. This requires the `pyarrow <https://arrow.apache.org/docs/python/>`__ library.
+for data frames. It is designed to make reading data frames efficient. pandas provides both the reader and the writer for the
+ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This requires the `pyarrow <https://arrow.apache.org/docs/python/>`__ library.
 
 .. warning::
 
    * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow.
-   * :func:`~pandas.read_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies <install.warn_orc>`.
+   * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0.
+   * :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc` are not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies <install.warn_orc>`.
+   * For supported dtypes please refer to `supported ORC features in Arrow <https://arrow.apache.org/docs/cpp/orc.html#data-types>`__.
+   * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files.
+
+.. ipython:: python
+
+   df = pd.DataFrame(
+       {
+           "a": list("abc"),
+           "b": list(range(1, 4)),
+           "c": np.arange(4.0, 7.0, dtype="float64"),
+           "d": [True, False, True],
+           "e": pd.date_range("20130101", periods=3),
+       }
+   )
+
+   df
+   df.dtypes
+
+Write to an orc file.
+
+.. ipython:: python
+   :okwarning:
+
+   df.to_orc("example_pa.orc", engine="pyarrow")
+
+Read from an orc file.
+
+.. ipython:: python
+   :okwarning:
+
+   result = pd.read_orc("example_pa.orc")
+
+   result.dtypes
+
+Read only certain columns of an orc file.
+
+.. ipython:: python
+
+   result = pd.read_orc(
+       "example_pa.orc",
+       columns=["a", "b"],
+   )
+   result.dtypes
+
+
+.. ipython:: python
+   :suppress:
+
+   os.remove("example_pa.orc")
+
 
 .. _io.sql:
 
 
@@ -15,15 +15,19 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`)
+- Fixed regression in :meth:`DataFrame.to_csv` raising error when :class:`DataFrame` contains extension dtype categorical column (:issue:`46297`, :issue:`46812`)
+- Fixed regression in representation of ``dtypes`` attribute of :class:`MultiIndex` (:issue:`46900`)
 - Fixed regression when setting values with :meth:`DataFrame.loc` updating :class:`RangeIndex` when index was set as new column and column was updated afterwards (:issue:`47128`)
 - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`)
 - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
 - Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`)
 - Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`)
+- Fixed regression in ``NaN`` comparison for :class:`Index` operations where the same object was compared (:issue:`47105`)
 - Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`)
 - Fixed regression in :func:`read_csv` with ``index_col=False`` identifying first row as index names when ``header=None`` (:issue:`46955`)
 - Fixed regression in :meth:`.DataFrameGroupBy.agg` when used with list-likes or dict-likes and ``axis=1`` that would give incorrect results; now raises ``NotImplementedError`` (:issue:`46995`)
 - Fixed regression in :meth:`DataFrame.resample` and :meth:`DataFrame.rolling` when used with list-likes or dict-likes and ``axis=1`` that would raise an unintuitive error message; now raises ``NotImplementedError`` (:issue:`46904`)
+- Fixed regression in :func:`assert_index_equal` when ``check_order=False`` and :class:`Index` has extension or object dtype (:issue:`47207`)
 - Fixed regression in :func:`read_excel` returning ints as floats on certain input sheets (:issue:`46988`)
 - Fixed regression in :meth:`DataFrame.shift` when ``axis`` is ``columns`` and ``fill_value`` is absent, ``freq`` is ignored (:issue:`47039`)
 
 
@@ -100,6 +100,28 @@ as seen in the following example.
                1 2021-01-02 08:00:00  4
                2 2021-01-02 16:00:00  5
 
+.. _whatsnew_150.enhancements.orc:
+
+Writing to ORC files
+^^^^^^^^^^^^^^^^^^^^
+
+The new method :meth:`DataFrame.to_orc` allows writing to ORC files (:issue:`43864`).
+
+This functionality depends the `pyarrow <http://arrow.apache.org/docs/python/>`__ library. For more details, see :ref:`the IO docs on ORC <io.orc>`.
+
+.. warning::
+
+   * It is *highly recommended* to install pyarrow using conda due to some issues occurred by pyarrow.
+   * :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0.
+   * :func:`~pandas.DataFrame.to_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies <install.warn_orc>`.
+   * For supported dtypes please refer to `supported ORC features in Arrow <https://arrow.apache.org/docs/cpp/orc.html#data-types>`__.
+   * Currently timezones in datetime columns are not preserved when a dataframe is converted into ORC files.
+
+.. code-block:: python
+
+    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
+    df.to_orc("./out.orc")
+
 .. _whatsnew_150.enhancements.tar:
 
 Reading directly from TAR archives
@@ -152,8 +174,9 @@ Other enhancements
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
 - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`)
 - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
-- :class:`DataError`, :class:`SpecificationError`, :class:`SettingWithCopyError`, :class:`SettingWithCopyWarning`, and :class:`NumExprClobberingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
+- :class:`DataError`, :class:`SpecificationError`, :class:`SettingWithCopyError`, :class:`SettingWithCopyWarning`, :class:`NumExprClobberingError`, :class:`UndefinedVariableError`, and :class:`IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
 - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
+- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.notable_bug_fixes:
@@ -850,6 +873,7 @@ I/O
 - Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
 - Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
 - Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`)
+- Bug in :meth:`DataFrame.to_excel` when writing an empty dataframe with :class:`MultiIndex` (:issue:`19543`)
 
 Period
 ^^^^^^
@@ -902,6 +926,7 @@ Reshaping
 - Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
 - Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
 - Bug in concanenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
+- Bug in :func:`concat` not sorting the column names when ``None`` is included (:issue:`47331`)
 - Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`)
 - Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`)
 - Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
 
@@ -9,6 +9,7 @@ def format_array_from_datetime(
     tz: tzinfo | None = ...,
     format: str | None = ...,
     na_rep: object = ...,
+    reso: int = ...,  # NPY_DATETIMEUNIT
 ) -> npt.NDArray[np.object_]: ...
 def array_with_unit_to_datetime(
     values: np.ndarray,
 
@@ -28,11 +28,12 @@ import pytz
 
 from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
+    NPY_FR_ns,
     check_dts_bounds,
-    dt64_to_dtstruct,
     dtstruct_to_dt64,
     get_datetime64_value,
     npy_datetimestruct,
+    pandas_datetime_to_datetimestruct,
     pydate_to_dt64,
     pydatetime_to_dt64,
     string_to_dts,
@@ -107,7 +108,8 @@ def format_array_from_datetime(
     ndarray[int64_t] values,
     tzinfo tz=None,
     str format=None,
-    object na_rep=None
+    object na_rep=None,
+    NPY_DATETIMEUNIT reso=NPY_FR_ns,
 ) -> np.ndarray:
     """
     return a np object array of the string formatted values
@@ -120,6 +122,7 @@ def format_array_from_datetime(
           a strftime capable string
     na_rep : optional, default is None
           a nat format
+    reso : NPY_DATETIMEUNIT, default NPY_FR_ns
 
     Returns
     -------
@@ -141,7 +144,7 @@ def format_array_from_datetime(
     # a format based on precision
     basic_format = format is None and tz is None
     if basic_format:
-        reso_obj = get_resolution(values)
+        reso_obj = get_resolution(values, reso=reso)
         show_ns = reso_obj == Resolution.RESO_NS
         show_us = reso_obj == Resolution.RESO_US
         show_ms = reso_obj == Resolution.RESO_MS
@@ -153,7 +156,7 @@ def format_array_from_datetime(
             result[i] = na_rep
         elif basic_format:
 
-            dt64_to_dtstruct(val, &dts)
+            pandas_datetime_to_datetimestruct(val, reso, &dts)
             res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
                    f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')
 
@@ -169,7 +172,7 @@ def format_array_from_datetime(
 
         else:
 
-            ts = Timestamp(val, tz=tz)
+            ts = Timestamp._from_value_and_reso(val, reso=reso, tz=tz)
             if format is None:
                 result[i] = str(ts)
             else:
 
@@ -15,8 +15,6 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil
 cpdef int get_lastbday(int year, int month) nogil
 cpdef int get_firstbday(int year, int month) nogil
 
-cdef int64_t DAY_NANOS
-cdef int64_t HOUR_NANOS
 cdef dict c_MONTH_NUMBERS
 
 cdef int32_t* month_offset