Skip to content

Commit c7cf82d

Browse files
authored
Merge branch 'main' into regression_test_for_bar_plot
2 parents a0cd359 + 007bf4a commit c7cf82d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+1407
-423
lines changed

.github/actions/run-tests/action.yml

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Run tests and report results
2+
runs:
3+
using: composite
4+
steps:
5+
- name: Test
6+
run: ci/run_tests.sh
7+
shell: bash -el {0}
8+
9+
- name: Publish test results
10+
uses: actions/upload-artifact@v2
11+
with:
12+
name: Test results
13+
path: test-data.xml
14+
if: failure()
15+
16+
- name: Report Coverage
17+
run: coverage report -m
18+
shell: bash -el {0}
19+
if: failure()
20+
21+
- name: Upload coverage to Codecov
22+
uses: codecov/codecov-action@v2
23+
with:
24+
flags: unittests
25+
name: codecov-pandas
26+
fail_ci_if_error: false
27+
if: failure()

.github/actions/setup-conda/action.yml

+10-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ inputs:
33
environment-file:
44
description: Conda environment file to use.
55
default: environment.yml
6+
environment-name:
7+
description: Name to use for the Conda environment
8+
default: test
9+
python-version:
10+
description: Python version to install
11+
required: false
612
pyarrow-version:
713
description: If set, overrides the PyArrow version in the Conda environment to the given string.
814
required: false
@@ -21,8 +27,11 @@ runs:
2127
uses: conda-incubator/setup-miniconda@v2.1.1
2228
with:
2329
environment-file: ${{ inputs.environment-file }}
30+
activate-environment: ${{ inputs.environment-name }}
31+
python-version: ${{ inputs.python-version }}
2432
channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
2533
channels: conda-forge
26-
mamba-version: "0.23"
34+
mamba-version: "0.24"
2735
use-mamba: true
36+
use-only-tar-bz2: true
2837
condarc-file: ci/condarc.yml

.github/workflows/asv-bot.yml

+2-7
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,8 @@ jobs:
4141

4242
# Although asv sets up its own env, deps are still needed
4343
# during discovery process
44-
- uses: conda-incubator/setup-miniconda@v2.1.1
45-
with:
46-
activate-environment: pandas-dev
47-
channel-priority: strict
48-
environment-file: ${{ env.ENV_FILE }}
49-
use-only-tar-bz2: true
50-
condarc-file: ci/condarc.yml
44+
- name: Set up Conda
45+
uses: ./.github/actions/setup-conda
5146

5247
- name: Run benchmarks
5348
id: bench

.github/workflows/code-checks.yml

+4-18
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,8 @@ jobs:
5858
path: ~/conda_pkgs_dir
5959
key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
6060

61-
- uses: conda-incubator/setup-miniconda@v2.1.1
62-
with:
63-
mamba-version: "*"
64-
channels: conda-forge
65-
activate-environment: pandas-dev
66-
channel-priority: strict
67-
environment-file: ${{ env.ENV_FILE }}
68-
use-only-tar-bz2: true
69-
condarc-file: ci/condarc.yml
61+
- name: Set up Conda
62+
uses: ./.github/actions/setup-conda
7063

7164
- name: Build Pandas
7265
id: build
@@ -128,15 +121,8 @@ jobs:
128121
path: ~/conda_pkgs_dir
129122
key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
130123

131-
- uses: conda-incubator/setup-miniconda@v2.1.1
132-
with:
133-
mamba-version: "*"
134-
channels: conda-forge
135-
activate-environment: pandas-dev
136-
channel-priority: strict
137-
environment-file: ${{ env.ENV_FILE }}
138-
use-only-tar-bz2: true
139-
condarc-file: ci/condarc.yml
124+
- name: Set up Conda
125+
uses: ./.github/actions/setup-conda
140126

141127
- name: Build Pandas
142128
id: build

.github/workflows/macos-windows.yml

+1-15
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,4 @@ jobs:
5353
uses: ./.github/actions/build_pandas
5454

5555
- name: Test
56-
run: ci/run_tests.sh
57-
58-
- name: Publish test results
59-
uses: actions/upload-artifact@v3
60-
with:
61-
name: Test results
62-
path: test-data.xml
63-
if: failure()
64-
65-
- name: Upload coverage to Codecov
66-
uses: codecov/codecov-action@v2
67-
with:
68-
flags: unittests
69-
name: codecov-pandas
70-
fail_ci_if_error: false
56+
uses: ./.github/actions/run-tests

.github/workflows/posix.yml

+4-29
Original file line numberDiff line numberDiff line change
@@ -147,41 +147,16 @@ jobs:
147147
# xsel for clipboard tests
148148
run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }}
149149

150-
- uses: conda-incubator/setup-miniconda@v2.1.1
150+
- name: Set up Conda
151+
uses: ./.github/actions/setup-conda
151152
with:
152-
mamba-version: "*"
153-
channels: conda-forge
154-
activate-environment: pandas-dev
155-
channel-priority: flexible
156153
environment-file: ${{ env.ENV_FILE }}
157-
use-only-tar-bz2: true
158-
condarc-file: ci/condarc.yml
159-
160-
- name: Upgrade Arrow version
161-
run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=${{ matrix.pyarrow_version }}
162-
if: ${{ matrix.pyarrow_version }}
154+
pyarrow-version: ${{ matrix.pyarrow_version }}
163155

164156
- name: Build Pandas
165157
uses: ./.github/actions/build_pandas
166158

167159
- name: Test
168-
run: ci/run_tests.sh
160+
uses: ./.github/actions/run-tests
169161
# TODO: Don't continue on error for PyPy
170162
continue-on-error: ${{ env.IS_PYPY == 'true' }}
171-
172-
- name: Build Version
173-
run: conda list
174-
175-
- name: Publish test results
176-
uses: actions/upload-artifact@v3
177-
with:
178-
name: Test results
179-
path: test-data.xml
180-
if: failure()
181-
182-
- name: Upload coverage to Codecov
183-
uses: codecov/codecov-action@v2
184-
with:
185-
flags: unittests
186-
name: codecov-pandas
187-
fail_ci_if_error: false

.github/workflows/python-dev.yml

+10-30
Original file line numberDiff line numberDiff line change
@@ -57,40 +57,20 @@ jobs:
5757
- name: Install dependencies
5858
shell: bash -el {0}
5959
run: |
60-
python -m pip install --upgrade pip setuptools wheel
61-
pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
62-
pip install git+https://github.com/nedbat/coveragepy.git
63-
pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov
64-
pip list
60+
python3 -m pip install --upgrade pip setuptools wheel
61+
python3 -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
62+
python3 -m pip install git+https://github.com/nedbat/coveragepy.git
63+
python3 -m pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
64+
python3 -m pip list
6565
6666
- name: Build Pandas
6767
run: |
68-
python setup.py build_ext -q -j2
69-
python -m pip install -e . --no-build-isolation --no-use-pep517
68+
python3 setup.py build_ext -q -j2
69+
python3 -m pip install -e . --no-build-isolation --no-use-pep517
7070
7171
- name: Build Version
7272
run: |
73-
python -c "import pandas; pandas.show_versions();"
73+
python3 -c "import pandas; pandas.show_versions();"
7474
75-
- name: Test with pytest
76-
shell: bash -el {0}
77-
run: |
78-
ci/run_tests.sh
79-
80-
- name: Publish test results
81-
uses: actions/upload-artifact@v3
82-
with:
83-
name: Test results
84-
path: test-data.xml
85-
if: failure()
86-
87-
- name: Report Coverage
88-
run: |
89-
coverage report -m
90-
91-
- name: Upload coverage to Codecov
92-
uses: codecov/codecov-action@v2
93-
with:
94-
flags: unittests
95-
name: codecov-pandas
96-
fail_ci_if_error: true
75+
- name: Test
76+
uses: ./.github/actions/run-tests

.github/workflows/sdist.yml

+5-5
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ jobs:
5959
name: ${{matrix.python-version}}-sdist.gz
6060
path: dist/*.gz
6161

62-
- uses: conda-incubator/setup-miniconda@v2.1.1
62+
- name: Set up Conda
63+
uses: ./.github/actions/setup-conda
6364
with:
64-
activate-environment: pandas-sdist
65-
channels: conda-forge
66-
python-version: '${{ matrix.python-version }}'
67-
condarc-file: ci/condarc.yml
65+
environment-file: ""
66+
environment-name: pandas-sdist
67+
python-version: ${{ matrix.python-version }}
6868

6969
- name: Install pandas from sdist
7070
run: |

doc/source/development/extending.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,11 @@ applies only to certain dtypes.
7474
Extension types
7575
---------------
7676

77-
.. warning::
77+
.. note::
7878

79-
The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs are new and
80-
experimental. They may change between versions without warning.
79+
The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs were
80+
experimental prior to pandas 1.5. Starting with version 1.5, future changes will follow
81+
the :ref:`pandas deprecation policy <policies.version>`.
8182

8283
pandas defines an interface for implementing data types and arrays that *extend*
8384
NumPy's type system. pandas itself uses the extension system for some types

doc/source/reference/frame.rst

+1
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ Serialization / IO / conversion
373373

374374
DataFrame.from_dict
375375
DataFrame.from_records
376+
DataFrame.to_orc
376377
DataFrame.to_parquet
377378
DataFrame.to_pickle
378379
DataFrame.to_csv

doc/source/reference/io.rst

+1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ ORC
159159
:toctree: api/
160160

161161
read_orc
162+
DataFrame.to_orc
162163

163164
SAS
164165
~~~

doc/source/reference/testing.rst

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Exceptions and warnings
4545
errors.SettingWithCopyError
4646
errors.SettingWithCopyWarning
4747
errors.SpecificationError
48+
errors.UndefinedVariableError
4849
errors.UnsortedIndexError
4950
errors.UnsupportedFunctionCall
5051

doc/source/user_guide/groupby.rst

+9-1
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ different dtypes, then a common dtype will be determined in the same way as ``Da
761761
Transformation
762762
--------------
763763

764-
The ``transform`` method returns an object that is indexed the same (same size)
764+
The ``transform`` method returns an object that is indexed the same
765765
as the one being grouped. The transform function must:
766766

767767
* Return a result that is either the same size as the group chunk or
@@ -776,6 +776,14 @@ as the one being grouped. The transform function must:
776776
* (Optionally) operates on the entire group chunk. If this is supported, a
777777
fast path is used starting from the *second* chunk.
778778

779+
.. deprecated:: 1.5.0
780+
781+
When using ``.transform`` on a grouped DataFrame and the transformation function
782+
returns a DataFrame, currently pandas does not align the result's index
783+
with the input's index. This behavior is deprecated and alignment will
784+
be performed in a future version of pandas. You can apply ``.to_numpy()`` to the
785+
result of the transformation function to avoid alignment.
786+
779787
Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the
780788
transformation function. If the results from different groups have different dtypes, then
781789
a common dtype will be determined in the same way as ``DataFrame`` construction.

doc/source/user_guide/io.rst

+55-4
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
3030
binary;`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__;:ref:`read_hdf<io.hdf5>`;:ref:`to_hdf<io.hdf5>`
3131
binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`
3232
binary;`Parquet Format <https://parquet.apache.org/>`__;:ref:`read_parquet<io.parquet>`;:ref:`to_parquet<io.parquet>`
33-
binary;`ORC Format <https://orc.apache.org/>`__;:ref:`read_orc<io.orc>`;
33+
binary;`ORC Format <https://orc.apache.org/>`__;:ref:`read_orc<io.orc>`;:ref:`to_orc<io.orc>`
3434
binary;`Stata <https://en.wikipedia.org/wiki/Stata>`__;:ref:`read_stata<io.stata_reader>`;:ref:`to_stata<io.stata_writer>`
3535
binary;`SAS <https://en.wikipedia.org/wiki/SAS_(software)>`__;:ref:`read_sas<io.sas_reader>`;
3636
binary;`SPSS <https://en.wikipedia.org/wiki/SPSS>`__;:ref:`read_spss<io.spss_reader>`;
@@ -5562,13 +5562,64 @@ ORC
55625562
.. versionadded:: 1.0.0
55635563

55645564
Similar to the :ref:`parquet <io.parquet>` format, the `ORC Format <https://orc.apache.org/>`__ is a binary columnar serialization
5565-
for data frames. It is designed to make reading data frames efficient. pandas provides *only* a reader for the
5566-
ORC format, :func:`~pandas.read_orc`. This requires the `pyarrow <https://arrow.apache.org/docs/python/>`__ library.
5565+
for data frames. It is designed to make reading data frames efficient. pandas provides both a reader and a writer for the
5566+
ORC format, :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc`. This requires the `pyarrow <https://arrow.apache.org/docs/python/>`__ library.
55675567

55685568
.. warning::
55695569

55705570
* It is *highly recommended* to install pyarrow using conda due to some issues caused by pyarrow.
5571-
* :func:`~pandas.read_orc` is not supported on Windows yet, you can find valid environments on :ref:`install optional dependencies <install.warn_orc>`.
5571+
* :func:`~pandas.DataFrame.to_orc` requires pyarrow>=7.0.0.
5572+
* :func:`~pandas.read_orc` and :func:`~pandas.DataFrame.to_orc` are not supported on Windows yet; you can find valid environments in :ref:`install optional dependencies <install.warn_orc>`.
5573+
* For supported dtypes please refer to `supported ORC features in Arrow <https://arrow.apache.org/docs/cpp/orc.html#data-types>`__.
5574+
* Currently, timezones in datetime columns are not preserved when a DataFrame is converted into an ORC file.
5575+
5576+
.. ipython:: python
5577+
5578+
df = pd.DataFrame(
5579+
{
5580+
"a": list("abc"),
5581+
"b": list(range(1, 4)),
5582+
"c": np.arange(4.0, 7.0, dtype="float64"),
5583+
"d": [True, False, True],
5584+
"e": pd.date_range("20130101", periods=3),
5585+
}
5586+
)
5587+
5588+
df
5589+
df.dtypes
5590+
5591+
Write to an orc file.
5592+
5593+
.. ipython:: python
5594+
:okwarning:
5595+
5596+
df.to_orc("example_pa.orc", engine="pyarrow")
5597+
5598+
Read from an orc file.
5599+
5600+
.. ipython:: python
5601+
:okwarning:
5602+
5603+
result = pd.read_orc("example_pa.orc")
5604+
5605+
result.dtypes
5606+
5607+
Read only certain columns of an orc file.
5608+
5609+
.. ipython:: python
5610+
5611+
result = pd.read_orc(
5612+
"example_pa.orc",
5613+
columns=["a", "b"],
5614+
)
5615+
result.dtypes
5616+
5617+
5618+
.. ipython:: python
5619+
:suppress:
5620+
5621+
os.remove("example_pa.orc")
5622+
55725623
55735624
.. _io.sql:
55745625

0 commit comments

Comments
 (0)