Skip to content

Commit 83052c1

Browse files
committed
Merge remote-tracking branch 'upstream/main' into clean/environment_yml
2 parents d094c4f + d489393 commit 83052c1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+1240
-361
lines changed

.github/actions/setup-conda/action.yml

+12-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ inputs:
33
environment-file:
44
description: Conda environment file to use.
55
default: environment.yml
6+
environment-name:
7+
description: Name to use for the Conda environment
8+
default: test
9+
python-version:
10+
description: Python version to install
11+
required: false
612
pyarrow-version:
713
description: If set, overrides the PyArrow version in the Conda environment to the given string.
814
required: false
@@ -18,10 +24,14 @@ runs:
1824
if: ${{ inputs.pyarrow-version }}
1925

2026
- name: Install ${{ inputs.environment-file }}
21-
uses: conda-incubator/setup-miniconda@v2
27+
uses: conda-incubator/setup-miniconda@v2.1.1
2228
with:
2329
environment-file: ${{ inputs.environment-file }}
30+
activate-environment: ${{ inputs.environment-name }}
31+
python-version: ${{ inputs.python-version }}
2432
channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
2533
channels: conda-forge
26-
mamba-version: "0.23"
34+
mamba-version: "0.24"
2735
use-mamba: true
36+
use-only-tar-bz2: true
37+
condarc-file: ci/condarc.yml

.github/workflows/asv-bot.yml

+2-6
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,8 @@ jobs:
4141

4242
# Although asv sets up its own env, deps are still needed
4343
# during discovery process
44-
- uses: conda-incubator/setup-miniconda@v2.1.1
45-
with:
46-
activate-environment: pandas-dev
47-
channel-priority: strict
48-
environment-file: ${{ env.ENV_FILE }}
49-
use-only-tar-bz2: true
44+
- name: Set up Conda
45+
uses: ./.github/actions/setup-conda
5046

5147
- name: Run benchmarks
5248
id: bench

.github/workflows/code-checks.yml

+4-16
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,8 @@ jobs:
5858
path: ~/conda_pkgs_dir
5959
key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
6060

61-
- uses: conda-incubator/setup-miniconda@v2.1.1
62-
with:
63-
mamba-version: "*"
64-
channels: conda-forge
65-
activate-environment: pandas-dev
66-
channel-priority: strict
67-
environment-file: ${{ env.ENV_FILE }}
68-
use-only-tar-bz2: true
61+
- name: Set up Conda
62+
uses: ./.github/actions/setup-conda
6963

7064
- name: Build Pandas
7165
id: build
@@ -127,14 +121,8 @@ jobs:
127121
path: ~/conda_pkgs_dir
128122
key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
129123

130-
- uses: conda-incubator/setup-miniconda@v2.1.1
131-
with:
132-
mamba-version: "*"
133-
channels: conda-forge
134-
activate-environment: pandas-dev
135-
channel-priority: strict
136-
environment-file: ${{ env.ENV_FILE }}
137-
use-only-tar-bz2: true
124+
- name: Set up Conda
125+
uses: ./.github/actions/setup-conda
138126

139127
- name: Build Pandas
140128
id: build

.github/workflows/sdist.yml

+5-4
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,12 @@ jobs:
5959
name: ${{matrix.python-version}}-sdist.gz
6060
path: dist/*.gz
6161

62-
- uses: conda-incubator/setup-miniconda@v2.1.1
62+
- name: Set up Conda
63+
uses: ./.github/actions/setup-conda
6364
with:
64-
activate-environment: pandas-sdist
65-
channels: conda-forge
66-
python-version: '${{ matrix.python-version }}'
65+
environment-file: ""
66+
environment-name: pandas-sdist
67+
python-version: ${{ matrix.python-version }}
6768

6869
- name: Install pandas from sdist
6970
run: |

.github/workflows/ubuntu.yml

+3-10
Original file line numberDiff line numberDiff line change
@@ -147,18 +147,11 @@ jobs:
147147
# xsel for clipboard tests
148148
run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}
149149

150-
- uses: conda-incubator/setup-miniconda@v2.1.1
150+
- name: Set up Conda
151+
uses: ./.github/actions/setup-conda
151152
with:
152-
mamba-version: "*"
153-
channels: conda-forge
154-
activate-environment: pandas-dev
155-
channel-priority: flexible
156153
environment-file: ${{ env.ENV_FILE }}
157-
use-only-tar-bz2: true
158-
159-
- name: Upgrade Arrow version
160-
run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=${{ matrix.pyarrow_version }}
161-
if: ${{ matrix.pyarrow_version }}
154+
pyarrow-version: ${{ matrix.pyarrow_version }}
162155

163156
- name: Build Pandas
164157
uses: ./.github/actions/build_pandas

ci/condarc.yml

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# https://docs.conda.io/projects/conda/en/latest/configuration.html
2+
3+
# always_yes (NoneType, bool)
4+
# aliases: yes
5+
# Automatically choose the 'yes' option whenever asked to proceed with a
6+
# conda operation, such as when running `conda install`.
7+
#
8+
always_yes: true
9+
10+
# remote_connect_timeout_secs (float)
11+
# The number of seconds conda will wait for your client to establish a
12+
# connection to a remote url resource.
13+
#
14+
remote_connect_timeout_secs: 30.0
15+
16+
# remote_max_retries (int)
17+
# The maximum number of retries each HTTP connection should attempt.
18+
#
19+
remote_max_retries: 10
20+
21+
# remote_backoff_factor (int)
22+
# The factor determines how long an HTTP connection should wait before
23+
# the next retry attempt.
24+
#
25+
remote_backoff_factor: 3
26+
27+
# remote_read_timeout_secs (float)
28+
# Once conda has connected to a remote resource and sent an HTTP
29+
# request, the read timeout is the number of seconds conda will wait for
30+
# the server to send a response.
31+
#
32+
remote_read_timeout_secs: 60.0

doc/source/development/extending.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,11 @@ applies only to certain dtypes.
7474
Extension types
7575
---------------
7676

77-
.. warning::
77+
.. note::
7878

79-
The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs are new and
80-
experimental. They may change between versions without warning.
79+
The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs were
80+
experimental prior to pandas 1.5. Starting with version 1.5, future changes will follow
81+
the :ref:`pandas deprecation policy <policies.version>`.
8182

8283
pandas defines an interface for implementing data types and arrays that *extend*
8384
NumPy's type system. pandas itself uses the extension system for some types

doc/source/user_guide/groupby.rst

+9-1
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ different dtypes, then a common dtype will be determined in the same way as ``Da
761761
Transformation
762762
--------------
763763

764-
The ``transform`` method returns an object that is indexed the same (same size)
764+
The ``transform`` method returns an object that is indexed the same
765765
as the one being grouped. The transform function must:
766766

767767
* Return a result that is either the same size as the group chunk or
@@ -776,6 +776,14 @@ as the one being grouped. The transform function must:
776776
* (Optionally) operates on the entire group chunk. If this is supported, a
777777
fast path is used starting from the *second* chunk.
778778

779+
.. deprecated:: 1.5.0
780+
781+
When using ``.transform`` on a grouped DataFrame and the transformation function
782+
returns a DataFrame, currently pandas does not align the result's index
783+
with the input's index. This behavior is deprecated and alignment will
784+
be performed in a future version of pandas. You can apply ``.to_numpy()`` to the
785+
result of the transformation function to avoid alignment.
786+
779787
Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the
780788
transformation function. If the results from different groups have different dtypes, then
781789
a common dtype will be determined in the same way as ``DataFrame`` construction.

doc/source/whatsnew/v1.5.0.rst

+6
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ Other enhancements
133133
- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises on duplicate labels if it is missing or False (:issue:`45245`)
134134
- :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
135135
- Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`)
136+
- :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`).
136137
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
137138
- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`)
138139
- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`)
@@ -689,6 +690,8 @@ Other Deprecations
689690
- Deprecated the ``closed`` argument in :class:`intervaltree` in favor of the ``inclusive`` argument; in a future version passing ``closed`` will raise (:issue:`40245`)
690691
- Deprecated the ``closed`` argument in :class:`ArrowInterval` in favor of the ``inclusive`` argument; in a future version passing ``closed`` will raise (:issue:`40245`)
691692
- Deprecated allowing ``unit="M"`` or ``unit="Y"`` in :class:`Timestamp` constructor with a non-round float value (:issue:`47267`)
693+
- Deprecated the ``display.column_space`` global configuration option (:issue:`7576`)
694+
- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returns a DataFrame (:issue:`45648`)
692695
-
693696

694697
.. ---------------------------------------------------------------------------
@@ -736,6 +739,7 @@ Datetimelike
736739
- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`)
737740
- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
738741
- Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`)
742+
- Bug in :class:`DatetimeArray` construction when passed another :class:`DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`)
739743
-
740744

741745
Timedelta
@@ -842,8 +846,10 @@ I/O
842846
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
843847
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
844848
- :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``.
849+
- Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`)
845850
- Bug in :func:`read_sas` where ``None`` was returned rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
846851
- Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
852+
- Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`)
847853

848854
Period
849855
^^^^^^

pandas/_libs/properties.pyi

+22-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,28 @@
1-
# pyright: reportIncompleteStub = false
2-
from typing import Any
1+
from typing import (
2+
Sequence,
3+
overload,
4+
)
5+
6+
from pandas._typing import (
7+
AnyArrayLike,
8+
DataFrame,
9+
Index,
10+
Series,
11+
)
312

413
# note: this is a lie to make type checkers happy (they special
514
# case property). cache_readonly uses attribute names similar to
615
# property (fget) but it does not provide fset and fdel.
716
cache_readonly = property
817

9-
def __getattr__(name: str) -> Any: ... # incomplete
18+
class AxisProperty:
19+
20+
axis: int
21+
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...
22+
@overload
23+
def __get__(self, obj: DataFrame | Series, type) -> Index: ...
24+
@overload
25+
def __get__(self, obj: None, type) -> AxisProperty: ...
26+
def __set__(
27+
self, obj: DataFrame | Series, value: AnyArrayLike | Sequence
28+
) -> None: ...

pandas/_libs/tslibs/conversion.pxd

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ from numpy cimport (
88
ndarray,
99
)
1010

11-
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
11+
from pandas._libs.tslibs.np_datetime cimport (
12+
NPY_DATETIMEUNIT,
13+
npy_datetimestruct,
14+
)
1215

1316

1417
cdef class _TSObject:
@@ -31,3 +34,5 @@ cdef int64_t get_datetime64_nanos(object val) except? -1
3134
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
3235
cdef int64_t cast_from_unit(object ts, str unit) except? -1
3336
cpdef (int64_t, int) precision_from_unit(str unit)
37+
38+
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

pandas/_libs/tslibs/conversion.pyx

+12-7
Original file line numberDiff line numberDiff line change
@@ -296,14 +296,18 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
296296
raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
297297
f'Timestamp')
298298

299+
maybe_localize_tso(obj, tz, NPY_FR_ns)
300+
return obj
301+
302+
303+
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
299304
if tz is not None:
300-
_localize_tso(obj, tz)
305+
_localize_tso(obj, tz, reso)
301306

302307
if obj.value != NPY_NAT:
303308
# check_overflows needs to run after _localize_tso
304-
check_dts_bounds(&obj.dts)
309+
check_dts_bounds(&obj.dts, reso)
305310
check_overflows(obj)
306-
return obj
307311

308312

309313
cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
@@ -401,7 +405,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
401405
return obj
402406

403407
cdef:
404-
Localizer info = Localizer(tz)
408+
Localizer info = Localizer(tz, NPY_FR_ns)
405409

406410
# Infer fold from offset-adjusted obj.value
407411
# see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
@@ -548,7 +552,7 @@ cdef inline check_overflows(_TSObject obj):
548552
# ----------------------------------------------------------------------
549553
# Localization
550554

551-
cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
555+
cdef inline void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
552556
"""
553557
Given the UTC nanosecond timestamp in obj.value, find the wall-clock
554558
representation of that timestamp in the given timezone.
@@ -557,6 +561,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
557561
----------
558562
obj : _TSObject
559563
tz : tzinfo
564+
reso : NPY_DATETIMEUNIT
560565
561566
Returns
562567
-------
@@ -569,7 +574,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
569574
cdef:
570575
int64_t local_val
571576
Py_ssize_t outpos = -1
572-
Localizer info = Localizer(tz)
577+
Localizer info = Localizer(tz, reso)
573578

574579
assert obj.tzinfo is None
575580

@@ -584,7 +589,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
584589
# infer we went through a pytz path, will have outpos!=-1
585590
tz = tz._tzinfos[tz._transition_info[outpos]]
586591

587-
dt64_to_dtstruct(local_val, &obj.dts)
592+
pandas_datetime_to_datetimestruct(local_val, reso, &obj.dts)
588593

589594
obj.tzinfo = tz
590595

pandas/_libs/tslibs/timestamps.pyi

+4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class Timestamp(datetime):
5959
# While Timestamp can return pd.NaT, having the constructor return
6060
# a Union with NaTType makes things awkward for users of pandas
6161
def _set_freq(self, freq: BaseOffset | None) -> None: ...
62+
@classmethod
63+
def _from_value_and_reso(
64+
cls, value: int, reso: int, tz: _tzinfo | None
65+
) -> Timestamp: ...
6266
@property
6367
def year(self) -> int: ...
6468
@property

0 commit comments

Comments
 (0)