Skip to content

Commit 521f58b

Browse files
Merge remote-tracking branch 'upstream/master' into groupby-select-list-fix
2 parents 8a66e55 + bac9a1b commit 521f58b

File tree

99 files changed

+3488
-2476
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+3488
-2476
lines changed

.github/workflows/ci.yml

+72-10
Original file line numberDiff line numberDiff line change
@@ -23,53 +23,53 @@ jobs:
2323

2424
- name: Looking for unwanted patterns
2525
run: ci/code_checks.sh patterns
26-
if: true
26+
if: always()
2727

2828
- name: Setup environment and build pandas
2929
run: ci/setup_env.sh
30-
if: true
30+
if: always()
3131

3232
- name: Linting
3333
run: |
3434
source activate pandas-dev
3535
ci/code_checks.sh lint
36-
if: true
36+
if: always()
3737

3838
- name: Dependencies consistency
3939
run: |
4040
source activate pandas-dev
4141
ci/code_checks.sh dependencies
42-
if: true
42+
if: always()
4343

4444
- name: Checks on imported code
4545
run: |
4646
source activate pandas-dev
4747
ci/code_checks.sh code
48-
if: true
48+
if: always()
4949

5050
- name: Running doctests
5151
run: |
5252
source activate pandas-dev
5353
ci/code_checks.sh doctests
54-
if: true
54+
if: always()
5555

5656
- name: Docstring validation
5757
run: |
5858
source activate pandas-dev
5959
ci/code_checks.sh docstrings
60-
if: true
60+
if: always()
6161

6262
- name: Typing validation
6363
run: |
6464
source activate pandas-dev
6565
ci/code_checks.sh typing
66-
if: true
66+
if: always()
6767

6868
- name: Testing docstring validation script
6969
run: |
7070
source activate pandas-dev
7171
pytest --capture=no --strict scripts
72-
if: true
72+
if: always()
7373

7474
- name: Running benchmarks
7575
run: |
@@ -87,11 +87,73 @@ jobs:
8787
else
8888
echo "Benchmarks did not run, no changes detected"
8989
fi
90-
if: true
90+
if: always()
9191

9292
- name: Publish benchmarks artifact
9393
uses: actions/upload-artifact@master
9494
with:
9595
name: Benchmarks log
9696
path: asv_bench/benchmarks.log
9797
if: failure()
98+
99+
web_and_docs:
100+
name: Web and docs
101+
runs-on: ubuntu-latest
102+
steps:
103+
104+
- name: Setting conda path
105+
run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
106+
107+
- name: Checkout
108+
uses: actions/checkout@v1
109+
110+
- name: Setup environment and build pandas
111+
run: ci/setup_env.sh
112+
113+
- name: Build website
114+
run: |
115+
source activate pandas-dev
116+
python web/pandas_web.py web/pandas --target-path=web/build
117+
118+
- name: Build documentation
119+
run: |
120+
source activate pandas-dev
121+
doc/make.py --warnings-are-errors | tee sphinx.log ; exit ${PIPESTATUS[0]}
122+
123+
# This can be removed when the ipython directive fails when there are errors,
124+
# including the `tee sphinx.log` in the previous step (https://github.com/ipython/ipython/issues/11547)
125+
- name: Check ipython directive errors
126+
run: "! grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
127+
128+
- name: Merge website and docs
129+
run: |
130+
mkdir -p pandas_web/docs
131+
cp -r web/build/* pandas_web/
132+
cp -r doc/build/html/* pandas_web/docs/
133+
if: github.event_name == 'push'
134+
135+
- name: Install Rclone
136+
run: sudo apt install rclone -y
137+
if: github.event_name == 'push'
138+
139+
- name: Set up Rclone
140+
run: |
141+
RCLONE_CONFIG_PATH=$HOME/.config/rclone/rclone.conf
142+
mkdir -p `dirname $RCLONE_CONFIG_PATH`
143+
echo "[ovh_cloud_pandas_web]" > $RCLONE_CONFIG_PATH
144+
echo "type = swift" >> $RCLONE_CONFIG_PATH
145+
echo "env_auth = false" >> $RCLONE_CONFIG_PATH
146+
echo "auth_version = 3" >> $RCLONE_CONFIG_PATH
147+
echo "auth = https://auth.cloud.ovh.net/v3/" >> $RCLONE_CONFIG_PATH
148+
echo "endpoint_type = public" >> $RCLONE_CONFIG_PATH
149+
echo "tenant_domain = default" >> $RCLONE_CONFIG_PATH
150+
echo "tenant = 2977553886518025" >> $RCLONE_CONFIG_PATH
151+
echo "domain = default" >> $RCLONE_CONFIG_PATH
152+
echo "user = w4KGs3pmDxpd" >> $RCLONE_CONFIG_PATH
153+
echo "key = ${{ secrets.ovh_object_store_key }}" >> $RCLONE_CONFIG_PATH
154+
echo "region = BHS" >> $RCLONE_CONFIG_PATH
155+
if: github.event_name == 'push'
156+
157+
- name: Sync web
158+
run: rclone sync pandas_web ovh_cloud_pandas_web:dev
159+
if: github.event_name == 'push'

.travis.yml

+1-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
language: python
2-
python: 3.5
2+
python: 3.7
33

44
# To turn off cached cython files and compiler cache
55
# set NOCACHE-true
@@ -48,17 +48,12 @@ matrix:
4848
- mysql
4949
- postgresql
5050

51-
# In allow_failures
5251
- env:
5352
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
5453
services:
5554
- mysql
5655
- postgresql
5756

58-
allow_failures:
59-
- env:
60-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
61-
6257
before_install:
6358
- echo "before_install"
6459
# set non-blocking IO on travis

ci/azure/windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
- bash: |
3535
source activate pandas-dev
3636
conda list
37-
python setup.py build_ext -q -i
37+
python setup.py build_ext -q -i -j 4
3838
python -m pip install --no-build-isolation -e .
3939
displayName: 'Build'
4040

ci/deps/azure-36-locale_slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- pytest-azurepipelines
1414

1515
# pandas dependencies
16-
- beautifulsoup4==4.6.0
16+
- beautifulsoup4=4.6.0
1717
- bottleneck=1.2.*
1818
- lxml
1919
- matplotlib=2.2.2

doc/source/getting_started/10min.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,9 @@ Plotting
697697

698698
See the :ref:`Plotting <visualization>` docs.
699699

700+
We use the standard convention for referencing the matplotlib API:
701+
700702
.. ipython:: python
701-
:suppress:
702703
703704
import matplotlib.pyplot as plt
704705
plt.close('all')

doc/source/user_guide/integer_na.rst

+28
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ Nullable integer data type
1515
IntegerArray is currently experimental. Its API or implementation may
1616
change without warning.
1717

18+
.. versionchanged:: 1.0.0
19+
20+
Now uses :attr:`pandas.NA` as the missing value rather
21+
than :attr:`numpy.nan`.
1822

1923
In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent
2024
missing data. Because ``NaN`` is a float, this forces an array of integers with
@@ -23,6 +27,9 @@ much. But if your integer column is, say, an identifier, casting to float can
2327
be problematic. Some integers cannot even be represented as floating point
2428
numbers.
2529

30+
Construction
31+
------------
32+
2633
Pandas can represent integer data with possibly missing values using
2734
:class:`arrays.IntegerArray`. This is an :ref:`extension type <extending.extension-types>`
2835
implemented within pandas.
@@ -39,6 +46,12 @@ NumPy's ``'int64'`` dtype:
3946
4047
pd.array([1, 2, np.nan], dtype="Int64")
4148
49+
All NA-like values are replaced with :attr:`pandas.NA`.
50+
51+
.. ipython:: python
52+
53+
pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64")
54+
4255
This array can be stored in a :class:`DataFrame` or :class:`Series` like any
4356
NumPy array.
4457

@@ -78,6 +91,9 @@ with the dtype.
7891
In the future, we may provide an option for :class:`Series` to infer a
7992
nullable-integer dtype.
8093

94+
Operations
95+
----------
96+
8197
Operations involving an integer array will behave similar to NumPy arrays.
8298
Missing values will be propagated, and the data will be coerced to another
8399
dtype if needed.
@@ -123,3 +139,15 @@ Reduction and groupby operations such as 'sum' work as well.
123139
124140
df.sum()
125141
df.groupby('B').A.sum()
142+
143+
Scalar NA Value
144+
---------------
145+
146+
:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar
147+
missing value. Slicing a single element that's missing will return
148+
:attr:`pandas.NA`.
149+
150+
.. ipython:: python
151+
152+
a = pd.array([1, None], dtype="Int64")
153+
a[1]

doc/source/whatsnew/v1.0.0.rst

+67-2
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,8 @@ Other enhancements
221221
- DataFrame constructor preserves `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
222222
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
223223
- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
224-
225224
- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
225+
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
226226

227227
Build Changes
228228
^^^^^^^^^^^^^
@@ -365,6 +365,64 @@ The following methods now also correctly output values for unobserved categories
365365
366366
As a reminder, you can specify the ``dtype`` to disable all inference.
367367

368+
:class:`arrays.IntegerArray` now uses :attr:`pandas.NA`
369+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
370+
371+
:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than
372+
:attr:`numpy.nan` as its missing value marker (:issue:`29964`).
373+
374+
*pandas 0.25.x*
375+
376+
.. code-block:: python
377+
378+
>>> a = pd.array([1, 2, None], dtype="Int64")
379+
>>> a
380+
<IntegerArray>
381+
[1, 2, NaN]
382+
Length: 3, dtype: Int64
383+
384+
>>> a[2]
385+
nan
386+
387+
*pandas 1.0.0*
388+
389+
.. ipython:: python
390+
391+
a = pd.array([1, 2, None], dtype="Int64")
392+
a[2]
393+
394+
See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA`
395+
and :attr:`numpy.nan`.
396+
397+
:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray`
398+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
399+
400+
Comparison operations on a :class:`arrays.IntegerArray` now return a
401+
:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`).
402+
403+
*pandas 0.25.x*
404+
405+
.. code-block:: python
406+
407+
>>> a = pd.array([1, 2, None], dtype="Int64")
408+
>>> a
409+
<IntegerArray>
410+
[1, 2, NaN]
411+
Length: 3, dtype: Int64
412+
413+
>>> a > 1
414+
array([False, True, False])
415+
416+
*pandas 1.0.0*
417+
418+
.. ipython:: python
419+
420+
a = pd.array([1, 2, None], dtype="Int64")
421+
a > 1
422+
423+
Note that missing values now propagate, rather than always comparing unequal
424+
like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more.
425+
368426
By default :meth:`Categorical.min` now returns the minimum instead of np.nan
369427
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
370428

@@ -737,6 +795,7 @@ Datetimelike
737795
- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)
738796
- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`)
739797
- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`)
798+
- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`)
740799
- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`)
741800

742801
Timedelta
@@ -766,6 +825,7 @@ Numeric
766825
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be cast to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
767826
- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`)
768827
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
828+
- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrectly casting to object-dtype (:issue:`19296`)
769829

770830
Conversion
771831
^^^^^^^^^^
@@ -785,6 +845,7 @@ Interval
785845

786846
- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`)
787847
- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`)
848+
- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`)
788849

789850
Indexing
790851
^^^^^^^^
@@ -848,6 +909,7 @@ Plotting
848909
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
849910
- :meth:`DataFrame.plot` now allows a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
850911
- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
912+
- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
851913
- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`).
852914

853915
Groupby/resample/rolling
@@ -870,6 +932,7 @@ Groupby/resample/rolling
870932
- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`)
871933
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
872934
- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
935+
- Bug in :meth:`GroupBy.pct_change` and :meth:`SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
873936

874937
Reshaping
875938
^^^^^^^^^
@@ -912,13 +975,15 @@ Other
912975
- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
913976
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
914977
- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
978+
- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`)
915979
- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`)
916980
- Fix :class:`AbstractHolidayCalendar` to return correct results for
917981
years after 2030 (now goes up to 2200) (:issue:`27790`)
918982
- Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`)
919983
- Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
920984
- Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`)
921-
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:29069`)
985+
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
986+
922987

923988
.. _whatsnew_1000.contributors:
924989

pandas/_config/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def __setattr__(self, key, val):
197197
else:
198198
raise OptionError("You can only set the value of existing options")
199199

200-
def __getattr__(self, key):
200+
def __getattr__(self, key: str):
201201
prefix = object.__getattribute__(self, "prefix")
202202
if prefix:
203203
prefix += "."

0 commit comments

Comments
 (0)