Skip to content

Commit 3d50970

Browse files
Merge remote-tracking branch 'upstream/master' into pandas-devGH-26206-datetime-unit-out-of-bounds
2 parents b69f82e + a890caf commit 3d50970

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+571
-273
lines changed

LICENSES/HAVEN_LICENSE

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
YEAR: 2013-2016
2+
COPYRIGHT HOLDER: Hadley Wickham; RStudio; and Evan Miller

LICENSES/HAVEN_MIT

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
Based on http://opensource.org/licenses/MIT
2+
3+
This is a template. Complete and ship as file LICENSE the following 2
4+
lines (only)
5+
6+
YEAR:
7+
COPYRIGHT HOLDER:
8+
9+
and specify as
10+
11+
License: MIT + file LICENSE
12+
13+
Copyright (c) <YEAR>, <COPYRIGHT HOLDER>
14+
15+
Permission is hereby granted, free of charge, to any person obtaining
16+
a copy of this software and associated documentation files (the
17+
"Software"), to deal in the Software without restriction, including
18+
without limitation the rights to use, copy, modify, merge, publish,
19+
distribute, sublicense, and/or sell copies of the Software, and to
20+
permit persons to whom the Software is furnished to do so, subject to
21+
the following conditions:
22+
23+
The above copyright notice and this permission notice shall be
24+
included in all copies or substantial portions of the Software.
25+
26+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
30+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
31+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
32+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

asv_bench/benchmarks/sparse.py

+11-19
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import itertools
2-
31
import numpy as np
42
import scipy.sparse
5-
from pandas import (SparseSeries, SparseDataFrame, SparseArray, Series,
6-
date_range, MultiIndex)
3+
4+
import pandas as pd
5+
from pandas import MultiIndex, Series, SparseArray, date_range
76

87

98
def make_array(size, dense_proportion, fill_value, dtype):
@@ -25,10 +24,10 @@ def setup(self):
2524
data = np.random.randn(N)[:-i]
2625
idx = rng[:-i]
2726
data[100:] = np.nan
28-
self.series[i] = SparseSeries(data, index=idx)
27+
self.series[i] = pd.Series(pd.SparseArray(data), index=idx)
2928

3029
def time_series_to_frame(self):
31-
SparseDataFrame(self.series)
30+
pd.DataFrame(self.series)
3231

3332

3433
class SparseArrayConstructor:
@@ -51,16 +50,9 @@ def setup(self):
5150
N = 1000
5251
self.arr = np.arange(N)
5352
self.sparse = scipy.sparse.rand(N, N, 0.005)
54-
self.dict = dict(zip(range(N), itertools.repeat([0])))
55-
56-
def time_constructor(self):
57-
SparseDataFrame(columns=self.arr, index=self.arr)
5853

5954
def time_from_scipy(self):
60-
SparseDataFrame(self.sparse)
61-
62-
def time_from_dict(self):
63-
SparseDataFrame(self.dict)
55+
pd.DataFrame.sparse.from_spmatrix(self.sparse)
6456

6557

6658
class FromCoo:
@@ -71,7 +63,7 @@ def setup(self):
7163
shape=(100, 100))
7264

7365
def time_sparse_series_from_coo(self):
74-
SparseSeries.from_coo(self.matrix)
66+
pd.Series.sparse.from_coo(self.matrix)
7567

7668

7769
class ToCoo:
@@ -82,12 +74,12 @@ def setup(self):
8274
s[100] = -1.0
8375
s[999] = 12.1
8476
s.index = MultiIndex.from_product([range(10)] * 4)
85-
self.ss = s.to_sparse()
77+
self.ss = s.astype("Sparse")
8678

8779
def time_sparse_series_to_coo(self):
88-
self.ss.to_coo(row_levels=[0, 1],
89-
column_levels=[2, 3],
90-
sort_labels=True)
80+
self.ss.sparse.to_coo(row_levels=[0, 1],
81+
column_levels=[2, 3],
82+
sort_labels=True)
9183

9284

9385
class Arithmetic:

azure-pipelines.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ jobs:
55
parameters:
66
name: macOS
77
vmImage: xcode9-macos10.13
8+
89
- template: ci/azure/posix.yml
910
parameters:
1011
name: Linux
@@ -134,7 +135,10 @@ jobs:
134135
- script: |
135136
export PATH=$HOME/miniconda3/bin:$PATH
136137
source activate pandas-dev
137-
doc/make.py
138+
# Ideally this step would simply be `doc/make.py --warnings-are-errors`; the extra commands are required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547)
139+
doc/make.py --warnings-are-errors | tee sphinx.log ; SPHINX_RET=${PIPESTATUS[0]}
140+
grep -B1 "^<<<-------------------------------------------------------------------------$" sphinx.log ; IPY_RET=$(( $? != 1 ))
141+
exit $(( $SPHINX_RET + $IPY_RET ))
138142
displayName: 'Build documentation'
139143
140144
- script: |

ci/deps/azure-macos-35.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies:
2323
- xlsxwriter
2424
- xlwt
2525
- pip:
26+
- pyreadstat
2627
# universal
2728
- pytest==4.5.0
2829
- pytest-xdist

ci/deps/azure-windows-37.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ dependencies:
3030
- pytest-mock
3131
- moto
3232
- hypothesis>=3.58.0
33+
- pyreadstat

ci/deps/travis-37.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@ dependencies:
1919
- hypothesis>=3.58.0
2020
- s3fs
2121
- pip
22+
- pyreadstat
2223
- pip:
2324
- moto

doc/source/development/contributing.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ You'll need to have at least python3.5 installed on your system.
221221
# Use an ENV_DIR of your choice. We'll use ~/virtualenvs/pandas-dev
222222
# Any parent directories should already exist
223223
python3 -m venv ~/virtualenvs/pandas-dev
224-
# Activate the virtulaenv
224+
# Activate the virtualenv
225225
. ~/virtualenvs/pandas-dev/bin/activate
226226
227227
# Install the build dependencies

doc/source/getting_started/10min.rst

+1
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,7 @@ See the :ref:`Plotting <visualization>` docs.
712712
plt.close('all')
713713
714714
.. ipython:: python
715+
:okwarning:
715716
716717
ts = pd.Series(np.random.randn(1000),
717718
index=pd.date_range('1/1/2000', periods=1000))

doc/source/index.rst.template

+2-4
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ See the :ref:`overview` for more detail about what's in the library.
3838
:maxdepth: 3
3939
:hidden:
4040
{% endif %}
41-
42-
{% if not single_doc -%}
41+
{% if not single_doc %}
4342
What's New in 0.25.0 <whatsnew/v0.25.0>
4443
install
4544
getting_started/index
@@ -52,8 +51,7 @@ See the :ref:`overview` for more detail about what's in the library.
5251
{% if not single_doc -%}
5352
development/index
5453
whatsnew/index
55-
{% endif -%}
56-
54+
{% endif %}
5755

5856
* :doc:`whatsnew/v0.25.0`
5957
* :doc:`install`

doc/source/install.rst

+1
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ pandas-gbq 0.8.0 Google Big Query access
285285
psycopg2 PostgreSQL engine for sqlalchemy
286286
pyarrow 0.9.0 Parquet and feather reading / writing
287287
pymysql MySQL engine for sqlalchemy
288+
pyreadstat SPSS files (.sav) reading
288289
qtpy Clipboard I/O
289290
s3fs 0.0.8 Amazon S3 access
290291
xarray 0.8.2 pandas-like API for N-dimensional data

doc/source/reference/arrays.rst

+29
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ If the data are tz-aware, then every value in the array must have the same timez
144144

145145
.. autosummary::
146146
:toctree: api/
147+
:template: autosummary/class_without_autosummary.rst
147148

148149
arrays.DatetimeArray
149150

@@ -204,6 +205,7 @@ A collection of timedeltas may be stored in a :class:`TimedeltaArray`.
204205

205206
.. autosummary::
206207
:toctree: api/
208+
:template: autosummary/class_without_autosummary.rst
207209

208210
arrays.TimedeltaArray
209211

@@ -263,6 +265,7 @@ Every period in a ``PeriodArray`` must have the same ``freq``.
263265

264266
.. autosummary::
265267
:toctree: api/
268+
:template: autosummary/class_without_autosummary.rst
266269

267270
arrays.PeriodArray
268271

@@ -304,6 +307,7 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`.
304307

305308
.. autosummary::
306309
:toctree: api/
310+
:template: autosummary/class_without_autosummary.rst
307311

308312
arrays.IntervalArray
309313

@@ -313,6 +317,29 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`.
313317

314318
IntervalDtype
315319

320+
321+
.. Those attributes and methods are included in the API because the docstrings
322+
.. of IntervalIndex and IntervalArray are shared. Including them here to make
323+
.. sure a docstring page is built for them to avoid warnings
324+
325+
..
326+
.. autosummary::
327+
:toctree: api/
328+
329+
arrays.IntervalArray.left
330+
arrays.IntervalArray.right
331+
arrays.IntervalArray.closed
332+
arrays.IntervalArray.mid
333+
arrays.IntervalArray.length
334+
arrays.IntervalArray.is_non_overlapping_monotonic
335+
arrays.IntervalArray.from_arrays
336+
arrays.IntervalArray.from_tuples
337+
arrays.IntervalArray.from_breaks
338+
arrays.IntervalArray.overlaps
339+
arrays.IntervalArray.set_closed
340+
arrays.IntervalArray.to_tuples
341+
342+
316343
.. _api.arrays.integer_na:
317344

318345
Nullable Integer
@@ -323,6 +350,7 @@ Pandas provides this through :class:`arrays.IntegerArray`.
323350

324351
.. autosummary::
325352
:toctree: api/
353+
:template: autosummary/class_without_autosummary.rst
326354

327355
arrays.IntegerArray
328356

@@ -414,6 +442,7 @@ be stored efficiently as a :class:`SparseArray`.
414442

415443
.. autosummary::
416444
:toctree: api/
445+
:template: autosummary/class_without_autosummary.rst
417446

418447
SparseArray
419448

doc/source/reference/extensions.rst

+5
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,9 @@ objects.
1919
api.extensions.register_index_accessor
2020
api.extensions.ExtensionDtype
2121
api.extensions.ExtensionArray
22+
23+
.. autosummary::
24+
:toctree: api/
25+
:template: autosummary/class_without_autosummary.rst
26+
2227
arrays.PandasArray

doc/source/user_guide/advanced.rst

+2
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,8 @@ faster than fancy indexing.
703703
%timeit arr[indexer]
704704
%timeit arr.take(indexer, axis=0)
705705
706+
.. ipython:: python
707+
706708
ser = pd.Series(arr[:, 0])
707709
%timeit ser.iloc[indexer]
708710
%timeit ser.take(indexer)

doc/source/user_guide/cookbook.rst

-5
Original file line numberDiff line numberDiff line change
@@ -1260,24 +1260,19 @@ The `method` argument within `DataFrame.corr` can accept a callable in addition
12601260
n = len(x)
12611261
a = np.zeros(shape=(n, n))
12621262
b = np.zeros(shape=(n, n))
1263-
12641263
for i in range(n):
12651264
for j in range(i + 1, n):
12661265
a[i, j] = abs(x[i] - x[j])
12671266
b[i, j] = abs(y[i] - y[j])
1268-
12691267
a += a.T
12701268
b += b.T
1271-
12721269
a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
12731270
b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
1274-
12751271
A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
12761272
B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
12771273
cov_ab = np.sqrt(np.nansum(A * B)) / n
12781274
std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
12791275
std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
1280-
12811276
return cov_ab / std_a / std_b
12821277
12831278
df = pd.DataFrame(np.random.normal(size=(100, 3)))

doc/source/user_guide/io.rst

+26-11
Original file line numberDiff line numberDiff line change
@@ -3249,24 +3249,35 @@ And then import the data directly to a ``DataFrame`` by calling:
32493249

32503250
.. code-block:: python
32513251
3252-
clipdf = pd.read_clipboard()
3253-
3254-
.. ipython:: python
3255-
3256-
clipdf
3257-
3252+
>>> clipdf = pd.read_clipboard()
3253+
>>> clipdf
3254+
A B C
3255+
x 1 4 p
3256+
y 2 5 q
3257+
z 3 6 r
32583258
32593259
The ``to_clipboard`` method can be used to write the contents of a ``DataFrame`` to
32603260
the clipboard. You can then paste the clipboard contents into other
32613261
applications (CTRL-V on many operating systems). Here we illustrate writing a
32623262
``DataFrame`` into clipboard and reading it back.
32633263

3264-
.. ipython:: python
3264+
.. code-block:: python
32653265
3266-
df = pd.DataFrame(np.random.randn(5, 3))
3267-
df
3268-
df.to_clipboard()
3269-
pd.read_clipboard()
3266+
>>> df = pd.DataFrame({'A': [1, 2, 3],
3267+
... 'B': [4, 5, 6],
3268+
... 'C': ['p', 'q', 'r']},
3269+
... index=['x', 'y', 'z'])
3270+
>>> df
3271+
A B C
3272+
x 1 4 p
3273+
y 2 5 q
3274+
z 3 6 r
3275+
>>> df.to_clipboard()
3276+
>>> pd.read_clipboard()
3277+
A B C
3278+
x 1 4 p
3279+
y 2 5 q
3280+
z 3 6 r
32703281
32713282
We can see that we got back the same content that we had earlier written to the clipboard.
32723283

@@ -4703,6 +4714,7 @@ See the documentation for `pyarrow <https://arrow.apache.org/docs/python/>`__ an
47034714
Write to a parquet file.
47044715

47054716
.. ipython:: python
4717+
:okwarning:
47064718
47074719
df.to_parquet('example_pa.parquet', engine='pyarrow')
47084720
df.to_parquet('example_fp.parquet', engine='fastparquet')
@@ -4720,6 +4732,7 @@ Read from a parquet file.
47204732
Read only certain columns of a parquet file.
47214733

47224734
.. ipython:: python
4735+
:okwarning:
47234736
47244737
result = pd.read_parquet('example_fp.parquet',
47254738
engine='fastparquet', columns=['a', 'b'])
@@ -4742,6 +4755,7 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
47424755
more columns in the output file. Thus, this code:
47434756

47444757
.. ipython:: python
4758+
:okwarning:
47454759
47464760
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
47474761
df.to_parquet('test.parquet', engine='pyarrow')
@@ -4758,6 +4772,7 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
47584772
:func:`~pandas.DataFrame.to_parquet`:
47594773

47604774
.. ipython:: python
4775+
:okwarning:
47614776
47624777
df.to_parquet('test.parquet', index=False)
47634778

0 commit comments

Comments
 (0)