Skip to content

Commit 16a5297

Browse files
authored
Merge branch 'master' into fixturize_frame_tests_1
2 parents 733b889 + 2f1b842 commit 16a5297

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+749
-254
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
<tr>
5757
<td></td>
5858
<td>
59-
<a href="https://ci.appveyor.com/project/pandas-dev/pandas">
60-
<img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
59+
<a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
60+
<img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
6161
</a>
6262
</td>
6363
</tr>

asv_bench/benchmarks/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
try:
1010
hashing = import_module(imp)
1111
break
12-
except:
12+
except (ImportError, TypeError, ValueError):
1313
pass
1414

1515
from .pandas_vb_common import setup # noqa

asv_bench/benchmarks/frame_methods.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -505,14 +505,21 @@ class NSort(object):
505505
param_names = ['keep']
506506

507507
def setup(self, keep):
508-
self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
508+
self.df = DataFrame(np.random.randn(100000, 3),
509+
columns=list('ABC'))
509510

510-
def time_nlargest(self, keep):
511+
def time_nlargest_one_column(self, keep):
511512
self.df.nlargest(100, 'A', keep=keep)
512513

513-
def time_nsmallest(self, keep):
514+
def time_nlargest_two_columns(self, keep):
515+
self.df.nlargest(100, ['A', 'B'], keep=keep)
516+
517+
def time_nsmallest_one_column(self, keep):
514518
self.df.nsmallest(100, 'A', keep=keep)
515519

520+
def time_nsmallest_two_columns(self, keep):
521+
self.df.nsmallest(100, ['A', 'B'], keep=keep)
522+
516523

517524
class Describe(object):
518525

asv_bench/benchmarks/io/csv.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
import random
2-
import timeit
32
import string
43

54
import numpy as np
65
import pandas.util.testing as tm
76
from pandas import DataFrame, Categorical, date_range, read_csv
8-
from pandas.compat import PY2
97
from pandas.compat import cStringIO as StringIO
108

119
from ..pandas_vb_common import setup, BaseIO # noqa
@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
181179
names=list('abc'), float_precision=float_precision)
182180

183181
def time_read_csv_python_engine(self, sep, decimal, float_precision):
184-
read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
185-
float_precision=None, names=list('abc'))
182+
read_csv(self.data(self.StringIO_input), sep=sep, header=None,
183+
engine='python', float_precision=None, names=list('abc'))
186184

187185

188186
class ReadCSVCategorical(BaseIO):

asv_bench/benchmarks/join_merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def setup(self):
2929
try:
3030
with warnings.catch_warnings(record=True):
3131
self.mdf1.consolidate(inplace=True)
32-
except:
32+
except (AttributeError, TypeError):
3333
pass
3434
self.mdf2 = self.mdf1.copy()
3535
self.mdf2.index = self.df2.index

asv_bench/benchmarks/pandas_vb_common.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
from importlib import import_module
33

44
import numpy as np
5-
from pandas import Panel
65

76
# Compatibility import for lib
87
for imp in ['pandas._libs.lib', 'pandas.lib']:
98
try:
109
lib = import_module(imp)
1110
break
12-
except:
11+
except (ImportError, TypeError, ValueError):
1312
pass
1413

1514
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
@@ -34,7 +33,7 @@ def remove(self, f):
3433
"""Remove created files"""
3534
try:
3635
os.remove(f)
37-
except:
36+
except OSError:
3837
# On Windows, attempting to remove a file that is in use
3938
# causes an exception to be raised
4039
pass

asv_bench/benchmarks/stat_ops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
1818
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
1919
try:
2020
pd.options.compute.use_bottleneck = use_bottleneck
21-
except:
21+
except TypeError:
2222
from pandas.core import nanops
2323
nanops._USE_BOTTLENECK = use_bottleneck
2424
self.df_func = getattr(df, op)
@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
5656
s = pd.Series(np.random.randn(100000)).astype(dtype)
5757
try:
5858
pd.options.compute.use_bottleneck = use_bottleneck
59-
except:
59+
except TypeError:
6060
from pandas.core import nanops
6161
nanops._USE_BOTTLENECK = use_bottleneck
6262
self.s_func = getattr(s, op)

asv_bench/benchmarks/timeseries.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import warnings
21
from datetime import timedelta
32

43
import numpy as np

ci/doctests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
2121

2222
# DataFrame / Series docstrings
2323
pytest --doctest-modules -v pandas/core/frame.py \
24-
-k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
24+
-k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
2525

2626
if [ $? -ne "0" ]; then
2727
RET=1

ci/requirements-optional-pip.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ lxml
1414
matplotlib
1515
nbsphinx
1616
numexpr
17-
openpyxl=2.5.5
17+
openpyxl==2.5.5
1818
pyarrow
1919
pymysql
2020
tables
@@ -28,4 +28,4 @@ statsmodels
2828
xarray
2929
xlrd
3030
xlsxwriter
31-
xlwt
31+
xlwt
201 KB
Binary file not shown.
74.7 KB
Binary file not shown.
-201 KB
Binary file not shown.
-103 KB
Binary file not shown.

doc/make.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,10 @@ def _sphinx_build(self, kind):
233233
'-b{}'.format(kind),
234234
'-{}'.format(
235235
'v' * self.verbosity) if self.verbosity else '',
236-
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
236+
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
237237
'-Dexclude_patterns={}'.format(self.exclude_patterns),
238-
SOURCE_PATH,
239-
os.path.join(BUILD_PATH, kind))
238+
'"{}"'.format(SOURCE_PATH),
239+
'"{}"'.format(os.path.join(BUILD_PATH, kind)))
240240

241241
def _open_browser(self):
242242
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')

doc/source/api.rst

+9
Original file line numberDiff line numberDiff line change
@@ -2603,3 +2603,12 @@ objects.
26032603
generated/pandas.Series.ix
26042604
generated/pandas.Series.imag
26052605
generated/pandas.Series.real
2606+
2607+
2608+
.. Can't convince sphinx to generate toctree for this class attribute.
2609+
.. So we do it manually to avoid a warning
2610+
2611+
.. toctree::
2612+
:hidden:
2613+
2614+
generated/pandas.api.extensions.ExtensionDtype.na_value

doc/source/basics.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
19351935
* :ref:`Categorical <categorical>`
19361936
* :ref:`Datetime with Timezone <timeseries.timezone_series>`
19371937
* :ref:`Period <timeseries.periods>`
1938-
* :ref:`Interval <advanced.indexing.intervallindex>`
1938+
* :ref:`Interval <indexing.intervallindex>`
19391939

19401940
Pandas uses the ``object`` dtype for storing strings.
19411941

doc/source/computation.rst

+15
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,21 @@ Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
153153
frame.corr(min_periods=12)
154154
155155
156+
.. versionadded:: 0.24.0
157+
158+
The ``method`` argument can also be a callable for a generic correlation
159+
calculation. In this case, it should be a single function
160+
that produces a single value from two ndarray inputs. Suppose we wanted to
161+
compute the correlation based on histogram intersection:
162+
163+
.. ipython:: python
164+
165+
# histogram intersection
166+
histogram_intersection = lambda a, b: np.minimum(
167+
np.true_divide(a, a.sum()), np.true_divide(b, b.sum())
168+
).sum()
169+
frame.corr(method=histogram_intersection)
170+
156171
A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to
157172
compute the correlation between like-labeled Series contained in different
158173
DataFrame objects.

doc/source/cookbook.rst

+2-4
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
505505
.. ipython:: python
506506
507507
df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
508-
509508
gb = df.groupby('A')
510509
511510
def replace(g):
512-
mask = g < 0
513-
g.loc[mask] = g[~mask].mean()
514-
return g
511+
mask = g < 0
512+
return g.where(~mask, g[~mask].mean())
515513
516514
gb.transform(replace)
517515

doc/source/ecosystem.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ large data to thin clients.
7373
`seaborn <https://seaborn.pydata.org>`__
7474
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7575

76-
Seaborn is a Python visualization library based on `matplotlib
77-
<http://matplotlib.org>`__. It provides a high-level, dataset-oriented
76+
Seaborn is a Python visualization library based on
77+
`matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
7878
interface for creating attractive statistical graphics. The plotting functions
7979
in seaborn understand pandas objects and leverage pandas grouping operations
8080
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
140140
(Note: HTML tables may or may not be
141141
compatible with non-HTML Jupyter output formats.)
142142

143-
See :ref:`Options and Settings <options>` and :ref:`<options.available>`
143+
See :ref:`Options and Settings <options>` and :ref:`Available Options <options.available>`
144144
for pandas ``display.`` settings.
145145

146146
`quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
169169
Most pandas classes, methods and data attributes can be autocompleted in
170170
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
171171
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
172-
and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
172+
and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
173173
and render Numpydoc documentation on pandas objects in rich text with Sphinx
174174
both automatically and on-demand.
175175

doc/source/io.rst

+13-16
Original file line numberDiff line numberDiff line change
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
6666
CSV & Text files
6767
----------------
6868

69-
The two workhorse functions for reading text files (a.k.a. flat files) are
70-
:func:`read_csv` and :func:`read_table`. They both use the same parsing code to
71-
intelligently convert tabular data into a ``DataFrame`` object. See the
72-
:ref:`cookbook<cookbook.csv>` for some advanced strategies.
69+
The workhorse function for reading text files (a.k.a. flat files) is
70+
:func:`read_csv`. See the :ref:`cookbook<cookbook.csv>` for some advanced strategies.
7371

7472
Parsing options
7573
'''''''''''''''
7674

77-
The functions :func:`read_csv` and :func:`read_table` accept the following
78-
common arguments:
75+
:func:`read_csv` accepts the following common arguments:
7976

8077
Basic
8178
+++++
@@ -780,8 +777,8 @@ Date Handling
780777
Specifying Date Columns
781778
+++++++++++++++++++++++
782779

783-
To better facilitate working with datetime data, :func:`read_csv` and
784-
:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
780+
To better facilitate working with datetime data, :func:`read_csv`
781+
uses the keyword arguments ``parse_dates`` and ``date_parser``
785782
to allow users to specify a variety of columns and date/time formats to turn the
786783
input text data into ``datetime`` objects.
787784

@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:
14341431
14351432
print(open('data/mindex_ex.csv').read())
14361433
1437-
The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of
1434+
The ``index_col`` argument to ``read_csv`` can take a list of
14381435
column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
14391436
returned object:
14401437

@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.
15051502
15061503
.. ipython:: python
15071504
1508-
print(open('tmp2.sv').read())
1509-
pd.read_csv('tmp2.sv', sep=None, engine='python')
1505+
print(open('tmp2.sv').read())
1506+
pd.read_csv('tmp2.sv', sep=None, engine='python')
15101507
15111508
.. _io.multiple_files:
15121509

@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
15281525
.. ipython:: python
15291526
15301527
print(open('tmp.sv').read())
1531-
table = pd.read_table('tmp.sv', sep='|')
1528+
table = pd.read_csv('tmp.sv', sep='|')
15321529
table
15331530
15341531
1535-
By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
1532+
By specifying a ``chunksize`` to ``read_csv``, the return
15361533
value will be an iterable object of type ``TextFileReader``:
15371534

15381535
.. ipython:: python
15391536
1540-
reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
1537+
reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
15411538
reader
15421539
15431540
for chunk in reader:
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:
15481545

15491546
.. ipython:: python
15501547
1551-
reader = pd.read_table('tmp.sv', sep='|', iterator=True)
1548+
reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
15521549
reader.get_chunk(5)
15531550
15541551
.. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard
30673064

30683065
A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
30693066
which takes the contents of the clipboard buffer and passes them to the
3070-
``read_table`` method. For instance, you can copy the following text to the
3067+
``read_csv`` method. For instance, you can copy the following text to the
30713068
clipboard (CTRL-C on many operating systems):
30723069

30733070
.. code-block:: python

doc/source/text.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi
312312
313313
s
314314
u
315-
s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
315+
s.str.cat([u.values,
316+
u.index.astype(str).values], na_rep='-')
316317
317318
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:
318319

319320
.. ipython:: python
320321
321322
v
322-
s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
323+
s.str.cat([u, v], join='outer', na_rep='-')
323324
324325
If using ``join='right'`` on a list of ``others`` that contains different indexes,
325326
the union of these indexes will be used as the basis for the final concatenation:

0 commit comments

Comments
 (0)