Skip to content

Commit 8ac6355

Browse files
committed
Merge remote-tracking branch 'upstream/master' into windows_crlf
2 parents e4badc4 + f771ef6 commit 8ac6355

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+718
-346
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
<tr>
5757
<td></td>
5858
<td>
59-
<a href="https://ci.appveyor.com/project/pandas-dev/pandas">
60-
<img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
59+
<a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
60+
<img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
6161
</a>
6262
</td>
6363
</tr>

asv_bench/benchmarks/frame_methods.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -505,14 +505,21 @@ class NSort(object):
505505
param_names = ['keep']
506506

507507
def setup(self, keep):
508-
self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
508+
self.df = DataFrame(np.random.randn(100000, 3),
509+
columns=list('ABC'))
509510

510-
def time_nlargest(self, keep):
511+
def time_nlargest_one_column(self, keep):
511512
self.df.nlargest(100, 'A', keep=keep)
512513

513-
def time_nsmallest(self, keep):
514+
def time_nlargest_two_columns(self, keep):
515+
self.df.nlargest(100, ['A', 'B'], keep=keep)
516+
517+
def time_nsmallest_one_column(self, keep):
514518
self.df.nsmallest(100, 'A', keep=keep)
515519

520+
def time_nsmallest_two_columns(self, keep):
521+
self.df.nsmallest(100, ['A', 'B'], keep=keep)
522+
516523

517524
class Describe(object):
518525

ci/doctests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
2121

2222
# DataFrame / Series docstrings
2323
pytest --doctest-modules -v pandas/core/frame.py \
24-
-k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
24+
-k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
2525

2626
if [ $? -ne "0" ]; then
2727
RET=1

doc/make.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,10 @@ def _sphinx_build(self, kind):
233233
'-b{}'.format(kind),
234234
'-{}'.format(
235235
'v' * self.verbosity) if self.verbosity else '',
236-
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
236+
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
237237
'-Dexclude_patterns={}'.format(self.exclude_patterns),
238-
SOURCE_PATH,
239-
os.path.join(BUILD_PATH, kind))
238+
'"{}"'.format(SOURCE_PATH),
239+
'"{}"'.format(os.path.join(BUILD_PATH, kind)))
240240

241241
def _open_browser(self):
242242
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')

doc/source/api.rst

+9
Original file line numberDiff line numberDiff line change
@@ -2603,3 +2603,12 @@ objects.
26032603
generated/pandas.Series.ix
26042604
generated/pandas.Series.imag
26052605
generated/pandas.Series.real
2606+
2607+
2608+
.. Can't convince sphinx to generate toctree for this class attribute.
2609+
.. So we do it manually to avoid a warning
2610+
2611+
.. toctree::
2612+
:hidden:
2613+
2614+
generated/pandas.api.extensions.ExtensionDtype.na_value

doc/source/basics.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
19351935
* :ref:`Categorical <categorical>`
19361936
* :ref:`Datetime with Timezone <timeseries.timezone_series>`
19371937
* :ref:`Period <timeseries.periods>`
1938-
* :ref:`Interval <advanced.indexing.intervallindex>`
1938+
* :ref:`Interval <indexing.intervallindex>`
19391939

19401940
Pandas uses the ``object`` dtype for storing strings.
19411941

doc/source/computation.rst

+15
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,21 @@ Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
153153
frame.corr(min_periods=12)
154154
155155
156+
.. versionadded:: 0.24.0
157+
158+
The ``method`` argument can also be a callable for a generic correlation
159+
calculation. In this case, it should be a single function
160+
that produces a single value from two ndarray inputs. Suppose we wanted to
161+
compute the correlation based on histogram intersection:
162+
163+
.. ipython:: python
164+
165+
# histogram intersection
166+
histogram_intersection = lambda a, b: np.minimum(
167+
np.true_divide(a, a.sum()), np.true_divide(b, b.sum())
168+
).sum()
169+
frame.corr(method=histogram_intersection)
170+
156171
A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to
157172
compute the correlation between like-labeled Series contained in different
158173
DataFrame objects.

doc/source/cookbook.rst

+2-4
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
505505
.. ipython:: python
506506
507507
df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
508-
509508
gb = df.groupby('A')
510509
511510
def replace(g):
512-
mask = g < 0
513-
g.loc[mask] = g[~mask].mean()
514-
return g
511+
mask = g < 0
512+
return g.where(mask, g[~mask].mean())
515513
516514
gb.transform(replace)
517515

doc/source/ecosystem.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ large data to thin clients.
7373
`seaborn <https://seaborn.pydata.org>`__
7474
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7575

76-
Seaborn is a Python visualization library based on `matplotlib
77-
<http://matplotlib.org>`__. It provides a high-level, dataset-oriented
76+
Seaborn is a Python visualization library based on
77+
`matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
7878
interface for creating attractive statistical graphics. The plotting functions
7979
in seaborn understand pandas objects and leverage pandas grouping operations
8080
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
140140
(Note: HTML tables may or may not be
141141
compatible with non-HTML Jupyter output formats.)
142142

143-
See :ref:`Options and Settings <options>` and :ref:`<options.available>`
143+
See :ref:`Options and Settings <options>` and :ref:`options.available <available>`
144144
for pandas ``display.`` settings.
145145

146146
`quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
169169
Most pandas classes, methods and data attributes can be autocompleted in
170170
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
171171
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
172-
and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
172+
and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
173173
and render Numpydoc documentation on pandas objects in rich text with Sphinx
174174
both automatically and on-demand.
175175

doc/source/io.rst

+13-16
Original file line numberDiff line numberDiff line change
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
6666
CSV & Text files
6767
----------------
6868

69-
The two workhorse functions for reading text files (a.k.a. flat files) are
70-
:func:`read_csv` and :func:`read_table`. They both use the same parsing code to
71-
intelligently convert tabular data into a ``DataFrame`` object. See the
72-
:ref:`cookbook<cookbook.csv>` for some advanced strategies.
69+
The workhorse function for reading text files (a.k.a. flat files) is
70+
:func:`read_csv`. See the :ref:`cookbook<cookbook.csv>` for some advanced strategies.
7371

7472
Parsing options
7573
'''''''''''''''
7674

77-
The functions :func:`read_csv` and :func:`read_table` accept the following
78-
common arguments:
75+
:func:`read_csv` accepts the following common arguments:
7976

8077
Basic
8178
+++++
@@ -780,8 +777,8 @@ Date Handling
780777
Specifying Date Columns
781778
+++++++++++++++++++++++
782779

783-
To better facilitate working with datetime data, :func:`read_csv` and
784-
:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
780+
To better facilitate working with datetime data, :func:`read_csv`
781+
uses the keyword arguments ``parse_dates`` and ``date_parser``
785782
to allow users to specify a variety of columns and date/time formats to turn the
786783
input text data into ``datetime`` objects.
787784

@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:
14341431
14351432
print(open('data/mindex_ex.csv').read())
14361433
1437-
The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of
1434+
The ``index_col`` argument to ``read_csv`` can take a list of
14381435
column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
14391436
returned object:
14401437

@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.
15051502
15061503
.. ipython:: python
15071504
1508-
print(open('tmp2.sv').read())
1509-
pd.read_csv('tmp2.sv', sep=None, engine='python')
1505+
print(open('tmp2.sv').read())
1506+
pd.read_csv('tmp2.sv', sep=None, engine='python')
15101507
15111508
.. _io.multiple_files:
15121509

@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
15281525
.. ipython:: python
15291526
15301527
print(open('tmp.sv').read())
1531-
table = pd.read_table('tmp.sv', sep='|')
1528+
table = pd.read_csv('tmp.sv', sep='|')
15321529
table
15331530
15341531
1535-
By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
1532+
By specifying a ``chunksize`` to ``read_csv``, the return
15361533
value will be an iterable object of type ``TextFileReader``:
15371534

15381535
.. ipython:: python
15391536
1540-
reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
1537+
reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
15411538
reader
15421539
15431540
for chunk in reader:
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:
15481545

15491546
.. ipython:: python
15501547
1551-
reader = pd.read_table('tmp.sv', sep='|', iterator=True)
1548+
reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
15521549
reader.get_chunk(5)
15531550
15541551
.. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard
30673064

30683065
A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
30693066
which takes the contents of the clipboard buffer and passes them to the
3070-
``read_table`` method. For instance, you can copy the following text to the
3067+
``read_csv`` method. For instance, you can copy the following text to the
30713068
clipboard (CTRL-C on many operating systems):
30723069

30733070
.. code-block:: python

doc/source/text.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi
312312
313313
s
314314
u
315-
s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
315+
s.str.cat([u.values,
316+
u.index.astype(str).values], na_rep='-')
316317
317318
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:
318319

319320
.. ipython:: python
320321
321322
v
322-
s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
323+
s.str.cat([u, v], join='outer', na_rep='-')
323324
324325
If using ``join='right'`` on a list of ``others`` that contains different indexes,
325326
the union of these indexes will be used as the basis for the final concatenation:

doc/source/timeseries.rst

+26-19
Original file line numberDiff line numberDiff line change
@@ -753,18 +753,28 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
753753
Iterating through groups
754754
------------------------
755755

756-
With the :ref:`Resampler` object in hand, iterating through the grouped data is very
756+
With the ``Resampler`` object in hand, iterating through the grouped data is very
757757
natural and functions similarly to :py:func:`itertools.groupby`:
758758

759759
.. ipython:: python
760760
761-
resampled = df.resample('H')
761+
small = pd.Series(
762+
range(6),
763+
index=pd.to_datetime(['2017-01-01T00:00:00',
764+
'2017-01-01T00:30:00',
765+
'2017-01-01T00:31:00',
766+
'2017-01-01T01:00:00',
767+
'2017-01-01T03:00:00',
768+
'2017-01-01T03:05:00'])
769+
)
770+
resampled = small.resample('H')
762771
763772
for name, group in resampled:
764-
print(name)
765-
print(group)
773+
print("Group: ", name)
774+
print("-" * 27)
775+
print(group, end="\n\n")
766776
767-
See :ref:`groupby.iterating-label`.
777+
See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more.
768778

769779
.. _timeseries.components:
770780

@@ -910,26 +920,22 @@ It's definitely worth exploring the ``pandas.tseries.offsets`` module and the
910920
various docstrings for the classes.
911921

912922
These operations (``apply``, ``rollforward`` and ``rollback``) preserve time
913-
(hour, minute, etc) information by default. To reset time, use ``normalize=True``
914-
when creating the offset instance. If ``normalize=True``, the result is
915-
normalized after the function is applied.
916-
923+
(hour, minute, etc) information by default. To reset time, use ``normalize``
924+
before or after applying the operation (depending on whether you want the
925+
time information included in the operation).
917926

918927
.. ipython:: python
919928
929+
ts = pd.Timestamp('2014-01-01 09:00')
920930
day = Day()
921-
day.apply(pd.Timestamp('2014-01-01 09:00'))
922-
923-
day = Day(normalize=True)
924-
day.apply(pd.Timestamp('2014-01-01 09:00'))
931+
day.apply(ts)
932+
day.apply(ts).normalize()
925933
934+
ts = pd.Timestamp('2014-01-01 22:00')
926935
hour = Hour()
927-
hour.apply(pd.Timestamp('2014-01-01 22:00'))
928-
929-
hour = Hour(normalize=True)
930-
hour.apply(pd.Timestamp('2014-01-01 22:00'))
931-
hour.apply(pd.Timestamp('2014-01-01 23:00'))
932-
936+
hour.apply(ts)
937+
hour.apply(ts).normalize()
938+
hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize()
933939
934940
.. _timeseries.dayvscalendarday:
935941

@@ -1488,6 +1494,7 @@ time. The method for this is :meth:`~Series.shift`, which is available on all of
14881494
the pandas objects.
14891495

14901496
.. ipython:: python
1497+
14911498
ts = pd.Series(range(len(rng)), index=rng)
14921499
ts = ts[:5]
14931500
ts.shift(1)

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ New Behavior:
373373
s = pd.Series([1,2,3], index=np.arange(3.))
374374
s
375375
s.index
376-
print(s.to_csv(path=None))
376+
print(s.to_csv(path_or_buf=None, header=False))
377377

378378
Changes to dtype assignment behaviors
379379
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ Previously, only ``gzip`` compression was supported. By default, compression of
186186
URLs and paths are now inferred using their file extensions. Additionally,
187187
support for bz2 compression in the python 2 C-engine improved (:issue:`14874`).
188188

189-
.. ipython:: python
189+
.. code-block:: python
190190

191191
url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(
192192
repo = 'pandas-dev/pandas',

0 commit comments

Comments
 (0)