
Commit b560fda

Author: Camilo Cota
Merge:  dc7acd1 2de2884

    Fix what's new


45 files changed: +1079, -316 lines

asv_bench/benchmarks/frame_methods.py (+12 -1)

@@ -423,7 +423,7 @@ class frame_get_dtype_counts(object):
     goal_time = 0.2
 
     def setup(self):
-        self.df = pandas.DataFrame(np.random.randn(10, 10000))
+        self.df = DataFrame(np.random.randn(10, 10000))
 
     def time_frame_get_dtype_counts(self):
         self.df.get_dtype_counts()
@@ -985,3 +985,14 @@ def setup(self):
 
     def time_series_string_vector_slice(self):
         self.s.str[:5]
+
+
+class frame_quantile_axis1(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.df = DataFrame(np.random.randn(1000, 3),
+                            columns=list('ABC'))
+
+    def time_frame_quantile_axis1(self):
+        self.df.quantile([0.1, 0.5], axis=1)
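
As a quick sanity check of what the new frame_quantile_axis1 benchmark exercises, here is a hedged, standalone sketch (not part of the commit) that builds the same 1000x3 frame and calls quantile along axis=1; with a list of quantiles the call returns a DataFrame whose index is the requested quantiles and whose columns are the original row labels.

    # Illustrative sketch only -- mirrors the new frame_quantile_axis1 benchmark.
    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(1000, 3), columns=list('ABC'))

    # A list of quantiles along axis=1 yields a 2 x 1000 DataFrame,
    # indexed by the quantiles with one column per original row.
    res = df.quantile([0.1, 0.5], axis=1)
    print(res.shape)                      # (2, 1000)

    # A scalar quantile returns a Series instead, named after the quantile.
    print(df.quantile(0.5, axis=1).name)  # 0.5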

codecov.yml (-3)

@@ -7,6 +7,3 @@ coverage:
       default:
         target: '50'
         branches: null
-  changes:
-    default:
-      branches: null

doc/source/10min.rst (+11)

@@ -483,6 +483,17 @@ SQL style merges. See the :ref:`Database style joining <merging.join>`
    right
    pd.merge(left, right, on='key')
 
+Another example that can be given is:
+
+.. ipython:: python
+
+   left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
+   right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
+   left
+   right
+   pd.merge(left, right, on='key')
+
+
 Append
 ~~~~~~
 

doc/source/advanced.rst (+13)

@@ -528,6 +528,13 @@ return a copy of the data rather than a view:
    jim joe
    1   z    0.64094
 
+Furthermore if you try to index something that is not fully lexsorted, this can raise:
+
+.. code-block:: ipython
+
+   In [5]: dfm.loc[(0,'y'):(1, 'z')]
+   KeyError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
+
 The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and the ``lexsort_depth`` property returns the sort depth:
 
 .. ipython:: python
@@ -542,6 +549,12 @@ The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and t
    dfm.index.is_lexsorted()
    dfm.index.lexsort_depth
 
+And now selection works as expected.
+
+.. ipython:: python
+
+   dfm.loc[(0,'y'):(1, 'z')]
+
 Take Methods
 ------------
 
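
The added doc text relies on a MultiIndexed frame dfm defined earlier in advanced.rst; a self-contained sketch of the same point, with illustrative data that is not taken from the commit and assuming the 0.18-era is_lexsorted()/lexsort_depth API, could look like this:

    # Illustrative sketch of the lexsort-depth behaviour described above.
    import numpy as np
    import pandas as pd

    dfm = pd.DataFrame({'jim': [0, 0, 1, 1],
                        'joe': ['x', 'x', 'z', 'y'],
                        'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])

    # 'joe' is unsorted within 'jim', so only the first level is lexsorted;
    # slicing on both levels would raise the KeyError shown in the doc change.
    print(dfm.index.lexsort_depth)   # 1

    # Sorting the index restores full lexsort depth and the slice works.
    dfm = dfm.sort_index()
    print(dfm.index.lexsort_depth)   # 2
    print(dfm.loc[(0, 'y'):(1, 'z')])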

doc/source/io.rst (+1 -1)

@@ -99,7 +99,7 @@ delimiter : str, default ``None``
     Alternative argument name for sep.
 delim_whitespace : boolean, default False
     Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``)
-    will be used as the delimiter. Equivalent to setting ``sep='\+s'``.
+    will be used as the delimiter. Equivalent to setting ``sep='\s+'``.
     If this option is set to True, nothing should be passed in for the
     ``delimiter`` parameter.
 
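
A minimal sketch (not from the commit) of the equivalence the corrected sentence states: delim_whitespace=True parses the same as the regex separator '\s+' (the old text had it backwards as '\+s').

    # Illustrative: delim_whitespace=True behaves like sep='\s+'.
    from io import StringIO

    import pandas as pd

    data = "a  b\t c\n1  2\t 3\n4  5\t 6\n"

    df1 = pd.read_csv(StringIO(data), delim_whitespace=True)
    df2 = pd.read_csv(StringIO(data), sep=r'\s+')

    assert df1.equals(df2)
    print(df1)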

doc/source/reshaping.rst (+10)

@@ -445,6 +445,16 @@ If ``crosstab`` receives only two Series, it will provide a frequency table.
 
    pd.crosstab(df.A, df.B)
 
+Any input passed containing ``Categorical`` data will have **all** of its
+categories included in the cross-tabulation, even if the actual data does
+not contain any instances of a particular category.
+
+.. ipython:: python
+
+   foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
+   bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
+   pd.crosstab(foo, bar)
+
 Normalization
 ~~~~~~~~~~~~~
 

doc/source/text.rst (+12 -1)

@@ -281,7 +281,7 @@ Unlike ``extract`` (which returns only the first match),
 
 .. ipython:: python
 
-   s = pd.Series(["a1a2", "b1", "c1"], ["A", "B", "C"])
+   s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
    s
    two_groups = '(?P<letter>[a-z])(?P<digit>[0-9])'
    s.str.extract(two_groups, expand=True)
@@ -313,6 +313,17 @@ then ``extractall(pat).xs(0, level='match')`` gives the same result as
    extractall_result
    extractall_result.xs(0, level="match")
 
+``Index`` also supports ``.str.extractall``. It returns a ``DataFrame`` which has the
+same result as a ``Series.str.extractall`` with a default index (starts from 0).
+
+.. versionadded:: 0.18.2
+
+.. ipython:: python
+
+   pd.Index(["a1a2", "b1", "c1"]).str.extractall(two_groups)
+
+   pd.Series(["a1a2", "b1", "c1"]).str.extractall(two_groups)
+
 
 Testing for Strings that Match or Contain a Pattern
 ---------------------------------------------------

doc/source/whatsnew/v0.18.1.txt (-1)

@@ -563,7 +563,6 @@ Performance Improvements
 - Improved speed of SAS reader (:issue:`12656`, :issue:`12961`)
 - Performance improvements in ``.groupby(..).cumcount()`` (:issue:`11039`)
 - Improved memory usage in ``pd.read_csv()`` when using ``skiprows=an_integer`` (:issue:`13005`)
-
 - Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`)
 - Improved performance of ``Period`` construction and time series plotting (:issue:`12903`, :issue:`11831`).
 - Improved performance of ``.str.encode()`` and ``.str.decode()`` methods (:issue:`13008`)

doc/source/whatsnew/v0.18.2.txt (+20 -2)

@@ -31,8 +31,14 @@ Other enhancements
 
 - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains to raising a ``NonExistentTimeError`` (:issue:`13057`)
 
-- Support decimal option in PythonParser
+- ``Index`` now supports ``.str.extractall()`` which returns ``DataFrame``, see :ref:`Extract all matches in each subject (extractall) <text.extractall>` (:issue:`10008`, :issue:`13156`)
+
+  .. ipython:: python
 
+     idx = pd.Index(["a1a2", "b1", "c1"])
+     idx.str.extractall("[ab](?P<digit>\d)")
+
+- Support decimal option in PythonParser
 
 .. _whatsnew_0182.api:
 
@@ -97,22 +103,31 @@ Performance Improvements
 
 - Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`)
 - Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks are large, though recommended to use ``IntIndex`` in such cases (:issue:`13082`)
+- increased performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`)
+
+
 
 
 
 .. _whatsnew_0182.bug_fixes:
 
 Bug Fixes
 ~~~~~~~~~
-- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
 
+- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
+- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
+- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
+- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
 
 
+- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`)
 
 
+- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()``); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`)
 
 
 
+- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`)
 
 
 - Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`)
@@ -121,14 +136,17 @@ Bug Fixes
 
 
 
+- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
 
 
 
 
 
 
+- Bug in ``Series`` arithmetic raises ``TypeError`` if it contains datetime-like as ``object`` dtype (:issue:`13043`)
 
 
 
 - Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
 - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
+- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
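
The first enhancement bullet above (the errors keyword on .tz_localize()) has no example in the diff; a hedged sketch of the behaviour it describes, assuming the errors='raise'/'coerce' values documented for the 0.18.x API, would be:

    # Hedged sketch: assumes `errors` accepts 'raise' (default) and 'coerce'.
    import pandas as pd

    # 02:30 on 2015-03-29 does not exist in Europe/Warsaw (DST spring-forward).
    ts = pd.Timestamp('2015-03-29 02:30:00')

    try:
        ts.tz_localize('Europe/Warsaw')          # default: raises
    except Exception as exc:
        print(type(exc).__name__)                # NonExistentTimeError

    # With errors='coerce' the nonexistent timestamp becomes NaT.
    print(ts.tz_localize('Europe/Warsaw', errors='coerce'))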

pandas/core/categorical.py (+1 -1)

@@ -985,7 +985,7 @@ def __setstate__(self, state):
 
         # Provide compatibility with pre-0.15.0 Categoricals.
        if '_codes' not in state and 'labels' in state:
-            state['_codes'] = state.pop('labels')
+            state['_codes'] = state.pop('labels').astype(np.int8)
         if '_categories' not in state and '_levels' in state:
             state['_categories'] = self._validate_categories(state.pop(
                 '_levels'))
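
For context on the one-line change: pre-0.15.0 pickles stored the codes under 'labels', typically as a wider integer dtype, while newer Categoricals keep small code arrays as int8, so the compatibility shim now casts on load. A small illustrative check (not part of the commit) of that dtype expectation:

    # Illustrative only: fresh Categoricals keep their codes as int8 for
    # small category sets, which is what the patched __setstate__ restores
    # for codes coming from a pre-0.15.0 pickle's 'labels' entry.
    import numpy as np
    import pandas as pd

    cat = pd.Categorical(['a', 'b', 'a', 'c'])
    print(cat.codes.dtype)                      # int8

    # An old pickle might have carried the codes as int64; the cast mirrors
    # what the patched compatibility code does.
    old_labels = np.array([0, 1, 0, 2], dtype=np.int64)
    print(old_labels.astype(np.int8).dtype)     # int8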

pandas/core/frame.py (+15 -19)

@@ -4989,31 +4989,27 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
         0.5  2.5  55.0
         """
         self._check_percentile(q)
-        if not com.is_list_like(q):
-            q = [q]
-            squeeze = True
-        else:
-            squeeze = False
 
         data = self._get_numeric_data() if numeric_only else self
         axis = self._get_axis_number(axis)
+        is_transposed = axis == 1
 
-        def _quantile(series):
-            res = series.quantile(q, interpolation=interpolation)
-            return series.name, res
-
-        if axis == 1:
+        if is_transposed:
             data = data.T
 
-        # unable to use DataFrame.apply, becasuse data may be empty
-        result = dict(_quantile(s) for (_, s) in data.iteritems())
-        result = self._constructor(result, columns=data.columns)
-        if squeeze:
-            if result.shape == (1, 1):
-                result = result.T.iloc[:, 0]  # don't want scalar
-            else:
-                result = result.T.squeeze()
-            result.name = None  # For groupby, so it can set an index name
+        result = data._data.quantile(qs=q,
+                                     axis=1,
+                                     interpolation=interpolation,
+                                     transposed=is_transposed)
+
+        if result.ndim == 2:
+            result = self._constructor(result)
+        else:
+            result = self._constructor_sliced(result, name=q)
+
+        if is_transposed:
+            result = result.T
 
         return result
 
     def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
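
A short usage sketch (not part of the diff) of the behaviour this refactor preserves, inferred from the new code path: a scalar q yields a Series named with the quantile, a list of quantiles yields a DataFrame, and axis=1 computes per row via the transposed path.

    # Illustrative sketch of DataFrame.quantile after the per-block refactor.
    import pandas as pd

    df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0],
                       'b': [10.0, 20.0, 30.0, 40.0]})

    s = df.quantile(0.5)           # Series indexed by column, named 0.5
    print(s.name, dict(s))         # 0.5 {'a': 2.5, 'b': 25.0}

    q = df.quantile([0.25, 0.75])  # DataFrame indexed by the quantiles
    print(q)

    # axis=1 takes the quantile across columns, one value per original row.
    print(df.quantile(0.5, axis=1))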
