Skip to content

Commit 6530500

Browse files
committed
Merge remote-tracking branch 'upstream/master' into disown-tz-only-rebased
2 parents 7a711f9 + 3086e0a commit 6530500

File tree

8 files changed

+1436
-99
lines changed

8 files changed

+1436
-99
lines changed

doc/source/api.rst

Lines changed: 1361 additions & 45 deletions
Large diffs are not rendered by default.

doc/source/conf.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -371,13 +371,14 @@
371371

372372

373373
intersphinx_mapping = {
374-
'statsmodels': ('http://www.statsmodels.org/devel/', None),
374+
'dateutil': ("https://dateutil.readthedocs.io/en/latest/", None),
375375
'matplotlib': ('https://matplotlib.org/', None),
376+
'numpy': ('https://docs.scipy.org/doc/numpy/', None),
376377
'pandas-gbq': ('https://pandas-gbq.readthedocs.io/en/latest/', None),
378+
'py': ('https://pylib.readthedocs.io/en/latest/', None),
377379
'python': ('https://docs.python.org/3/', None),
378-
'numpy': ('https://docs.scipy.org/doc/numpy/', None),
379380
'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
380-
'py': ('https://pylib.readthedocs.io/en/latest/', None)
381+
'statsmodels': ('http://www.statsmodels.org/devel/', None),
381382
}
382383
import glob
383384
autosummary_generate = glob.glob("*.rst")

doc/source/groupby.rst

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,21 @@ pandas objects can be split on any of their axes. The abstract definition of
6666
grouping is to provide a mapping of labels to group names. To create a GroupBy
6767
object (more on what the GroupBy object is later), you may do the following:
6868

69-
.. code-block:: python
69+
.. ipython:: python
70+
71+
df = pd.DataFrame([('bird', 'Falconiformes', 389.0),
72+
('bird', 'Psittaciformes', 24.0),
73+
('mammal', 'Carnivora', 80.2),
74+
('mammal', 'Primates', np.nan),
75+
('mammal', 'Carnivora', 58)],
76+
index=['falcon', 'parrot', 'lion', 'monkey', 'leopard'],
77+
columns=('class', 'order', 'max_speed'))
78+
df
7079
71-
# default is axis=0
72-
>>> grouped = obj.groupby(key)
73-
>>> grouped = obj.groupby(key, axis=1)
74-
>>> grouped = obj.groupby([key1, key2])
80+
# default is axis=0
81+
grouped = df.groupby('class')
82+
grouped = df.groupby('order', axis='columns')
83+
grouped = df.groupby(['class', 'order'])
7584
7685
The mapping can be specified many different ways:
7786

@@ -239,7 +248,7 @@ the length of the ``groups`` dict, so it is largely just a convenience:
239248
.. ipython::
240249

241250
@verbatim
242-
In [1]: gb.<TAB>
251+
In [1]: gb.<TAB> # noqa: E225, E999
243252
gb.agg gb.boxplot gb.cummin gb.describe gb.filter gb.get_group gb.height gb.last gb.median gb.ngroups gb.plot gb.rank gb.std gb.transform
244253
gb.aggregate gb.count gb.cumprod gb.dtype gb.first gb.groups gb.hist gb.max gb.min gb.nth gb.prod gb.resample gb.sum gb.var
245254
gb.apply gb.cummax gb.cumsum gb.fillna gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight
@@ -1300,12 +1309,17 @@ Now, to find prices per store/product, we can simply do:
13001309
Piping can also be expressive when you want to deliver a grouped object to some
13011310
arbitrary function, for example:
13021311

1303-
.. code-block:: python
1312+
.. ipython:: python
1313+
1314+
def mean(groupby):
1315+
return groupby.mean()
13041316
1305-
df.groupby(['Store', 'Product']).pipe(report_func)
1317+
df.groupby(['Store', 'Product']).pipe(mean)
13061318
1307-
where ``report_func`` takes a GroupBy object and creates a report
1308-
from that.
1319+
where ``mean`` takes a GroupBy object and finds the mean of the Revenue and Quantity
1320+
columns respectively for each Store-Product combination. The ``mean`` function can
1321+
be any function that takes in a GroupBy object; the ``.pipe`` will pass the GroupBy
1322+
object as a parameter into the function you specify.
13091323

13101324
Examples
13111325
--------

doc/source/timeseries.rst

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -855,40 +855,40 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ
855855
:header: "Date Offset", "Frequency String", "Description"
856856
:widths: 15, 15, 65
857857

858-
``DateOffset``, None, "Generic offset class, defaults to 1 calendar day"
859-
``BDay`` or ``BusinessDay``, ``'B'``,"business day (weekday)"
860-
``CDay`` or ``CustomBusinessDay``, ``'C'``, "custom business day"
861-
``Week``, ``'W'``, "one week, optionally anchored on a day of the week"
862-
``WeekOfMonth``, ``'WOM'``, "the x-th day of the y-th week of each month"
863-
``LastWeekOfMonth``, ``'LWOM'``, "the x-th day of the last week of each month"
864-
``MonthEnd``, ``'M'``, "calendar month end"
865-
``MonthBegin``, ``'MS'``, "calendar month begin"
866-
``BMonthEnd`` or ``BusinessMonthEnd``, ``'BM'``, "business month end"
867-
``BMonthBegin`` or ``BusinessMonthBegin``, ``'BMS'``, "business month begin"
868-
``CBMonthEnd`` or ``CustomBusinessMonthEnd``, ``'CBM'``, "custom business month end"
869-
``CBMonthBegin`` or ``CustomBusinessMonthBegin``, ``'CBMS'``, "custom business month begin"
870-
``SemiMonthEnd``, ``'SM'``, "15th (or other day_of_month) and calendar month end"
871-
``SemiMonthBegin``, ``'SMS'``, "15th (or other day_of_month) and calendar month begin"
872-
``QuarterEnd``, ``'Q'``, "calendar quarter end"
873-
``QuarterBegin``, ``'QS'``, "calendar quarter begin"
874-
``BQuarterEnd``, ``'BQ``, "business quarter end"
875-
``BQuarterBegin``, ``'BQS'``, "business quarter begin"
876-
``FY5253Quarter``, ``'REQ'``, "retail (aka 52-53 week) quarter"
877-
``YearEnd``, ``'A'``, "calendar year end"
878-
``YearBegin``, ``'AS'`` or ``'BYS'``,"calendar year begin"
879-
``BYearEnd``, ``'BA'``, "business year end"
880-
``BYearBegin``, ``'BAS'``, "business year begin"
881-
``FY5253``, ``'RE'``, "retail (aka 52-53 week) year"
882-
``Easter``, None, "Easter holiday"
883-
``BusinessHour``, ``'BH'``, "business hour"
884-
``CustomBusinessHour``, ``'CBH'``, "custom business hour"
885-
``Day``, ``'D'``, "one absolute day"
886-
``Hour``, ``'H'``, "one hour"
887-
``Minute``, ``'T'`` or ``'min'``,"one minute"
888-
``Second``, ``'S'``, "one second"
889-
``Milli``, ``'L'`` or ``'ms'``, "one millisecond"
890-
``Micro``, ``'U'`` or ``'us'``, "one microsecond"
891-
``Nano``, ``'N'``, "one nanosecond"
858+
:class:`~pandas.tseries.offsets.DateOffset`, None, "Generic offset class, defaults to 1 calendar day"
859+
:class:`~pandas.tseries.offsets.BDay` or :class:`~pandas.tseries.offsets.BusinessDay`, ``'B'``,"business day (weekday)"
860+
:class:`~pandas.tseries.offsets.CDay` or :class:`~pandas.tseries.offsets.CustomBusinessDay`, ``'C'``, "custom business day"
861+
:class:`~pandas.tseries.offsets.Week`, ``'W'``, "one week, optionally anchored on a day of the week"
862+
:class:`~pandas.tseries.offsets.WeekOfMonth`, ``'WOM'``, "the x-th day of the y-th week of each month"
863+
:class:`~pandas.tseries.offsets.LastWeekOfMonth`, ``'LWOM'``, "the x-th day of the last week of each month"
864+
:class:`~pandas.tseries.offsets.MonthEnd`, ``'M'``, "calendar month end"
865+
:class:`~pandas.tseries.offsets.MonthBegin`, ``'MS'``, "calendar month begin"
866+
:class:`~pandas.tseries.offsets.BMonthEnd` or :class:`~pandas.tseries.offsets.BusinessMonthEnd`, ``'BM'``, "business month end"
867+
:class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin"
868+
:class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBM'``, "custom business month end"
869+
:class:`~pandas.tseries.offsets.CBMonthBegin` or :class:`~pandas.tseries.offsets.CustomBusinessMonthBegin`, ``'CBMS'``, "custom business month begin"
870+
:class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SM'``, "15th (or other day_of_month) and calendar month end"
871+
:class:`~pandas.tseries.offsets.SemiMonthBegin`, ``'SMS'``, "15th (or other day_of_month) and calendar month begin"
872+
:class:`~pandas.tseries.offsets.QuarterEnd`, ``'Q'``, "calendar quarter end"
873+
:class:`~pandas.tseries.offsets.QuarterBegin`, ``'QS'``, "calendar quarter begin"
874+
:class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQ'``, "business quarter end"
875+
:class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin"
876+
:class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter"
877+
:class:`~pandas.tseries.offsets.YearEnd`, ``'A'``, "calendar year end"
878+
:class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin"
879+
:class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end"
880+
:class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin"
881+
:class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year"
882+
:class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday"
883+
:class:`~pandas.tseries.offsets.BusinessHour`, ``'BH'``, "business hour"
884+
:class:`~pandas.tseries.offsets.CustomBusinessHour`, ``'CBH'``, "custom business hour"
885+
:class:`~pandas.tseries.offsets.Day`, ``'D'``, "one absolute day"
886+
:class:`~pandas.tseries.offsets.Hour`, ``'H'``, "one hour"
887+
:class:`~pandas.tseries.offsets.Minute`, ``'T'`` or ``'min'``,"one minute"
888+
:class:`~pandas.tseries.offsets.Second`, ``'S'``, "one second"
889+
:class:`~pandas.tseries.offsets.Milli`, ``'L'`` or ``'ms'``, "one millisecond"
890+
:class:`~pandas.tseries.offsets.Micro`, ``'U'`` or ``'us'``, "one microsecond"
891+
:class:`~pandas.tseries.offsets.Nano`, ``'N'``, "one nanosecond"
892892

893893
``DateOffsets`` additionally have :meth:`rollforward` and :meth:`rollback`
894894
methods for moving a date forward or backward respectively to a valid offset

doc/source/whatsnew/v0.24.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ Other Enhancements
371371
- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
372372
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
373373
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
374-
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the an ``axis`` parameter (:issue:`8839`)
374+
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the ``axis`` parameter (:issue:`8839`)
375375
- The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
376376
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
377377

@@ -1564,6 +1564,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
15641564
- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`)
15651565
- :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`)
15661566
- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
1567+
- Bug in :func:`read_fwf` in which the compression type of a file was not being properly inferred (:issue:`22199`)
15671568
- Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`)
15681569
- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where an exception would leave a partially written and invalid dta file (:issue:`23573`)
15691570
- Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`)

pandas/io/parsers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ def _read(filepath_or_buffer, kwds):
401401
encoding = re.sub('_', '-', encoding).lower()
402402
kwds['encoding'] = encoding
403403

404-
compression = kwds.get('compression')
404+
compression = kwds.get('compression', 'infer')
405405
compression = _infer_compression(filepath_or_buffer, compression)
406406
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
407407
filepath_or_buffer, encoding, compression)

pandas/tests/io/parser/test_read_fwf.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -555,20 +555,26 @@ def test_default_delimiter():
555555
tm.assert_frame_equal(result, expected)
556556

557557

558-
@pytest.mark.parametrize("compression", ["gzip", "bz2"])
559-
def test_fwf_compression(compression):
558+
@pytest.mark.parametrize("infer", [True, False, None])
559+
def test_fwf_compression(compression_only, infer):
560560
data = """1111111111
561561
2222222222
562562
3333333333""".strip()
563563

564+
compression = compression_only
565+
extension = "gz" if compression == "gzip" else compression
566+
564567
kwargs = dict(widths=[5, 5], names=["one", "two"])
565568
expected = read_fwf(StringIO(data), **kwargs)
566569

567570
if compat.PY3:
568571
data = bytes(data, encoding="utf-8")
569572

570-
with tm.ensure_clean() as path:
573+
with tm.ensure_clean(filename="tmp." + extension) as path:
571574
tm.write_to_compressed(compression, path, data)
572575

573-
result = read_fwf(path, compression=compression, **kwargs)
576+
if infer is not None:
577+
kwargs["compression"] = "infer" if infer else compression
578+
579+
result = read_fwf(path, **kwargs)
574580
tm.assert_frame_equal(result, expected)

setup.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ exclude =
5353
doc/source/basics.rst
5454
doc/source/contributing_docstring.rst
5555
doc/source/enhancingperf.rst
56-
doc/source/groupby.rst
5756

5857

5958
[yapf]

0 commit comments

Comments
 (0)