From 9eb8a8d37799c0b5757f44d6d3c816fd5acfabea Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 28 Feb 2018 00:02:06 +0000 Subject: [PATCH] Adding sample datasets to be used in the documentation --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/io/samples.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 pandas/io/samples.py diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 99a3773603fc4..90562dc4bed3b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -306,6 +306,7 @@ Other Enhancements - Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) - For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) - :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) +- Added sample datasets in ``pandas.io.samples`` to be used mainly in the documentation (:issue:`19710`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/io/samples.py b/pandas/io/samples.py new file mode 100644 index 0000000000000..83aca8dfdf85b --- /dev/null +++ b/pandas/io/samples.py @@ -0,0 +1,48 @@ +import numpy +import pandas + + +def _countries_with_penguins(): + """Main dataset to be used in pandas documentation examples.""" + columns = ('Code', 'Name', 'Capital', 'Continent', + 'Penguin species', 'Avg. temperature') + data = [ + ('AO', 'Angola', 'Luanda', 'AF', 1, 21.55), + ('AQ', 'Antarctica', numpy.nan, 'AN', 7, numpy.nan), + ('AR', 'Argentina', 'Buenos Aires', 'SA', 7, 14.8), + ('AU', 'Australia', 'Canberra', 'OC', 10, 21.65), + ('BR', 'Brazil', 'Brasilia', 'SA', 1, 24.95), + ('CL', 'Chile', 'Santiago', 'SA', 10, 8.45), + ('EC', 'Ecuador', 'Quito', 'SA', 2, 21.85), + ('MZ', 'Mozambique', 'Maputo', 'AF', 1, 23.8), + ('NA', 'Namibia', 'Windhoek', 'AF', 2, 19.95), + ('NZ', 'New Zealand', 'Wellington', 'OC', 7, 10.55), + ('PE', 'Peru', 'Lima', 'SA', 2, 19.6), + ('UY', 'Uruguay', 'Montevideo', 'SA', 3, 17.55), + ('ZA', 'South Africa', 'Pretoria', 'AF', 4, 17.75), + ] + df = pandas.DataFrame(data, columns=columns).set_index('Code') + df['Continent'] = df['Continent'].astype('category') + return df + + +def _stocks(): + """Sample dataset to be used in time series examples in pandas docs.""" + data = {'MSFT': [62.9423, 62.2900, 64.5091, 67.0557, 68.4074, 67.9031, + 71.6169, 73.6561, 73.7712, 82.3774, 83.3578, 85.1401], + 'RHT': [75.8800, 82.8100, 86.5000, 88.0800, 89.5700, 95.7500, + 98.8700, 107.5000, 110.8600, 120.8300, 126.7600, 120.1000], + 'AAPL': [118.9445, 134.2745, 141.4228, 141.4129, 150.3810, + 142.3624, 147.0181, 162.1124, 152.9444, 167.7505, + 170.5391, 168.5428]} + index = pandas.to_datetime(['2017-01-01', '2017-02-01', '2017-03-01', + '2017-04-01', '2017-05-01', '2017-06-01', + '2017-07-01', '2017-08-01', '2017-09-01', + '2017-10-01', '2017-11-01', '2017-12-01']) + index.name = 'Date' + df = pandas.DataFrame(data, index=index) + return df + + +countries_with_penguins = _countries_with_penguins() +stocks = _stocks()