|
5 | 5 |
|
def gapminder():
    """
    Return the `gapminder` dataset, in which each row describes one country
    in a given year.

    Source: https://www.gapminder.org/data/

    Returns:
        A `pandas.DataFrame` with 1704 rows and the following columns:
        `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
        'iso_alpha', 'iso_num']`.
    """
    frame = _get_dataset("gapminder")
    return frame
|
17 | 18 |
|
18 | 19 |
|
def tips():
    """
    Return the `tips` dataset, in which each row describes one restaurant bill.

    Source: https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html

    Returns:
        A `pandas.DataFrame` with 244 rows and the following columns:
        `['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
    """
    frame = _get_dataset("tips")
    return frame
|
29 | 31 |
|
30 | 32 |
|
def iris():
    """
    Return the `iris` dataset, in which each row describes one flower.

    Source: https://en.wikipedia.org/wiki/Iris_flower_data_set

    Returns:
        A `pandas.DataFrame` with 150 rows and the following columns:
        `['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
        'species', 'species_id']`.
    """
    frame = _get_dataset("iris")
    return frame
|
42 | 44 |
|
43 | 45 |
|
def wind():
    """
    Return the `wind` dataset, in which each row gives the frequency of one
    level of wind intensity in one cardinal direction.

    Returns:
        A `pandas.DataFrame` with 128 rows and the following columns:
        `['direction', 'strength', 'frequency']`.
    """
    frame = _get_dataset("wind")
    return frame
|
52 | 55 |
|
53 | 56 |
|
def election():
    """
    Return the `election` dataset, in which each row holds the voting results
    for one electoral district in the 2013 Montreal mayoral election.

    Returns:
        A `pandas.DataFrame` with 58 rows and the following columns:
        `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner',
        'result', 'district_id']`.
    """
    frame = _get_dataset("election")
    return frame
|
62 | 67 |
|
63 | 68 |
|
def election_geojson():
    """
    Return the electoral-district geometry for the 2013 Montreal mayoral
    election, one feature per district.

    Returns:
        A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features
        whose `id` is an electoral district numerical ID and whose `district`
        property is the ID and district name.
    """
    import gzip
    import json
    import os

    # The data file sits under the directory one level above this module's
    # own directory.
    package_root = os.path.dirname(os.path.dirname(__file__))
    archive_path = os.path.join(
        package_root, "package_data", "datasets", "election.geojson.gz"
    )

    # Decompress to bytes, then decode explicitly as UTF-8 before parsing.
    with gzip.open(archive_path, "rb") as handle:
        raw_bytes = handle.read()
    return json.loads(raw_bytes.decode("utf-8"))
| 91 | + |
67 | 92 |
|
def carshare():
    """
    Return the `carshare` dataset, in which each row describes the
    availability of car-sharing services near the centroid of a zone in
    Montreal.

    Returns:
        A `pandas.DataFrame` with 249 rows and the following columns:
        `['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
    """
    frame = _get_dataset("carshare")
    return frame
|
72 | 103 |
|
73 | 104 |
|
|
0 commit comments