Skip to content

Commit b9d6e5c

Browse files
add a geojson dataset and link from election dataset
1 parent ef46749 commit b9d6e5c

File tree

3 files changed

+120
-34
lines changed

3 files changed

+120
-34
lines changed

Diff for: doc/python/mapbox-county-choropleth.md

+60-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jupyter:
66
extension: .md
77
format_name: markdown
88
format_version: '1.2'
9-
jupytext_version: 1.3.0
9+
jupytext_version: 1.3.1
1010
kernelspec:
1111
display_name: Python 3
1212
language: python
@@ -20,7 +20,7 @@ jupyter:
2020
name: python
2121
nbconvert_exporter: python
2222
pygments_lexer: ipython3
23-
version: 3.7.3
23+
version: 3.6.8
2424
plotly:
2525
description: How to make a Mapbox Choropleth Map of US Counties in Python with
2626
Plotly.
@@ -34,6 +34,47 @@ jupyter:
3434
thumbnail: thumbnail/mapbox-choropleth.png
3535
---
3636

37+
```python
38+
import plotly.express as px
39+
```
40+
41+
```python
42+
geo = px.data.election_geojson()
43+
```
44+
45+
```python
46+
help(px.data.election_geojson)
47+
```
48+
49+
```python
50+
df = px.data.election()
51+
```
52+
53+
```python
54+
px.choropleth(df, geojson=geo, locations="district", featureidkey="properties.district",
55+
color="winner", projection="mercator").update_geos(visible=False, fitbounds="geojson")
56+
```
57+
58+
```python
59+
df = px.data.election()
60+
```
61+
62+
```python
63+
df
64+
```
65+
66+
```python
67+
ids = [x.split("-")[0] for x in df.district]
68+
```
69+
70+
```python
71+
df["district_id"] = ids
72+
```
73+
74+
```python
75+
df.to_csv("~/Downloads/elections2.csv", index=False)
76+
```
77+
3778

3879
#### Mapbox Access Token
3980

@@ -58,6 +99,14 @@ with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-c
5899
counties["features"][0]
59100
```
60101

102+
```python
103+
fips2county = {f["id"]: f["properties"]["NAME"] for f in counties["features"]}
104+
```
105+
106+
```python
107+
counties = [fips2county[f] if f in fips2county else None for f in df.fips]
108+
```
109+
61110
#### Data indexed by `id`
62111

63112
Here we load unemployment data by county, also indexed by [FIPS code](https://en.wikipedia.org/wiki/FIPS_county_code).
@@ -75,23 +124,29 @@ df.head()
75124

76125
With `px.choropleth_mapbox`, each row of the DataFrame is represented as a region of the choropleth.
77126

127+
```python
128+
len(df["county_name"].unique())
129+
```
130+
78131
```python
79132
from urllib.request import urlopen
80133
import json
81134
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
82135
counties = json.load(response)
83136

84137
import pandas as pd
85-
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
138+
df2 = pd.read_csv("~/Downloads/fips-unemp-16.csv",
86139
dtype={"fips": str})
87140

88141
import plotly.express as px
89142

90-
fig = px.choropleth_mapbox(df, geojson=counties, locations='fips', color='unemp',
143+
fig = px.choropleth_mapbox(df, geojson=counties,
144+
locations='county_name', featureidkey="properties.NAME",
145+
color='unemp',
91146
color_continuous_scale="Viridis",
92147
range_color=(0, 12),
93148
mapbox_style="carto-positron",
94-
zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
149+
zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
95150
opacity=0.5,
96151
labels={'unemp':'unemployment rate'}
97152
)

Diff for: packages/python/plotly/plotly/data/__init__.py

+60-29
Original file line numberDiff line numberDiff line change
@@ -5,69 +5,100 @@
55

66
def gapminder():
77
"""
8-
Each row represents a country on a given year.
8+
Each row represents a country on a given year.
99
10-
https://www.gapminder.org/data/
10+
https://www.gapminder.org/data/
1111
12-
Returns:
13-
A `pandas.DataFrame` with 1704 rows and the following columns: `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
14-
'iso_alpha', 'iso_num']`.
15-
"""
12+
Returns:
13+
A `pandas.DataFrame` with 1704 rows and the following columns:
14+
`['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
15+
'iso_alpha', 'iso_num']`.
16+
"""
1617
return _get_dataset("gapminder")
1718

1819

1920
def tips():
2021
"""
21-
Each row represents a restaurant bill.
22+
Each row represents a restaurant bill.
2223
23-
https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html
24+
https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html
2425
25-
Returns:
26-
A `pandas.DataFrame` with 244 rows and the following columns: `['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
27-
"""
26+
Returns:
27+
A `pandas.DataFrame` with 244 rows and the following columns:
28+
`['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
29+
"""
2830
return _get_dataset("tips")
2931

3032

3133
def iris():
3234
"""
33-
Each row represents a flower.
35+
Each row represents a flower.
3436
35-
https://en.wikipedia.org/wiki/Iris_flower_data_set
37+
https://en.wikipedia.org/wiki/Iris_flower_data_set
3638
37-
Returns:
38-
A `pandas.DataFrame` with 150 rows and the following columns: `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species',
39-
'species_id']`.
40-
"""
39+
Returns:
40+
A `pandas.DataFrame` with 150 rows and the following columns:
41+
`['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.
42+
"""
4143
return _get_dataset("iris")
4244

4345

4446
def wind():
4547
"""
46-
Each row represents a level of wind intensity in a cardinal direction, and its frequency.
48+
Each row represents a level of wind intensity in a cardinal direction, and its frequency.
4749
48-
Returns:
49-
A `pandas.DataFrame` with 128 rows and the following columns: `['direction', 'strength', 'frequency']`.
50-
"""
50+
Returns:
51+
A `pandas.DataFrame` with 128 rows and the following columns:
52+
`['direction', 'strength', 'frequency']`.
53+
"""
5154
return _get_dataset("wind")
5255

5356

5457
def election():
5558
"""
56-
Each row represents voting results for an electoral district in the 2013 Montreal mayoral election.
59+
Each row represents voting results for an electoral district in the 2013 Montreal
60+
mayoral election.
5761
58-
Returns:
59-
A `pandas.DataFrame` with 58 rows and the following columns: `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result']`.
60-
"""
62+
Returns:
63+
A `pandas.DataFrame` with 58 rows and the following columns:
64+
`['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'district_id']`.
65+
"""
6166
return _get_dataset("election")
6267

6368

64-
def carshare():
69+
def election_geojson():
6570
"""
66-
Each row represents the availability of car-sharing services near the centroid of a zone in Montreal.
71+
Each feature represents an electoral district in the 2013 Montreal mayoral election.
72+
73+
Returns:
74+
A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features whose `id`
75+
is an electoral district numerical ID and whose `district` property is the ID and
76+
district name.
77+
"""
78+
import gzip
79+
import json
80+
import os
81+
82+
path = os.path.join(
83+
os.path.dirname(os.path.dirname(__file__)),
84+
"package_data",
85+
"datasets",
86+
"election.geojson.gz",
87+
)
88+
with gzip.GzipFile(path, "r") as f:
89+
result = json.loads(f.read().decode("utf-8"))
90+
return result
91+
6792

68-
Returns:
69-
A `pandas.DataFrame` with 249 rows and the following columns: `['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
93+
def carshare():
7094
"""
95+
Each row represents the availability of car-sharing services near the centroid of a zone
96+
in Montreal.
97+
98+
Returns:
99+
A `pandas.DataFrame` with 249 rows and the following columns:
100+
`['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
101+
"""
71102
return _get_dataset("carshare")
72103

73104

Binary file not shown.

0 commit comments

Comments
 (0)