add a geojson dataset and link from election dataset

nicolaskruchten · nicolaskruchten · commit b9d6e5c2fb31 · 2020-01-17T11:58:57.000-05:00
diff --git a/doc/python/mapbox-county-choropleth.md b/doc/python/mapbox-county-choropleth.md
@@ -6,7 +6,7 @@ jupyter:
       extension: .md
       format_name: markdown
       format_version: '1.2'
-      jupytext_version: 1.3.0
+      jupytext_version: 1.3.1
   kernelspec:
     display_name: Python 3
     language: python
@@ -20,7 +20,7 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.7.3
+    version: 3.6.8
   plotly:
     description: How to make a Mapbox Choropleth Map of US Counties in Python with
       Plotly.
@@ -34,6 +34,47 @@ jupyter:
     thumbnail: thumbnail/mapbox-choropleth.png
 ---
 
+```python
+import plotly.express as px
+```
+
+```python
+geo = px.data.election_geojson()
+```
+
+```python
+help(px.data.election_geojson)
+```
+
+```python
+df = px.data.election()
+```
+
+```python
+px.choropleth(df, geojson=geo, locations="district", featureidkey="properties.district", 
+              color="winner", projection="mercator").update_geos(visible=False, fitbounds="geojson")
+```
+
+```python
+df = px.data.election()
+```
+
+```python
+df
+```
+
+```python
+ids = [x.split("-")[0] for x in df.district]
+```
+
+```python
+df["district_id"] = ids
+```
+
+```python
+df.to_csv("~/Downloads/elections2.csv", index=False)
+```
+
 
 #### Mapbox Access Token
 
@@ -58,6 +99,14 @@ with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-c
 counties["features"][0]
 ```
 
+```python
+fips2county = {f["id"]: f["properties"]["NAME"] for f in counties["features"]}
+```
+
+```python
+counties = [fips2county[f] if f in fips2county else None for f in df.fips]
+```
+
 #### Data indexed by `id`
 
 Here we load unemployment data by county, also indexed by [FIPS code](https://en.wikipedia.org/wiki/FIPS_county_code).
@@ -75,23 +124,29 @@ df.head()
 
 With `px.choropleth_mapbox`, each row of the DataFrame is represented as a region of the choropleth.
 
+```python
+len(df["county_name"].unique())
+```
+
 ```python
 from urllib.request import urlopen
 import json
 with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
     counties = json.load(response)
 
 import pandas as pd
-df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
+df2 = pd.read_csv("~/Downloads/fips-unemp-16.csv",
                    dtype={"fips": str})
 
 import plotly.express as px
 
-fig = px.choropleth_mapbox(df, geojson=counties, locations='fips', color='unemp',
+fig = px.choropleth_mapbox(df, geojson=counties, 
+                           locations='county_name', featureidkey="properties.NAME",
+                           color='unemp',
                            color_continuous_scale="Viridis", 
                            range_color=(0, 12),
                            mapbox_style="carto-positron",
-                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
+                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                            opacity=0.5,
                            labels={'unemp':'unemployment rate'}
                           )
diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py
@@ -5,69 +5,100 @@
 
 def gapminder():
     """
-    Each row represents a country on a given year.
+Each row represents a country on a given year.
 
-    https://www.gapminder.org/data/
+https://www.gapminder.org/data/
 
-    Returns:
-        A `pandas.DataFrame` with 1704 rows and the following columns: `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
-       'iso_alpha', 'iso_num']`.
-    """
+Returns:
+    A `pandas.DataFrame` with 1704 rows and the following columns:
+    `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
+    'iso_alpha', 'iso_num']`.
+"""
     return _get_dataset("gapminder")
 
 
 def tips():
     """
-    Each row represents a restaurant bill.
+Each row represents a restaurant bill.
 
-    https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html
+https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html
 
-    Returns:
-        A `pandas.DataFrame` with 244 rows and the following columns: `['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
-    """
+Returns:
+    A `pandas.DataFrame` with 244 rows and the following columns:
+    `['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
+"""
     return _get_dataset("tips")
 
 
 def iris():
     """
-    Each row represents a flower.
+Each row represents a flower.
 
-    https://en.wikipedia.org/wiki/Iris_flower_data_set
+https://en.wikipedia.org/wiki/Iris_flower_data_set
 
-    Returns:
-        A `pandas.DataFrame` with 150 rows and the following columns: `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species',
-       'species_id']`.
-    """
+Returns:
+    A `pandas.DataFrame` with 150 rows and the following columns:
+    `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.
+"""
     return _get_dataset("iris")
 
 
 def wind():
     """
-    Each row represents a level of wind intensity in a cardinal direction, and its frequency.
+Each row represents a level of wind intensity in a cardinal direction, and its frequency.
 
-    Returns:
-        A `pandas.DataFrame` with 128 rows and the following columns: `['direction', 'strength', 'frequency']`.
-    """
+Returns:
+    A `pandas.DataFrame` with 128 rows and the following columns:
+    `['direction', 'strength', 'frequency']`.
+"""
     return _get_dataset("wind")
 
 
 def election():
     """
-    Each row represents voting results for an electoral district in the 2013 Montreal mayoral election.
+Each row represents voting results for an electoral district in the 2013 Montreal
+mayoral election.
 
-    Returns:
-        A `pandas.DataFrame` with 58 rows and the following columns: `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result']`.
-    """
+Returns:
+    A `pandas.DataFrame` with 58 rows and the following columns:
+    `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'district_id']`.
+"""
     return _get_dataset("election")
 
 
-def carshare():
+def election_geojson():
     """
-    Each row represents the availability of car-sharing services near the centroid of a zone in Montreal.
+Each feature represents an electoral district in the 2013 Montreal mayoral election.
+
+Returns:
+    A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features whose `id`
+    is an electoral district numerical ID and whose `district` property is the ID and
+    district name.
+"""
+    import gzip
+    import json
+    import os
+
+    path = os.path.join(
+        os.path.dirname(os.path.dirname(__file__)),
+        "package_data",
+        "datasets",
+        "election.geojson.gz",
+    )
+    with gzip.GzipFile(path, "r") as f:
+        result = json.loads(f.read().decode("utf-8"))
+    return result
+
 
-    Returns:
-        A `pandas.DataFrame` with 249 rows and the following columns: `['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
+def carshare():
     """
+Each row represents the availability of car-sharing services near the centroid of a zone
+in Montreal.
+
+Returns:
+    A `pandas.DataFrame` with 249 rows and the following columns:
+    `['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
+"""
     return _get_dataset("carshare")
 
 
diff --git a/packages/python/plotly/plotly/package_data/datasets/election.csv.gz b/packages/python/plotly/plotly/package_data/datasets/election.csv.gz