From edd012df3c89915bfa06e84ae7183ec626be4405 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 09:52:23 +1000 Subject: [PATCH 01/14] hexbin mapbox implemented in plotly.express --- .../python/plotly/plotly/express/__init__.py | 2 + packages/python/plotly/plotly/express/_doc.py | 9 + .../plotly/plotly/express/_hexbin_mapbox.py | 378 ++++++++++++++++++ 3 files changed, 389 insertions(+) create mode 100644 packages/python/plotly/plotly/express/_hexbin_mapbox.py diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index 72d0b445548..16ce0350fca 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -47,6 +47,7 @@ choropleth_mapbox, density_mapbox, ) +from ._hexbin_mapbox import hexbin_mapbox from ._core import ( # noqa: F401 @@ -100,4 +101,5 @@ "IdentityMap", "Constant", "Range", + "hexbin_mapbox", ] diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 4c7b591f785..84304a2f73b 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -514,6 +514,15 @@ "Sets the number of rendered sectors from any given `level`. Set `maxdepth` to -1 to render all the" "levels in the hierarchy.", ], + agg_func=[ + "function", + "Numpy array aggregator, it must take as input a 1D array", + "and output a scalar value." 
+ ], + gridsize=[ + "int", + "Number of hexagons (horizontally) to be created", + ] ) diff --git a/packages/python/plotly/plotly/express/_hexbin_mapbox.py b/packages/python/plotly/plotly/express/_hexbin_mapbox.py new file mode 100644 index 00000000000..ba5a910095a --- /dev/null +++ b/packages/python/plotly/plotly/express/_hexbin_mapbox.py @@ -0,0 +1,378 @@ +from ._core import make_figure, build_dataframe +from ._doc import make_docstring, docs +from ._chart_types import choropleth_mapbox +import plotly.graph_objs as go +import numpy as np +import pandas as pd +import re + + +def _project_latlon_to_wgs84(lat, lon): + """ + Projects lat and lon to WGS84 to get regular hexagons on a mapbox map + """ + x = lon * np.pi / 180 + y = np.arctanh(np.sin(lat * np.pi/180)) + return x, y + +def _project_wgs84_to_latlon(x, y): + """ + Projects lat and lon to WGS84 to get regular hexagons on a mapbox map + """ + lon = x * 180 / np.pi + lat = (2 * np.arctan(np.exp(y)) - np.pi / 2) * 180 / np.pi + return lat, lon + +def _human_format(number): + """ + Transforms high numbers to human readable numer string + """ + units = ["", "K", "M", "G", "T", "P"] + k = 1000.0 + magnitude = int(np.floor(np.log(number, k))) + return "%.2f%s" % (number / k ** magnitude, units[magnitude]) + +def _getBoundsZoomLevel(lon_min, lon_max, lat_min, lat_max, mapDim): + """ + Get the mapbox zoom level given bounds and a figure dimension + Source: https://stackoverflow.com/questions/6048975/google-maps-v3-how-to-calculate-the-zoom-level-for-a-given-bounds + """ + + scale = 2 # adjustment to reflect MapBox base tiles are 512x512 vs. 
Google's 256x256 + WORLD_DIM = {'height': 256 * scale, 'width': 256 * scale} + ZOOM_MAX = 18 + + def latRad(lat): + sin = np.sin(lat * np.pi / 180) + radX2 = np.log((1 + sin) / (1 - sin)) / 2 + return max(min(radX2, np.pi), -np.pi) / 2 + + def zoom(mapPx, worldPx, fraction): + return 0.95 * np.log(mapPx / worldPx / fraction) / np.log(2) + + latFraction = (latRad(lat_max) - latRad(lat_min)) / np.pi + + lngDiff = lon_max - lon_min + lngFraction = ((lngDiff + 360) if lngDiff < 0 else lngDiff) / 360 + + latZoom = zoom(mapDim['height'], WORLD_DIM['height'], latFraction) + lngZoom = zoom(mapDim['width'], WORLD_DIM['width'], lngFraction) + + return min(latZoom, lngZoom, ZOOM_MAX) + +def _compute_hexbin( + lat=None, + lon=None, + lat_range=None, + lon_range=None, + color=None, + nx=None, + agg_func=None, + min_count=None +): + """ + Computes the aggregation at hexagonal bin level. + Also defines the coordinates of the hexagons for plotting. + The binning is inspired by matplotlib's implementation. 
+ + Parameters + ---------- + lat : np.ndarray + Array of latitudes + lon : np.ndarray + Array of longitudes + lat_range : np.ndarray + Min and max latitudes + lon_range : np.ndarray + Min and max longitudes + color : np.ndarray + Metric to aggregate at hexagon level + nx : int + Number of hexagons horizontally + agg_func : function + Numpy compatible aggregator, this function must take a one-dimensional + np.ndarray as input and output a scalar + min_count : float + Minimum value for which to display the aggregate + + Returns + ------- + + """ + # Project to WGS 84 + x, y = _project_latlon_to_wgs84(lat, lon) + + if lat_range is None: + lat_range = np.array([lat.min(), lat.max()]) + if lon_range is None: + lon_range = np.array([lon.min(), lon.max()]) + + x_range, y_range = _project_latlon_to_wgs84(lat_range, lon_range) + + xmin = x_range.min() + xmax = x_range.max() + ymin = y_range.min() + ymax = y_range.max() + + Dx = xmax - xmin + Dy = ymax - ymin + dx = Dx / nx + dy = dx * np.sqrt(3) + ny = np.round(Dy / dy).astype(int) + + x = (x - xmin) / dx + y = (y - ymin) / dy + ix1 = np.round(x).astype(int) + iy1 = np.round(y).astype(int) + ix2 = np.floor(x).astype(int) + iy2 = np.floor(y).astype(int) + + nx1 = nx + 1 + ny1 = ny + 1 + nx2 = nx + ny2 = ny + n = nx1 * ny1 + nx2 * ny2 + + d1 = (x - ix1) ** 2 + 3.0 * (y - iy1) ** 2 + d2 = (x - ix2 - 0.5) ** 2 + 3.0 * (y - iy2 - 0.5) ** 2 + bdist = (d1 < d2) + + if color is None: + lattice1 = np.zeros((nx1, ny1)) + lattice2 = np.zeros((nx2, ny2)) + c1 = (0 <= ix1) & (ix1 < nx1) & (0 <= iy1) & (iy1 < ny1) & bdist + c2 = (0 <= ix2) & (ix2 < nx2) & (0 <= iy2) & (iy2 < ny2) & ~bdist + np.add.at(lattice1, (ix1[c1], iy1[c1]), 1) + np.add.at(lattice2, (ix2[c2], iy2[c2]), 1) + if min_count is not None: + lattice1[lattice1 < min_count] = np.nan + lattice2[lattice2 < min_count] = np.nan + accum = np.concatenate([lattice1.ravel(), lattice2.ravel()]) + good_idxs = ~np.isnan(accum) + else: + if min_count is None: + min_count = 0 + + # 
create accumulation arrays + lattice1 = np.empty((nx1, ny1), dtype=object) + for i in range(nx1): + for j in range(ny1): + lattice1[i, j] = [] + lattice2 = np.empty((nx2, ny2), dtype=object) + for i in range(nx2): + for j in range(ny2): + lattice2[i, j] = [] + + for i in range(len(x)): + if bdist[i]: + if 0 <= ix1[i] < nx1 and 0 <= iy1[i] < ny1: + lattice1[ix1[i], iy1[i]].append(color[i]) + else: + if 0 <= ix2[i] < nx2 and 0 <= iy2[i] < ny2: + lattice2[ix2[i], iy2[i]].append(color[i]) + + for i in range(nx1): + for j in range(ny1): + vals = lattice1[i, j] + if len(vals) > min_count: + lattice1[i, j] = agg_func(vals) + else: + lattice1[i, j] = np.nan + for i in range(nx2): + for j in range(ny2): + vals = lattice2[i, j] + if len(vals) > min_count: + lattice2[i, j] = agg_func(vals) + else: + lattice2[i, j] = np.nan + + accum = np.hstack((lattice1.astype(float).ravel(), + lattice2.astype(float).ravel())) + good_idxs = ~np.isnan(accum) + + agreggated_value = accum[good_idxs] + + centers = np.zeros((n, 2), float) + centers[:nx1 * ny1, 0] = np.repeat(np.arange(nx1), ny1) + centers[:nx1 * ny1, 1] = np.tile(np.arange(ny1), nx1) + centers[nx1 * ny1:, 0] = np.repeat(np.arange(nx2) + 0.5, ny2) + centers[nx1 * ny1:, 1] = np.tile(np.arange(ny2), nx2) + 0.5 + centers[:, 0] *= dx + centers[:, 1] *= dy + centers[:, 0] += xmin + centers[:, 1] += ymin + centers = centers[good_idxs] + + # Define normalised regular hexagon coordinates + hx = [0, .5, .5, 0, -.5, -.5] + hy = [ + -0.5 / np.cos(np.pi / 6), + -0.5 * np.tan(np.pi / 6), + 0.5 * np.tan(np.pi / 6), + 0.5 / np.cos(np.pi / 6), + 0.5 * np.tan(np.pi / 6), + -0.5 * np.tan(np.pi / 6) + ] + + # Number of hexagons needed + m = len(centers) + + # Scale of hexagons + dxh = sorted(list(set(np.diff(sorted(centers[:, 0])))))[1] + dyh = sorted(list(set(np.diff(sorted(centers[:, 1])))))[1] + nx = dxh * 2 + ny = 2/3 * dyh / (0.5 / np.cos(np.pi / 6)) + + # Coordinates for all hexagonal patches + hxs = np.array([hx] * m) * nx + 
np.vstack(centers[:, 0]) + hys = np.array([hy] * m) * ny + np.vstack(centers[:, 1]) + + # Convert back to lat-lon + hexagons_lats, hexagons_lons = _project_wgs84_to_latlon(hxs, hys) + + # Create unique feature id based on hexagon center + centers = centers.astype(str) + hexagons_ids = pd.Series(centers[:, 0]) + "," + pd.Series(centers[:, 1]) + + return hexagons_lats, hexagons_lons, hexagons_ids, agreggated_value + +def _hexagons_to_geojson(hexagons_lats, hexagons_lons, ids=None): + """ + Creates a geojson of hexagonal features based on the outputs of + _compute_hexbin + """ + features = [] + if ids is None: + ids = np.arange(len(hexagons_lats)) + for lat, lon, idx in zip(hexagons_lats, hexagons_lons, ids): + points = np.array([lon, lat]).T.tolist() + points.append(points[0]) + features.append( + dict( + type='Feature', + id=idx, + geometry=dict(type='Polygon', coordinates=[points]) + ) + ) + return dict(type='FeatureCollection', features=features) + +def hexbin_mapbox( + data_frame=None, + lat=None, + lon=None, + color=None, + gridsize=5, + agg_func=None, + animation_frame=None, + color_discrete_sequence=None, + color_discrete_map={}, + labels={}, + color_continuous_scale=None, + range_color=None, + color_continuous_midpoint=None, + opacity=None, + zoom=None, + center=None, + mapbox_style=None, + title=None, + template=None, + width=None, + height=None, +): + args = build_dataframe(args=locals(), constructor=None) + + if agg_func is None: + agg_func = np.mean + + lat_range = args["data_frame"][args["lat"]].agg(["min", "max"]).values + lon_range = args["data_frame"][args["lon"]].agg(["min", "max"]).values + + hexagons_lats, hexagons_lons, hexagons_ids, count = _compute_hexbin( + lat=args["data_frame"][args["lat"]].values, + lon=args["data_frame"][args["lon"]].values, + lat_range=lat_range, + lon_range=lon_range, + color=None, + nx=gridsize, + agg_func=agg_func, + min_count=-np.inf, + ) + + geojson = _hexagons_to_geojson(hexagons_lats, hexagons_lons, hexagons_ids) + 
+ if zoom is None: + if height is None and width is None: + mapDim = dict(height=450, width=450) + elif height is None and width is not None: + mapDim = dict(height=450, width=width) + elif height is not None and width is None: + mapDim = dict(height=height, width=height) + else: + mapDim = dict(height=height, width=width) + zoom = _getBoundsZoomLevel(*lon_range, *lat_range, mapDim) + + if center is None: + center=dict(lat=lat_range.mean(), lon=lon_range.mean()) + + if args["animation_frame"] is not None: + groups = args["data_frame"].groupby(args["animation_frame"]).groups + else: + groups = {0: args["data_frame"].index} + + agg_data_frame_list = [] + for frame, index in groups.items(): + df = args["data_frame"].loc[index] + _, _, hexagons_ids, aggregated_value = _compute_hexbin( + lat=df[args["lat"]].values, + lon=df[args["lon"]].values, + lat_range=lat_range, + lon_range=lon_range, + color=df[args["color"]].values if args["color"] else None, + nx=gridsize, + agg_func=agg_func, + min_count=None, + ) + agg_data_frame_list.append( + pd.DataFrame( + np.c_[hexagons_ids, aggregated_value], + columns=["locations", "color"] + ) + ) + agg_data_frame = pd.concat( + agg_data_frame_list, axis=0, keys=groups.keys() + ).rename_axis(index=("frame", "index")).reset_index("frame") + + agg_data_frame["color"] = pd.to_numeric(agg_data_frame["color"]) + + if range_color is None: + range_color = [ + agg_data_frame["color"].min(), + agg_data_frame["color"].max() + ] + + return choropleth_mapbox( + data_frame=agg_data_frame, + geojson=geojson, + locations="locations", + color="color", + hover_data={"color": True, "locations": False, "frame": False}, + animation_frame=( + "frame" if args["animation_frame"] is not None else None + ), + color_discrete_sequence=color_discrete_sequence, + color_discrete_map=color_discrete_map, + labels=labels, + color_continuous_scale=color_continuous_scale, + range_color=range_color, + color_continuous_midpoint=color_continuous_midpoint, + 
opacity=opacity, + zoom=zoom, + center=center, + mapbox_style=mapbox_style, + title=title, + template=template, + width=width, + height=height, + ) + +hexbin_mapbox.__doc__ = make_docstring(hexbin_mapbox) From 0057a14599d50560c6c85e6d7f13d580729e76eb Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 10:08:25 +1000 Subject: [PATCH 02/14] black formatting --- packages/python/plotly/plotly/express/_doc.py | 7 +- .../plotly/plotly/express/_hexbin_mapbox.py | 80 ++++++++++--------- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 84304a2f73b..24298e97509 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -517,12 +517,9 @@ agg_func=[ "function", "Numpy array aggregator, it must take as input a 1D array", - "and output a scalar value." + "and output a scalar value.", ], - gridsize=[ - "int", - "Number of hexagons (horizontally) to be created", - ] + gridsize=["int", "Number of hexagons (horizontally) to be created",], ) diff --git a/packages/python/plotly/plotly/express/_hexbin_mapbox.py b/packages/python/plotly/plotly/express/_hexbin_mapbox.py index ba5a910095a..e9579858254 100644 --- a/packages/python/plotly/plotly/express/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/express/_hexbin_mapbox.py @@ -12,9 +12,10 @@ def _project_latlon_to_wgs84(lat, lon): Projects lat and lon to WGS84 to get regular hexagons on a mapbox map """ x = lon * np.pi / 180 - y = np.arctanh(np.sin(lat * np.pi/180)) + y = np.arctanh(np.sin(lat * np.pi / 180)) return x, y + def _project_wgs84_to_latlon(x, y): """ Projects lat and lon to WGS84 to get regular hexagons on a mapbox map @@ -23,6 +24,7 @@ def _project_wgs84_to_latlon(x, y): lat = (2 * np.arctan(np.exp(y)) - np.pi / 2) * 180 / np.pi return lat, lon + def _human_format(number): """ Transforms high numbers to human readable numer string @@ 
-32,14 +34,17 @@ def _human_format(number): magnitude = int(np.floor(np.log(number, k))) return "%.2f%s" % (number / k ** magnitude, units[magnitude]) + def _getBoundsZoomLevel(lon_min, lon_max, lat_min, lat_max, mapDim): """ Get the mapbox zoom level given bounds and a figure dimension Source: https://stackoverflow.com/questions/6048975/google-maps-v3-how-to-calculate-the-zoom-level-for-a-given-bounds """ - scale = 2 # adjustment to reflect MapBox base tiles are 512x512 vs. Google's 256x256 - WORLD_DIM = {'height': 256 * scale, 'width': 256 * scale} + scale = ( + 2 # adjustment to reflect MapBox base tiles are 512x512 vs. Google's 256x256 + ) + WORLD_DIM = {"height": 256 * scale, "width": 256 * scale} ZOOM_MAX = 18 def latRad(lat): @@ -55,11 +60,12 @@ def zoom(mapPx, worldPx, fraction): lngDiff = lon_max - lon_min lngFraction = ((lngDiff + 360) if lngDiff < 0 else lngDiff) / 360 - latZoom = zoom(mapDim['height'], WORLD_DIM['height'], latFraction) - lngZoom = zoom(mapDim['width'], WORLD_DIM['width'], lngFraction) + latZoom = zoom(mapDim["height"], WORLD_DIM["height"], latFraction) + lngZoom = zoom(mapDim["width"], WORLD_DIM["width"], lngFraction) return min(latZoom, lngZoom, ZOOM_MAX) + def _compute_hexbin( lat=None, lon=None, @@ -68,7 +74,7 @@ def _compute_hexbin( color=None, nx=None, agg_func=None, - min_count=None + min_count=None, ): """ Computes the aggregation at hexagonal bin level. 
@@ -135,7 +141,7 @@ def _compute_hexbin( d1 = (x - ix1) ** 2 + 3.0 * (y - iy1) ** 2 d2 = (x - ix2 - 0.5) ** 2 + 3.0 * (y - iy2 - 0.5) ** 2 - bdist = (d1 < d2) + bdist = d1 < d2 if color is None: lattice1 = np.zeros((nx1, ny1)) @@ -186,17 +192,18 @@ def _compute_hexbin( else: lattice2[i, j] = np.nan - accum = np.hstack((lattice1.astype(float).ravel(), - lattice2.astype(float).ravel())) + accum = np.hstack( + (lattice1.astype(float).ravel(), lattice2.astype(float).ravel()) + ) good_idxs = ~np.isnan(accum) - + agreggated_value = accum[good_idxs] centers = np.zeros((n, 2), float) - centers[:nx1 * ny1, 0] = np.repeat(np.arange(nx1), ny1) - centers[:nx1 * ny1, 1] = np.tile(np.arange(ny1), nx1) - centers[nx1 * ny1:, 0] = np.repeat(np.arange(nx2) + 0.5, ny2) - centers[nx1 * ny1:, 1] = np.tile(np.arange(ny2), nx2) + 0.5 + centers[: nx1 * ny1, 0] = np.repeat(np.arange(nx1), ny1) + centers[: nx1 * ny1, 1] = np.tile(np.arange(ny1), nx1) + centers[nx1 * ny1 :, 0] = np.repeat(np.arange(nx2) + 0.5, ny2) + centers[nx1 * ny1 :, 1] = np.tile(np.arange(ny2), nx2) + 0.5 centers[:, 0] *= dx centers[:, 1] *= dy centers[:, 0] += xmin @@ -204,14 +211,14 @@ def _compute_hexbin( centers = centers[good_idxs] # Define normalised regular hexagon coordinates - hx = [0, .5, .5, 0, -.5, -.5] + hx = [0, 0.5, 0.5, 0, -0.5, -0.5] hy = [ -0.5 / np.cos(np.pi / 6), -0.5 * np.tan(np.pi / 6), 0.5 * np.tan(np.pi / 6), 0.5 / np.cos(np.pi / 6), 0.5 * np.tan(np.pi / 6), - -0.5 * np.tan(np.pi / 6) + -0.5 * np.tan(np.pi / 6), ] # Number of hexagons needed @@ -221,7 +228,7 @@ def _compute_hexbin( dxh = sorted(list(set(np.diff(sorted(centers[:, 0])))))[1] dyh = sorted(list(set(np.diff(sorted(centers[:, 1])))))[1] nx = dxh * 2 - ny = 2/3 * dyh / (0.5 / np.cos(np.pi / 6)) + ny = 2 / 3 * dyh / (0.5 / np.cos(np.pi / 6)) # Coordinates for all hexagonal patches hxs = np.array([hx] * m) * nx + np.vstack(centers[:, 0]) @@ -236,6 +243,7 @@ def _compute_hexbin( return hexagons_lats, hexagons_lons, hexagons_ids, 
agreggated_value + def _hexagons_to_geojson(hexagons_lats, hexagons_lons, ids=None): """ Creates a geojson of hexagonal features based on the outputs of @@ -249,12 +257,13 @@ def _hexagons_to_geojson(hexagons_lats, hexagons_lons, ids=None): points.append(points[0]) features.append( dict( - type='Feature', + type="Feature", id=idx, - geometry=dict(type='Polygon', coordinates=[points]) + geometry=dict(type="Polygon", coordinates=[points]), ) ) - return dict(type='FeatureCollection', features=features) + return dict(type="FeatureCollection", features=features) + def hexbin_mapbox( data_frame=None, @@ -280,10 +289,10 @@ def hexbin_mapbox( height=None, ): args = build_dataframe(args=locals(), constructor=None) - + if agg_func is None: agg_func = np.mean - + lat_range = args["data_frame"][args["lat"]].agg(["min", "max"]).values lon_range = args["data_frame"][args["lon"]].agg(["min", "max"]).values @@ -310,9 +319,9 @@ def hexbin_mapbox( else: mapDim = dict(height=height, width=width) zoom = _getBoundsZoomLevel(*lon_range, *lat_range, mapDim) - + if center is None: - center=dict(lat=lat_range.mean(), lon=lon_range.mean()) + center = dict(lat=lat_range.mean(), lon=lon_range.mean()) if args["animation_frame"] is not None: groups = args["data_frame"].groupby(args["animation_frame"]).groups @@ -334,21 +343,19 @@ def hexbin_mapbox( ) agg_data_frame_list.append( pd.DataFrame( - np.c_[hexagons_ids, aggregated_value], - columns=["locations", "color"] + np.c_[hexagons_ids, aggregated_value], columns=["locations", "color"] ) ) - agg_data_frame = pd.concat( - agg_data_frame_list, axis=0, keys=groups.keys() - ).rename_axis(index=("frame", "index")).reset_index("frame") - + agg_data_frame = ( + pd.concat(agg_data_frame_list, axis=0, keys=groups.keys()) + .rename_axis(index=("frame", "index")) + .reset_index("frame") + ) + agg_data_frame["color"] = pd.to_numeric(agg_data_frame["color"]) if range_color is None: - range_color = [ - agg_data_frame["color"].min(), - 
agg_data_frame["color"].max() - ] + range_color = [agg_data_frame["color"].min(), agg_data_frame["color"].max()] return choropleth_mapbox( data_frame=agg_data_frame, @@ -356,9 +363,7 @@ def hexbin_mapbox( locations="locations", color="color", hover_data={"color": True, "locations": False, "frame": False}, - animation_frame=( - "frame" if args["animation_frame"] is not None else None - ), + animation_frame=("frame" if args["animation_frame"] is not None else None), color_discrete_sequence=color_discrete_sequence, color_discrete_map=color_discrete_map, labels=labels, @@ -375,4 +380,5 @@ def hexbin_mapbox( height=height, ) + hexbin_mapbox.__doc__ = make_docstring(hexbin_mapbox) From 9359d457cf60f01766f2d5f08d13dfb3b3b51fdb Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 10:30:35 +1000 Subject: [PATCH 03/14] documentation for hexbin_mapbox --- doc/python/plotly-express.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/python/plotly-express.md b/doc/python/plotly-express.md index 9284a49f398..e8479250591 100644 --- a/doc/python/plotly-express.md +++ b/doc/python/plotly-express.md @@ -340,6 +340,17 @@ fig = px.line_mapbox(df, lat="centroid_lat", lon="centroid_lon", color="peak_hou fig.show() ``` +```python +import plotly.express as px +import numpy as np +px.set_mapbox_access_token(open(".mapbox_token").read()) +df = px.data.carshare() +fig = px.hexbin_mapbox(df, lat="centroid_lat", lon="centroid_lon", color="peak_hour", + color_continuous_scale=px.colors.cyclical.IceFire, labels={"color": "Average peak hour"}, + gridsize=10, agg_func=np.mean) +fig.show() +``` + ```python import plotly.express as px df = px.data.gapminder() From 50854da35330c53c67f111dec34f2a06b4c3f270 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 11:48:54 +1000 Subject: [PATCH 04/14] Changelog entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 985f3c231d0..def553f9704 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Fixed special cases with `px.sunburst` and `px.treemap` with `path` input ([#2524](https://github.com/plotly/plotly.py/pull/2524)) +### Added + +- New hexbin_mapbox trace in Plotly Express + ## [4.8.1] - 2020-05-28 ### Fixed From 469014ee0d3a5318b1277dbedc7166fd5a9edce7 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 11:59:23 +1000 Subject: [PATCH 05/14] hexbin_mapbox test --- test/percy/plotly-express.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/percy/plotly-express.py b/test/percy/plotly-express.py index 0a1b1064c50..95d28943f67 100644 --- a/test/percy/plotly-express.py +++ b/test/percy/plotly-express.py @@ -443,6 +443,21 @@ ) fig.write_html(os.path.join(dir_name, "line_mapbox.html")) +import plotly.express as px +import numpy as np + +carshare = px.data.carshare() +fig = px.hexbin_mapbox( + carshare, + lat="centroid_lat", + lon="centroid_lon", + color="peak_hour", + color_continuous_scale=px.colors.cyclical.IceFire, + gridsize=10, + agg_func=np.mean, +) +fig.write_html(os.path.join(dir_name, "hexbin_mapbox.html")) + import plotly.express as px sample_geojson = { From e323b1030caf51661a9edce598c50c7701ff5129 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 12:03:26 +1000 Subject: [PATCH 06/14] Remove unused --- .../python/plotly/plotly/express/_hexbin_mapbox.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/packages/python/plotly/plotly/express/_hexbin_mapbox.py b/packages/python/plotly/plotly/express/_hexbin_mapbox.py index e9579858254..be26097e587 100644 --- a/packages/python/plotly/plotly/express/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/express/_hexbin_mapbox.py @@ -25,16 +25,6 @@ def _project_wgs84_to_latlon(x, y): return lat, lon -def _human_format(number): - """ - Transforms high numbers to human readable numer string - """ - units = ["", "K", 
"M", "G", "T", "P"] - k = 1000.0 - magnitude = int(np.floor(np.log(number, k))) - return "%.2f%s" % (number / k ** magnitude, units[magnitude]) - - def _getBoundsZoomLevel(lon_min, lon_max, lat_min, lat_max, mapDim): """ Get the mapbox zoom level given bounds and a figure dimension From b423876d42cd7242285eacb566b329ad26dcd779 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Thu, 11 Jun 2020 12:11:30 +1000 Subject: [PATCH 07/14] remove *args for python 2 compatibility --- packages/python/plotly/plotly/express/_hexbin_mapbox.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_hexbin_mapbox.py b/packages/python/plotly/plotly/express/_hexbin_mapbox.py index be26097e587..a4dd581c036 100644 --- a/packages/python/plotly/plotly/express/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/express/_hexbin_mapbox.py @@ -308,7 +308,9 @@ def hexbin_mapbox( mapDim = dict(height=height, width=height) else: mapDim = dict(height=height, width=width) - zoom = _getBoundsZoomLevel(*lon_range, *lat_range, mapDim) + zoom = _getBoundsZoomLevel( + lon_range[0], lon_range[1], lat_range[0], lat_range[1], mapDim + ) if center is None: center = dict(lat=lat_range.mean(), lon=lon_range.mean()) From a0370998b3c5fadb1e579f7a49b966a8b8248ed5 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Fri, 10 Jul 2020 12:24:01 +1000 Subject: [PATCH 08/14] hexbin in ff --- .../python/plotly/plotly/express/__init__.py | 2 - packages/python/plotly/plotly/express/_doc.py | 6 - .../plotly/plotly/figure_factory/__init__.py | 4 + .../_hexbin_mapbox.py | 155 ++++++++++++------ 4 files changed, 113 insertions(+), 54 deletions(-) rename packages/python/plotly/plotly/{express => figure_factory}/_hexbin_mapbox.py (78%) diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index 16ce0350fca..72d0b445548 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ 
b/packages/python/plotly/plotly/express/__init__.py @@ -47,7 +47,6 @@ choropleth_mapbox, density_mapbox, ) -from ._hexbin_mapbox import hexbin_mapbox from ._core import ( # noqa: F401 @@ -101,5 +100,4 @@ "IdentityMap", "Constant", "Range", - "hexbin_mapbox", ] diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 24298e97509..4c7b591f785 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -514,12 +514,6 @@ "Sets the number of rendered sectors from any given `level`. Set `maxdepth` to -1 to render all the" "levels in the hierarchy.", ], - agg_func=[ - "function", - "Numpy array aggregator, it must take as input a 1D array", - "and output a scalar value.", - ], - gridsize=["int", "Number of hexagons (horizontally) to be created",], ) diff --git a/packages/python/plotly/plotly/figure_factory/__init__.py b/packages/python/plotly/plotly/figure_factory/__init__.py index 3829ca2fb67..6d3e5d01ed7 100644 --- a/packages/python/plotly/plotly/figure_factory/__init__.py +++ b/packages/python/plotly/plotly/figure_factory/__init__.py @@ -29,10 +29,13 @@ if optional_imports.get_module("pandas") is not None: from plotly.figure_factory._county_choropleth import create_choropleth + from plotly.figure_factory._hexbin_mapbox import create_hexbin_mapbox else: def create_choropleth(*args, **kwargs): raise ImportError("Please install pandas to use `create_choropleth`") + def create_hexbin_mapbox(*args, **kwargs): + raise ImportError("Please install pandas to use `create_hexbin_mapbox`") if optional_imports.get_module("skimage") is not None: @@ -53,6 +56,7 @@ def create_ternary_contour(*args, **kwargs): "create_distplot", "create_facet_grid", "create_gantt", + "create_hexbin_mapbox", "create_ohlc", "create_quiver", "create_scatterplotmatrix", diff --git a/packages/python/plotly/plotly/express/_hexbin_mapbox.py 
b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py similarity index 78% rename from packages/python/plotly/plotly/express/_hexbin_mapbox.py rename to packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py index a4dd581c036..c7e47a1d1d9 100644 --- a/packages/python/plotly/plotly/express/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py @@ -1,15 +1,13 @@ -from ._core import make_figure, build_dataframe -from ._doc import make_docstring, docs -from ._chart_types import choropleth_mapbox -import plotly.graph_objs as go +from plotly.express._core import build_dataframe +from plotly.express._doc import make_docstring +from plotly.express._chart_types import choropleth_mapbox import numpy as np import pandas as pd -import re def _project_latlon_to_wgs84(lat, lon): """ - Projects lat and lon to WGS84 to get regular hexagons on a mapbox map + Projects lat and lon to WGS84, used to get regular hexagons on a mapbox map """ x = lon * np.pi / 180 y = np.arctanh(np.sin(lat * np.pi / 180)) @@ -18,7 +16,7 @@ def _project_latlon_to_wgs84(lat, lon): def _project_wgs84_to_latlon(x, y): """ - Projects lat and lon to WGS84 to get regular hexagons on a mapbox map + Projects WGS84 to lat and lon, used to get regular hexagons on a mapbox map """ lon = x * 180 / np.pi lat = (2 * np.arctan(np.exp(y)) - np.pi / 2) * 180 / np.pi @@ -55,16 +53,8 @@ def zoom(mapPx, worldPx, fraction): return min(latZoom, lngZoom, ZOOM_MAX) - def _compute_hexbin( - lat=None, - lon=None, - lat_range=None, - lon_range=None, - color=None, - nx=None, - agg_func=None, - min_count=None, + x, y, x_range, y_range, color, nx, agg_func, min_count ): """ Computes the aggregation at hexagonal bin level. 
@@ -73,38 +63,36 @@ def _compute_hexbin( Parameters ---------- - lat : np.ndarray - Array of latitudes - lon : np.ndarray - Array of longitudes - lat_range : np.ndarray - Min and max latitudes - lon_range : np.ndarray - Min and max longitudes + x : np.ndarray + Array of x values (shape N) + y : np.ndarray + Array of y values (shape N) + x_range : np.ndarray + Min and max x (shape 2) + y_range : np.ndarray + Min and max y (shape 2) color : np.ndarray - Metric to aggregate at hexagon level + Metric to aggregate at hexagon level (shape N) nx : int Number of hexagons horizontally agg_func : function Numpy compatible aggregator, this function must take a one-dimensional np.ndarray as input and output a scalar - min_count : float - Minimum value for which to display the aggregate + min_count : int + Minimum number of points in the hexagon for the hexagon to be displayed Returns ------- + np.ndarray + X coordinates of each hexagon (shape M x 6) + np.ndarray + Y coordinates of each hexagon (shape M x 6) + np.ndarray + Centers of the hexagons (shape M x 2) + np.ndarray + Aggregated value in each hexagon (shape M) """ - # Project to WGS 84 - x, y = _project_latlon_to_wgs84(lat, lon) - - if lat_range is None: - lat_range = np.array([lat.min(), lat.max()]) - if lon_range is None: - lon_range = np.array([lon.min(), lon.max()]) - - x_range, y_range = _project_latlon_to_wgs84(lat_range, lon_range) - xmin = x_range.min() xmax = x_range.max() ymin = y_range.min() @@ -224,6 +212,69 @@ def _compute_hexbin( hxs = np.array([hx] * m) * nx + np.vstack(centers[:, 0]) hys = np.array([hy] * m) * ny + np.vstack(centers[:, 1]) + return hxs, hys, centers, agreggated_value + +def _compute_wgs84_hexbin( + lat=None, + lon=None, + lat_range=None, + lon_range=None, + color=None, + nx=None, + agg_func=None, + min_count=None, +): + """ + Computes the lat-lon aggregation at hexagonal bin level. 
+ Latitude and longitude need to be projected to WGS84 before aggregating + in order to display regular hexagons on the map. + + Parameters + ---------- + lat : np.ndarray + Array of latitudes (shape N) + lon : np.ndarray + Array of longitudes (shape N) + lat_range : np.ndarray + Min and max latitudes (shape 2) + lon_range : np.ndarray + Min and max longitudes (shape 2) + color : np.ndarray + Metric to aggregate at hexagon level (shape N) + nx : int + Number of hexagons horizontally + agg_func : function + Numpy compatible aggregator, this function must take a one-dimensional + np.ndarray as input and output a scalar + min_count : int + Minimum number of points in the hexagon for the hexagon to be displayed + + Returns + ------- + np.ndarray + Lat coordinates of each hexagon (shape M x 6) + np.ndarray + Lon coordinates of each hexagon (shape M x 6) + pd.Series + Unique id for each hexagon, to be used in the geojson data (shape M) + np.ndarray + Aggregated value in each hexagon (shape M) + + """ + # Project to WGS 84 + x, y = _project_latlon_to_wgs84(lat, lon) + + if lat_range is None: + lat_range = np.array([lat.min(), lat.max()]) + if lon_range is None: + lon_range = np.array([lon.min(), lon.max()]) + + x_range, y_range = _project_latlon_to_wgs84(lat_range, lon_range) + + hxs, hys, centers, agreggated_value = _compute_hexbin( + x, y, x_range, y_range, color, nx, agg_func, min_count + ) + # Convert back to lat-lon hexagons_lats, hexagons_lons = _project_wgs84_to_latlon(hxs, hys) @@ -237,7 +288,7 @@ def _compute_hexbin( def _hexagons_to_geojson(hexagons_lats, hexagons_lons, ids=None): """ Creates a geojson of hexagonal features based on the outputs of - _compute_hexbin + _compute_wgs84_hexbin """ features = [] if ids is None: @@ -255,12 +306,12 @@ def _hexagons_to_geojson(hexagons_lats, hexagons_lons, ids=None): return dict(type="FeatureCollection", features=features) -def hexbin_mapbox( +def create_hexbin_mapbox( data_frame=None, lat=None, lon=None, color=None, - 
gridsize=5, + nx_hexagon=5, agg_func=None, animation_frame=None, color_discrete_sequence=None, @@ -278,6 +329,9 @@ def hexbin_mapbox( width=None, height=None, ): + """ + Returns a figure aggregating scattered points into connected hexagons + """ args = build_dataframe(args=locals(), constructor=None) if agg_func is None: @@ -286,13 +340,13 @@ def hexbin_mapbox( lat_range = args["data_frame"][args["lat"]].agg(["min", "max"]).values lon_range = args["data_frame"][args["lon"]].agg(["min", "max"]).values - hexagons_lats, hexagons_lons, hexagons_ids, count = _compute_hexbin( + hexagons_lats, hexagons_lons, hexagons_ids, count = _compute_wgs84_hexbin( lat=args["data_frame"][args["lat"]].values, lon=args["data_frame"][args["lon"]].values, lat_range=lat_range, lon_range=lon_range, color=None, - nx=gridsize, + nx=nx_hexagon, agg_func=agg_func, min_count=-np.inf, ) @@ -323,13 +377,13 @@ def hexbin_mapbox( agg_data_frame_list = [] for frame, index in groups.items(): df = args["data_frame"].loc[index] - _, _, hexagons_ids, aggregated_value = _compute_hexbin( + _, _, hexagons_ids, aggregated_value = _compute_wgs84_hexbin( lat=df[args["lat"]].values, lon=df[args["lon"]].values, lat_range=lat_range, lon_range=lon_range, color=df[args["color"]].values if args["color"] else None, - nx=gridsize, + nx=nx_hexagon, agg_func=agg_func, min_count=None, ) @@ -372,5 +426,14 @@ def hexbin_mapbox( height=height, ) - -hexbin_mapbox.__doc__ = make_docstring(hexbin_mapbox) +create_hexbin_mapbox.__doc__ = make_docstring( + create_hexbin_mapbox, + override_dict=dict( + nx_hexagon=["int", "Number of hexagons (horizontally) to be created"], + agg_func=[ + "function", + "Numpy array aggregator, it must take as input a 1D array", + "and output a scalar value.", + ], + ) +) From ee06b495c0d79d607bbd7ec0e8eafce7ccf02095 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Fri, 10 Jul 2020 12:26:22 +1000 Subject: [PATCH 09/14] black --- .../python/plotly/plotly/figure_factory/__init__.py | 1 + 
.../plotly/plotly/figure_factory/_hexbin_mapbox.py | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/__init__.py b/packages/python/plotly/plotly/figure_factory/__init__.py index 6d3e5d01ed7..0a41dca1ba2 100644 --- a/packages/python/plotly/plotly/figure_factory/__init__.py +++ b/packages/python/plotly/plotly/figure_factory/__init__.py @@ -34,6 +34,7 @@ def create_choropleth(*args, **kwargs): raise ImportError("Please install pandas to use `create_choropleth`") + def create_hexbin_mapbox(*args, **kwargs): raise ImportError("Please install pandas to use `create_hexbin_mapbox`") diff --git a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py index c7e47a1d1d9..2ecf4e02d3e 100644 --- a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py @@ -53,9 +53,8 @@ def zoom(mapPx, worldPx, fraction): return min(latZoom, lngZoom, ZOOM_MAX) -def _compute_hexbin( - x, y, x_range, y_range, color, nx, agg_func, min_count -): + +def _compute_hexbin(x, y, x_range, y_range, color, nx, agg_func, min_count): """ Computes the aggregation at hexagonal bin level. Also defines the coordinates of the hexagons for plotting. 
@@ -214,6 +213,7 @@ def _compute_hexbin( return hxs, hys, centers, agreggated_value + def _compute_wgs84_hexbin( lat=None, lon=None, @@ -273,7 +273,7 @@ def _compute_wgs84_hexbin( hxs, hys, centers, agreggated_value = _compute_hexbin( x, y, x_range, y_range, color, nx, agg_func, min_count - ) + ) # Convert back to lat-lon hexagons_lats, hexagons_lons = _project_wgs84_to_latlon(hxs, hys) @@ -426,6 +426,7 @@ def create_hexbin_mapbox( height=height, ) + create_hexbin_mapbox.__doc__ = make_docstring( create_hexbin_mapbox, override_dict=dict( @@ -435,5 +436,5 @@ def create_hexbin_mapbox( "Numpy array aggregator, it must take as input a 1D array", "and output a scalar value.", ], - ) + ), ) From e491d05cf394b45c4deb7c2fb34539f8059c26e7 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Fri, 10 Jul 2020 13:36:02 +1000 Subject: [PATCH 10/14] Remove previous px hexbin stuff --- doc/python/plotly-express.md | 11 ----------- test/percy/plotly-express.py | 15 --------------- 2 files changed, 26 deletions(-) diff --git a/doc/python/plotly-express.md b/doc/python/plotly-express.md index e8479250591..9284a49f398 100644 --- a/doc/python/plotly-express.md +++ b/doc/python/plotly-express.md @@ -340,17 +340,6 @@ fig = px.line_mapbox(df, lat="centroid_lat", lon="centroid_lon", color="peak_hou fig.show() ``` -```python -import plotly.express as px -import numpy as np -px.set_mapbox_access_token(open(".mapbox_token").read()) -df = px.data.carshare() -fig = px.hexbin_mapbox(df, lat="centroid_lat", lon="centroid_lon", color="peak_hour", - color_continuous_scale=px.colors.cyclical.IceFire, labels={"color": "Average peak hour"}, - gridsize=10, agg_func=np.mean) -fig.show() -``` - ```python import plotly.express as px df = px.data.gapminder() diff --git a/test/percy/plotly-express.py b/test/percy/plotly-express.py index 95d28943f67..0a1b1064c50 100644 --- a/test/percy/plotly-express.py +++ b/test/percy/plotly-express.py @@ -443,21 +443,6 @@ ) fig.write_html(os.path.join(dir_name, 
"line_mapbox.html")) -import plotly.express as px -import numpy as np - -carshare = px.data.carshare() -fig = px.hexbin_mapbox( - carshare, - lat="centroid_lat", - lon="centroid_lon", - color="peak_hour", - color_continuous_scale=px.colors.cyclical.IceFire, - gridsize=10, - agg_func=np.mean, -) -fig.write_html(os.path.join(dir_name, "hexbin_mapbox.html")) - import plotly.express as px sample_geojson = { From 9c0b04375f5917e91892b2b198cd9097ccdd0a1c Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Fri, 10 Jul 2020 21:15:00 +1000 Subject: [PATCH 11/14] Hexbin_mapbox improvements: centered on the lat-axis. Hexbin_mapbox tests: 1) Aggregation results 2) Check build_dataframe behaviour --- .../plotly/figure_factory/_hexbin_mapbox.py | 45 ++-- .../test_figure_factory.py | 199 ++++++++++++++++++ 2 files changed, 229 insertions(+), 15 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py index 2ecf4e02d3e..52ad7ce6619 100644 --- a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py @@ -97,11 +97,25 @@ def _compute_hexbin(x, y, x_range, y_range, color, nx, agg_func, min_count): ymin = y_range.min() ymax = y_range.max() + # In the x-direction, the hexagons exactly cover the region from + # xmin to xmax. Need some padding to avoid roundoff errors. 
+ padding = 1.0e-9 * (xmax - xmin) + xmin -= padding + xmax += padding + Dx = xmax - xmin Dy = ymax - ymin - dx = Dx / nx + if Dx == 0 and Dy > 0: + dx = Dy / nx + elif Dx == 0 and Dy == 0: + dx, _ = _project_latlon_to_wgs84(1, 1) + else: + dx = Dx / nx dy = dx * np.sqrt(3) - ny = np.round(Dy / dy).astype(int) + ny = np.ceil(Dy / dy).astype(int) + + # Center the hexagons vertically since we only want regular hexagons + ymin -= (ymin + dy * ny - ymax) / 2 x = (x - xmin) / dx y = (y - ymin) / dy @@ -134,7 +148,7 @@ def _compute_hexbin(x, y, x_range, y_range, color, nx, agg_func, min_count): good_idxs = ~np.isnan(accum) else: if min_count is None: - min_count = 0 + min_count = 1 # create accumulation arrays lattice1 = np.empty((nx1, ny1), dtype=object) @@ -157,14 +171,14 @@ def _compute_hexbin(x, y, x_range, y_range, color, nx, agg_func, min_count): for i in range(nx1): for j in range(ny1): vals = lattice1[i, j] - if len(vals) > min_count: + if len(vals) >= min_count: lattice1[i, j] = agg_func(vals) else: lattice1[i, j] = np.nan for i in range(nx2): for j in range(ny2): vals = lattice2[i, j] - if len(vals) > min_count: + if len(vals) >= min_count: lattice2[i, j] = agg_func(vals) else: lattice2[i, j] = np.nan @@ -201,15 +215,9 @@ def _compute_hexbin(x, y, x_range, y_range, color, nx, agg_func, min_count): # Number of hexagons needed m = len(centers) - # Scale of hexagons - dxh = sorted(list(set(np.diff(sorted(centers[:, 0])))))[1] - dyh = sorted(list(set(np.diff(sorted(centers[:, 1])))))[1] - nx = dxh * 2 - ny = 2 / 3 * dyh / (0.5 / np.cos(np.pi / 6)) - # Coordinates for all hexagonal patches - hxs = np.array([hx] * m) * nx + np.vstack(centers[:, 0]) - hys = np.array([hy] * m) * ny + np.vstack(centers[:, 1]) + hxs = np.array([hx] * m) * dx + np.vstack(centers[:, 0]) + hys = np.array([hy] * m) * dy / np.sqrt(3) + np.vstack(centers[:, 1]) return hxs, hys, centers, agreggated_value @@ -328,6 +336,7 @@ def create_hexbin_mapbox( template=None, width=None, height=None, + 
min_count=None, ): """ Returns a figure aggregating scattered points into connected hexagons @@ -348,7 +357,7 @@ def create_hexbin_mapbox( color=None, nx=nx_hexagon, agg_func=agg_func, - min_count=-np.inf, + min_count=min_count, ) geojson = _hexagons_to_geojson(hexagons_lats, hexagons_lons, hexagons_ids) @@ -385,7 +394,7 @@ def create_hexbin_mapbox( color=df[args["color"]].values if args["color"] else None, nx=nx_hexagon, agg_func=agg_func, - min_count=None, + min_count=min_count, ) agg_data_frame_list.append( pd.DataFrame( @@ -436,5 +445,11 @@ def create_hexbin_mapbox( "Numpy array aggregator, it must take as input a 1D array", "and output a scalar value.", ], + min_count=[ + "int", + "Minimum number of points in a hexagon for it to be displayed.", + "If None and color is not set, display all hexagons.", + "If None and color is set, only display hexagons that contain points.", + ], ), ) diff --git a/packages/python/plotly/plotly/tests/test_optional/test_figure_factory/test_figure_factory.py b/packages/python/plotly/plotly/tests/test_optional/test_figure_factory/test_figure_factory.py index 0106b9dc58f..807b0f60423 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_figure_factory/test_figure_factory.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_figure_factory/test_figure_factory.py @@ -4307,3 +4307,202 @@ def test_optional_arguments(self): # This test does not work for ilr interpolation print(len(fig.data)) assert len(fig.data) == ncontours + 2 + arg_set["showscale"] + + +class TestHexbinMapbox(NumpyTestUtilsMixin, TestCaseNoTemplate): + def test_aggregation(self): + + lat = [0, 1, 1, 2, 4, 5, 1, 2, 4, 5, 2, 3, 2, 1, 5, 3, 5] + lon = [1, 2, 3, 3, 0, 4, 5, 0, 5, 3, 1, 5, 4, 0, 1, 2, 5] + color = np.ones(len(lat)) + + fig1 = ff.create_hexbin_mapbox(lat=lat, lon=lon, nx_hexagon=1) + + actual_geojson = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "id": "-8.726646259971648e-11,-0.031886255679892235", + 
"geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-5e-09, -4.7083909316316985], + [2.4999999999999996, -3.268549270944215], + [2.4999999999999996, -0.38356933397072673], + [-5e-09, 1.0597430482129082], + [-2.50000001, -0.38356933397072673], + [-2.50000001, -3.268549270944215], + [-5e-09, -4.7083909316316985], + ] + ], + }, + }, + { + "type": "Feature", + "id": "-8.726646259971648e-11,0.1192636916419258", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-5e-09, 3.9434377827164666], + [2.4999999999999996, 5.381998306154031], + [2.4999999999999996, 8.248045720432454], + [-5e-09, 9.673766164509932], + [-2.50000001, 8.248045720432454], + [-2.50000001, 5.381998306154031], + [-5e-09, 3.9434377827164666], + ] + ], + }, + }, + { + "type": "Feature", + "id": "0.08726646268698293,-0.031886255679892235", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [5.0000000049999995, -4.7083909316316985], + [7.500000009999999, -3.268549270944215], + [7.500000009999999, -0.38356933397072673], + [5.0000000049999995, 1.0597430482129082], + [2.5, -0.38356933397072673], + [2.5, -3.268549270944215], + [5.0000000049999995, -4.7083909316316985], + ] + ], + }, + }, + { + "type": "Feature", + "id": "0.08726646268698293,0.1192636916419258", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [5.0000000049999995, 3.9434377827164666], + [7.500000009999999, 5.381998306154031], + [7.500000009999999, 8.248045720432454], + [5.0000000049999995, 9.673766164509932], + [2.5, 8.248045720432454], + [2.5, 5.381998306154031], + [5.0000000049999995, 3.9434377827164666], + ] + ], + }, + }, + { + "type": "Feature", + "id": "0.04363323129985823,0.04368871798101678", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [2.4999999999999996, -0.38356933397072673], + [5.0000000049999995, 1.0597430482129082], + [5.0000000049999995, 3.9434377827164666], + [2.4999999999999996, 5.381998306154031], + [-5.0000001310894304e-09, 3.9434377827164666], + 
[-5.0000001310894304e-09, 1.0597430482129082], + [2.4999999999999996, -0.38356933397072673], + ] + ], + }, + }, + ], + } + + actual_agg = [2.0, 2.0, 1.0, 3.0, 9.0] + + self.assert_dict_equal(fig1.data[0].geojson, actual_geojson) + assert np.array_equal(fig1.data[0].z, actual_agg) + + fig2 = ff.create_hexbin_mapbox( + lat=lat, lon=lon, nx_hexagon=1, color=color, agg_func=np.mean, + ) + + assert np.array_equal(fig2.data[0].z, np.ones(5)) + + fig3 = ff.create_hexbin_mapbox( + lat=np.random.randn(1000), lon=np.random.randn(1000), nx_hexagon=20, + ) + + assert fig3.data[0].z.sum() == 1000 + + def test_build_dataframe(self): + np.random.seed(0) + N = 10000 + nx_hexagon = 20 + n_frames = 3 + + lat = np.random.randn(N) + lon = np.random.randn(N) + color = np.ones(N) + frame = np.random.randint(0, n_frames, N) + df = pd.DataFrame( + np.c_[lat, lon, color, frame], + columns=["Latitude", "Longitude", "Metric", "Frame"], + ) + + fig1 = ff.create_hexbin_mapbox(lat=lat, lon=lon, nx_hexagon=nx_hexagon) + fig2 = ff.create_hexbin_mapbox( + data_frame=df, lat="Latitude", lon="Longitude", nx_hexagon=nx_hexagon + ) + + assert isinstance(fig1, go.Figure) + assert len(fig1.data) == 1 + self.assert_dict_equal( + fig1.to_plotly_json()["data"][0], fig2.to_plotly_json()["data"][0] + ) + + fig3 = ff.create_hexbin_mapbox( + lat=lat, + lon=lon, + nx_hexagon=nx_hexagon, + color=color, + agg_func=np.sum, + min_count=0, + ) + fig4 = ff.create_hexbin_mapbox( + lat=lat, lon=lon, nx_hexagon=nx_hexagon, color=color, agg_func=np.sum, + ) + fig5 = ff.create_hexbin_mapbox( + data_frame=df, + lat="Latitude", + lon="Longitude", + nx_hexagon=nx_hexagon, + color="Metric", + agg_func=np.sum, + ) + + self.assert_dict_equal( + fig1.to_plotly_json()["data"][0], fig3.to_plotly_json()["data"][0] + ) + self.assert_dict_equal( + fig4.to_plotly_json()["data"][0], fig5.to_plotly_json()["data"][0] + ) + + fig6 = ff.create_hexbin_mapbox( + data_frame=df, + lat="Latitude", + lon="Longitude", + nx_hexagon=nx_hexagon, + 
color="Metric", + agg_func=np.sum, + animation_frame="Frame", + ) + + fig7 = ff.create_hexbin_mapbox( + lat=lat, + lon=lon, + nx_hexagon=nx_hexagon, + color=color, + agg_func=np.sum, + animation_frame=frame, + ) + + assert len(fig6.frames) == n_frames + assert len(fig7.frames) == n_frames + assert fig6.data[0].geojson == fig1.data[0].geojson From 0a1f48cf9e82d6b7e7f63e9f2fdf8d82b743e548 Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Sat, 11 Jul 2020 11:04:57 +1000 Subject: [PATCH 12/14] Option to display original data. Documentation page. --- doc/python/figure-factories.md | 1 + doc/python/hexbin-mapbox.md | 169 ++++++++++++++++++ .../plotly/figure_factory/_hexbin_mapbox.py | 40 ++++- 3 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 doc/python/hexbin-mapbox.md diff --git a/doc/python/figure-factories.md b/doc/python/figure-factories.md index 881185e332a..27fd4896aa0 100644 --- a/doc/python/figure-factories.md +++ b/doc/python/figure-factories.md @@ -42,6 +42,7 @@ The following types of plots are still difficult to create with Graph Objects or * [Annotated Heatmaps](/python/annotated-heatmap/) * [Dendrograms](/python/dendrogram/) * [Gantt Charts](/python/gantt/) + * [Hexagonal Binning Mapbox](/python/hexbin-mapbox/) * [Quiver Plots](/python/quiver-plots/) * [Streamline Plots](/python/streamline-plots/) * [Tables](/python/figure-factory-table/) diff --git a/doc/python/hexbin-mapbox.md b/doc/python/hexbin-mapbox.md new file mode 100644 index 00000000000..3fffe632e3f --- /dev/null +++ b/doc/python/hexbin-mapbox.md @@ -0,0 +1,169 @@ +--- +jupyter: + jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: markdown + format_version: '1.2' + jupytext_version: 1.5.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: 
ipython3 + version: 3.7.4 + plotly: + description: How to make a map with Hexagonal Binning of data in Python with Plotly. + display_as: scientific + language: python + layout: base + name: Hexbin Mapbox + order: 7 + page_type: u-guide + permalink: python/hexbin-mapbox/ + redirect_from: python/hexbin-mapbox/ + thumbnail: thumbnail/hexbin_mapbox.jpg +--- + +#### Simple Count Hexbin + +This page details the use of a [figure factory](/python/figure-factories/). For more examples with Choropleth maps, see [this page](/python/choropleth-maps/). + +In order to use mapbox styles that require a mapbox token, set the token with `plotly.express`. You can also use styles that do not require a mapbox token. See more information on [this page](/python/mapbox-layers/). + +```python +import plotly.figure_factory as ff +import plotly.express as px + +px.set_mapbox_access_token(open(".mapbox_token").read()) +df = px.data.carshare() + +fig = ff.create_hexbin_mapbox( + data_frame=df, lat="centroid_lat", lon="centroid_lon", + nx_hexagon=10, opacity=0.9, labels={"color": "Point Count"}, +) +fig.update_layout(margin=dict(b=0, t=0, l=0, r=0)) +fig.show() +``` + +#### Count Hexbin with Minimum Count + +```python +import plotly.figure_factory as ff +import plotly.express as px + +px.set_mapbox_access_token(open(".mapbox_token").read()) +df = px.data.carshare() + +fig = ff.create_hexbin_mapbox( + data_frame=df, lat="centroid_lat", lon="centroid_lon", + nx_hexagon=10, opacity=0.9, labels={"color": "Point Count"}, + min_count=1, +) +fig.show() +``` + +#### Display the Underlying Data + +```python +import plotly.figure_factory as ff +import plotly.express as px + +px.set_mapbox_access_token(open(".mapbox_token").read()) +df = px.data.carshare() + +fig = ff.create_hexbin_mapbox( + data_frame=df, lat="centroid_lat", lon="centroid_lon", + nx_hexagon=10, opacity=0.9, labels={"color": "Point Count"}, + min_count=1, color_continuous_scale="Viridis", + show_original_data=True, + 
original_data_marker=dict(size=4, opacity=0.6, color="deeppink") +) +fig.show() +``` + +#### Compute the Mean Value per Hexbin + +```python +import plotly.figure_factory as ff +import plotly.express as px + +px.set_mapbox_access_token(open(".mapbox_token").read()) +df = px.data.carshare() + +fig = ff.create_hexbin_mapbox( + data_frame=df, lat="centroid_lat", lon="centroid_lon", + nx_hexagon=10, opacity=0.9, labels={"color": "Average Peak Hour"}, + color="peak_hour", agg_func=np.mean, color_continuous_scale="Icefire", range_color=[0,23] +) +fig.show() +``` + +#### Compute the Sum Value per Hexbin + +```python +import plotly.figure_factory as ff +import plotly.express as px + +px.set_mapbox_access_token(open(".mapbox_token").read()) +df = px.data.carshare() + +fig = ff.create_hexbin_mapbox( + data_frame=df, lat="centroid_lat", lon="centroid_lon", + nx_hexagon=10, opacity=0.9, labels={"color": "Summed Car.Hours"}, + color="car_hours", agg_func=np.sum, color_continuous_scale="Magma" +) +fig.show() +``` + +#### Hexbin with Animation + +```python +import plotly.figure_factory as ff +import plotly.express as px +import numpy as np + +px.set_mapbox_access_token(open(".mapbox_token").read()) +np.random.seed(0) + +N = 500 +n_frames = 12 +lat = np.concatenate([ + np.random.randn(N) * 0.5 + np.cos(i / n_frames * 2 * np.pi) + for i in range(n_frames) +]) +lon = np.concatenate([ + np.random.randn(N) * 0.5 + np.sin(i / n_frames * 2 * np.pi) + for i in range(n_frames) +]) +frame = np.concatenate([ + np.ones(N, int) * i for i in range(n_frames) +]) + +fig = ff.create_hexbin_mapbox( + lat=lat, lon=lon, nx_hexagon=15, animation_frame=frame, + color_continuous_scale="Cividis", labels={"color": "Point Count", "frame": "Period"}, + show_original_data=True, original_data_marker=dict(opacity=0.6, size=4, color="deeppink") +) +fig.update_layout(margin=dict(b=0, t=0, l=0, r=0)) +fig.layout.sliders[0].pad.t=20 +fig.layout.updatemenus[0].pad.t=40 +fig.show() +``` + +#### Reference + +For more 
info on Plotly maps, see: https://plotly.com/python/maps.
For more info on using colorscales with Plotly see: https://plotly.com/python/heatmap-and-contour-colorscales/
For more info on `ff.create_hexbin_mapbox()`, see the [full function reference](https://plotly.com/python-api-reference/generated/plotly.figure_factory.create_hexbin_mapbox.html#plotly.figure_factory.create_hexbin_mapbox) + +```python + +``` diff --git a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py index 52ad7ce6619..363e9734c78 100644 --- a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py @@ -1,6 +1,6 @@ from plotly.express._core import build_dataframe from plotly.express._doc import make_docstring -from plotly.express._chart_types import choropleth_mapbox +from plotly.express._chart_types import choropleth_mapbox, scatter_mapbox import numpy as np import pandas as pd @@ -337,6 +337,8 @@ def create_hexbin_mapbox( width=None, height=None, min_count=None, + show_original_data=False, + original_data_marker=None, ): """ Returns a figure aggregating scattered points into connected hexagons @@ -412,7 +414,7 @@ def create_hexbin_mapbox( if range_color is None: range_color = [agg_data_frame["color"].min(), agg_data_frame["color"].max()] - return choropleth_mapbox( + fig = choropleth_mapbox( data_frame=agg_data_frame, geojson=geojson, locations="locations", @@ -435,6 +437,35 @@ def create_hexbin_mapbox( height=height, ) + if show_original_data: + original_fig = scatter_mapbox( + data_frame=( + args["data_frame"].sort_values(by=args["animation_frame"]) + if args["animation_frame"] is not None else + args["data_frame"] + ), + lat=args["lat"], + lon=args["lon"], + animation_frame=args["animation_frame"] + ) + original_fig.data[0].hoverinfo = "skip" + original_fig.data[0].hovertemplate = None + original_fig.data[0].marker = original_data_marker + + fig.add_trace(original_fig.data[0]) + + if args["animation_frame"] is not None: + for i in range(len(original_fig.frames)): + 
original_fig.frames[i].data[0].hoverinfo = "skip" + original_fig.frames[i].data[0].hovertemplate = None + original_fig.frames[i].data[0].marker = original_data_marker + + fig.frames[i].data = [ + fig.frames[i].data[0], original_fig.frames[i].data[0], + ] + + return fig + create_hexbin_mapbox.__doc__ = make_docstring( create_hexbin_mapbox, @@ -451,5 +482,10 @@ def create_hexbin_mapbox( "If None and color is not set, display all hexagons.", "If None and color is set, only display hexagons that contain points.", ], + show_original_data=[ + "bool", + "Whether to show the original data on top of the hexbin aggregation." + ], + original_data_marker=["dict", "Scattermapbox marker options."] ), ) From 9b3c2ba85a4b45515aee0504cf4059049131380a Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Sat, 11 Jul 2020 11:06:08 +1000 Subject: [PATCH 13/14] black --- .../plotly/plotly/figure_factory/_hexbin_mapbox.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py index 363e9734c78..6ee22245753 100644 --- a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py +++ b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py @@ -441,12 +441,12 @@ def create_hexbin_mapbox( original_fig = scatter_mapbox( data_frame=( args["data_frame"].sort_values(by=args["animation_frame"]) - if args["animation_frame"] is not None else - args["data_frame"] + if args["animation_frame"] is not None + else args["data_frame"] ), lat=args["lat"], lon=args["lon"], - animation_frame=args["animation_frame"] + animation_frame=args["animation_frame"], ) original_fig.data[0].hoverinfo = "skip" original_fig.data[0].hovertemplate = None @@ -461,7 +461,8 @@ def create_hexbin_mapbox( original_fig.frames[i].data[0].marker = original_data_marker fig.frames[i].data = [ - fig.frames[i].data[0], original_fig.frames[i].data[0], + fig.frames[i].data[0], + 
original_fig.frames[i].data[0], ] return fig @@ -484,8 +485,8 @@ def create_hexbin_mapbox( ], show_original_data=[ "bool", - "Whether to show the original data on top of the hexbin aggregation." + "Whether to show the original data on top of the hexbin aggregation.", ], - original_data_marker=["dict", "Scattermapbox marker options."] + original_data_marker=["dict", "Scattermapbox marker options."], ), ) From 72b118beb50cc569c39e79a430e02d7280537cde Mon Sep 17 00:00:00 2001 From: Renaud Laine Date: Sat, 11 Jul 2020 11:15:35 +1000 Subject: [PATCH 14/14] removed unnecessary cell in doc file --- doc/python/hexbin-mapbox.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/python/hexbin-mapbox.md b/doc/python/hexbin-mapbox.md index 3fffe632e3f..4f010ac224a 100644 --- a/doc/python/hexbin-mapbox.md +++ b/doc/python/hexbin-mapbox.md @@ -163,7 +163,3 @@ fig.show() #### Reference For more info on Plotly maps, see: https://plotly.com/python/maps.
For more info on using colorscales with Plotly see: https://plotly.com/python/heatmap-and-contour-colorscales/
For more info on `ff.create_hexbin_mapbox()`, see the [full function reference](https://plotly.com/python-api-reference/generated/plotly.figure_factory.create_hexbin_mapbox.html#plotly.figure_factory.create_hexbin_mapbox) - -```python - -```